[SWDEV-553168] Add support for decoding out of band boot time CPER files.
Change-Id: Ic4278698f9c5b5ae56bd56fd43150c0653c1ef05
[ROCm/amdsmi commit: c6698c9100]
Este commit está contenido en:
cometido por
Arif, Maisam
padre
25a6ac3585
commit
2214445327
@@ -1,119 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file aca_constants.h
|
||||
* @brief Shared constants for ACA error decoding
|
||||
*
|
||||
* This file contains string constants and numerical constants that are used
|
||||
* across multiple source files to improve maintainability and prevent typos.
|
||||
*/
|
||||
|
||||
#ifndef ACA_CONSTANTS_H
|
||||
#define ACA_CONSTANTS_H
|
||||
|
||||
/* Error severity constants */
|
||||
#define ACA_SEVERITY_UNKNOWN "UNKNOWN"
|
||||
#define ACA_SEVERITY_FATAL "Fatal"
|
||||
#define ACA_SEVERITY_CORRECTED "Corrected"
|
||||
#define ACA_SEVERITY_UNCORRECTED_NON_FATAL "Uncorrected, Non-fatal"
|
||||
#define ACA_SEVERITY_FAIL_TO_INIT "Fail-to-init"
|
||||
#define ACA_SEVERITY_ALL_CAPS "ALL"
|
||||
|
||||
/* Error category constants */
|
||||
#define ACA_CATEGORY_HBM_ERRORS "HBM Errors"
|
||||
#define ACA_CATEGORY_DEVICE_INTERNAL_ERRORS "Device Internal Errors"
|
||||
#define ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS "Off-Package Link Errors"
|
||||
#define ACA_CATEGORY_BOOT_TIME_ERRORS "Boot-Time Errors"
|
||||
#define ACA_CATEGORY_CPER_FORMAT "CPER Format"
|
||||
#define ACA_CATEGORY_UNIDENTIFIED_ERRORS "Unidentified Errors"
|
||||
|
||||
/* Common error type constants */
|
||||
#define ACA_ERROR_TYPE_ALL_OTHERS "All Others"
|
||||
#define ACA_ERROR_TYPE_ALL "All"
|
||||
#define ACA_ERROR_TYPE_DECODE_INAPPLICABLE "Decode Inapplicable"
|
||||
#define ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD "Bad Page Retirement Threshold"
|
||||
#define ACA_ERROR_TYPE_HARDWARE_ASSERTION "Hardware Assertion (HWA)"
|
||||
#define ACA_ERROR_TYPE_WATCHDOG_TIMEOUT "Watchdog Timeout (WDT)"
|
||||
#define ACA_ERROR_TYPE_ON_DIE_ECC "On-die ECC"
|
||||
#define ACA_ERROR_TYPE_END_TO_END_CRC "End-to-end CRC"
|
||||
#define ACA_ERROR_TYPE_WAFL "WAFL"
|
||||
#define ACA_ERROR_TYPE_XGMI "XGMI"
|
||||
|
||||
/* Boot-time error type constants */
|
||||
#define ACA_ERROR_TYPE_FW_LOAD "FW Load"
|
||||
#define ACA_ERROR_TYPE_HBM_BIST_TEST "HBM BIST Test"
|
||||
#define ACA_ERROR_TYPE_HBM_MEMORY_TEST "HBM Memory Test"
|
||||
#define ACA_ERROR_TYPE_HBM_TRAINING "HBM Training"
|
||||
#define ACA_ERROR_TYPE_UNHANDLED "Unhandled"
|
||||
#define ACA_ERROR_TYPE_UNKNOWN_ERROR "Unknown"
|
||||
#define ACA_ERROR_TYPE_USR_CP_LINK_TRAINING "USR CP Link Training"
|
||||
#define ACA_ERROR_TYPE_USR_DP_LINK_TRAINING "USR DP Link Training"
|
||||
#define ACA_ERROR_TYPE_WAFL_LINK_TRAINING "WAFL Link Training"
|
||||
#define ACA_ERROR_TYPE_XGMI_LINK_TRAINING "XGMI Link Training"
|
||||
#define ACA_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT "Boot Controller Data Abort"
|
||||
#define ACA_ERROR_TYPE_BOOT_CONTROLLER_GENERIC "Boot Controller Generic"
|
||||
|
||||
/* Link error type constants */
|
||||
#define ACA_ERROR_TYPE_PCIE_AER "PCIe AER"
|
||||
|
||||
/* CPER format error type constants */
|
||||
#define ACA_ERROR_TYPE_MALFORMED_CPER "Malformed CPER"
|
||||
#define ACA_ERROR_TYPE_INCOMPLETE_ACA_DATA "Incomplete ACA Data"
|
||||
#define ACA_ERROR_TYPE_INVALID_ACA_DATA "Invalid ACA Data"
|
||||
#define ACA_ERROR_TYPE_UNIDENTIFIED_ERROR "Unidentified Error"
|
||||
|
||||
/* Protocol constants */
|
||||
#define ACA_PROTOCOL_CPER "CPER"
|
||||
#define ACA_PROTOCOL_CPER_WITH_SPACE "CPER "
|
||||
|
||||
/* Bank name strings */
|
||||
#define ACA_BANK_UMC "umc"
|
||||
#define ACA_BANK_PSP "psp"
|
||||
#define ACA_BANK_CS "cs"
|
||||
#define ACA_BANK_PIE "pie"
|
||||
#define ACA_BANK_PCS_XGMI "pcs_xgmi"
|
||||
#define ACA_BANK_KPX_SERDES "kpx_serdes"
|
||||
#define ACA_BANK_KPX_WAFL "kpx_wafl"
|
||||
|
||||
/* Numerical constants */
|
||||
#define ACA_FLAG_THRESHOLD_EXCEEDED 0x8
|
||||
#define ACA_REGISTER_ARRAY_SIZE_32_BYTES 4
|
||||
#define ACA_REGISTER_ARRAY_SIZE_128_BYTES 16
|
||||
|
||||
/* Error code ranges */
|
||||
#define ACA_ERROR_CODE_EXT_MIN 0x3A
|
||||
#define ACA_ERROR_CODE_EXT_MAX 0x3E
|
||||
|
||||
/* Instance ID values for XCD and AID error decoding */
|
||||
#define ACA_INSTANCE_ID_XCD0_400 0x36430400
|
||||
#define ACA_INSTANCE_ID_XCD1_400 0x38430400
|
||||
#define ACA_INSTANCE_ID_XCD0_401 0x36430401
|
||||
#define ACA_INSTANCE_ID_XCD1_401 0x38430401
|
||||
#define ACA_INSTANCE_ID_AID_400 0x3B30400
|
||||
#define ACA_INSTANCE_ID_AID_401 0x3B30401
|
||||
|
||||
/* Error return codes */
|
||||
#define ACA_ERROR_INVALID_ACA_DATA_ID 33
|
||||
#define ACA_ERROR_UNIDENTIFIED_ERROR_ID 34
|
||||
|
||||
#endif /* ACA_CONSTANTS_H */
|
||||
@@ -1,60 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file utils.h
|
||||
* @brief Common utility functions
|
||||
*/
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* @brief Convert a 64-bit value from little endian to big endian
|
||||
* @param[in] value Value to convert
|
||||
* @return Converted value in big endian
|
||||
*/
|
||||
static inline uint64_t le64_to_be64(uint64_t value) {
    /*
     * Unconditionally reverse the order of the eight bytes: byte 7 (most
     * significant) trades places with byte 0, byte 6 with byte 1, and so on.
     * The swap does not consult the host byte order, and because it is
     * symmetric, applying it twice returns the original value.
     */
    return ((value & 0xFF00000000000000ULL) >> 56) |
           ((value & 0x00FF000000000000ULL) >> 40) |
           ((value & 0x0000FF0000000000ULL) >> 24) |
           ((value & 0x000000FF00000000ULL) >> 8)  |
           ((value & 0x00000000FF000000ULL) << 8)  |
           ((value & 0x0000000000FF0000ULL) << 24) |
           ((value & 0x000000000000FF00ULL) << 40) |
           ((value & 0x00000000000000FFULL) << 56);
}
|
||||
|
||||
/**
|
||||
* @brief Convert an array of 64-bit values from little endian to big endian
|
||||
* @param[in,out] array Array to convert
|
||||
* @param[in] len Length of the array
|
||||
*/
|
||||
static inline void convert_array_le_to_be(uint64_t *array, size_t len) {
    /* Nothing to convert; also avoids dereferencing a NULL array. */
    if (array == NULL) {
        return;
    }

    /* Byte-swap every element in place. */
    for (size_t i = 0; i < len; i++) {
        array[i] = le64_to_be64(array[i]);
    }
}
|
||||
|
||||
#endif /* UTILS_H */
|
||||
Archivo ejecutable → Archivo normal
+9
-17
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -21,20 +20,16 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
/**
|
||||
* @file aca_decode.h
|
||||
* @brief Internal decoder interface and data structures
|
||||
*/
|
||||
#ifndef RAS_DECODE_DECODE_H
|
||||
#define RAS_DECODE_DECODE_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef ACA_DECODE_H
|
||||
#define ACA_DECODE_H
|
||||
|
||||
#include "aca_api.h"
|
||||
#include "ras_decode_api.h"
|
||||
#include "aca_fields.h"
|
||||
#include "json_util.h"
|
||||
|
||||
/**
|
||||
* @brief Internal decoder structure with parsed register fields
|
||||
@@ -67,13 +62,10 @@ typedef struct
|
||||
} aca_raw_data_t;
|
||||
|
||||
/**
|
||||
* @brief Main decode function that processes raw ACA error data
|
||||
* @brief Main decode function that processes raw ACA error data and returns JSON
|
||||
* @param[in] raw_data Pointer to structure containing raw ACA error data
|
||||
* @return Decoded error information structure
|
||||
* @return JsonValue* containing the decoded error information, or NULL on failure
|
||||
*/
|
||||
aca_error_info_t aca_decode(const aca_raw_data_t *raw_data);
|
||||
JsonValue* aca_decode(const aca_raw_data_t *raw_data);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif /* ACA_DECODE_H */
|
||||
#endif /* RAS_DECODE_DECODE_H */
|
||||
+3
-4
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -21,7 +20,7 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
/**
|
||||
* @file aca_fields.h
|
||||
* @brief ACA register field definitions and manipulation functions
|
||||
*
|
||||
@@ -30,8 +29,8 @@
|
||||
* definitions for status, IPID, and syndrome registers, along with
|
||||
* functions to initialize and access these fields.
|
||||
*/
|
||||
#ifndef ACA_FIELDS_H
|
||||
#define ACA_FIELDS_H
|
||||
#ifndef RAS_DECODE_FIELDS_H
|
||||
#define RAS_DECODE_FIELDS_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
+2
-3
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -28,8 +27,8 @@
|
||||
* into their corresponding names and types.
|
||||
*/
|
||||
|
||||
#ifndef ACA_TABLES_H
|
||||
#define ACA_TABLES_H
|
||||
#ifndef RAS_DECODE_TABLES_H
|
||||
#define RAS_DECODE_TABLES_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
+15
-8
@@ -20,8 +20,8 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACA_VERSION_H
|
||||
#define ACA_VERSION_H
|
||||
#ifndef RAS_DECODE_VERSION_H
|
||||
#define RAS_DECODE_VERSION_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
@@ -40,12 +40,19 @@ extern "C"
|
||||
*/
|
||||
|
||||
/* Version Components */
|
||||
#define ACA_VERSION_MAJOR 1 /**< Major version number */
|
||||
#define ACA_VERSION_MINOR 0 /**< Minor version number */
|
||||
#define ACA_VERSION_PATCH 0 /**< Patch version number */
|
||||
#define RAS_DECODE_VERSION_MAJOR 2 /**< Major version number */
|
||||
#define RAS_DECODE_VERSION_MINOR 0 /**< Minor version number */
|
||||
#define RAS_DECODE_VERSION_PATCH 0 /**< Patch version number */
|
||||
|
||||
/* Version String */
|
||||
#define ACA_VERSION_STRING "1.0.0"
|
||||
/* Helper macros for string concatenation */
|
||||
#define RAS_DECODE_STRINGIFY(x) #x
|
||||
#define RAS_DECODE_TOSTRING(x) RAS_DECODE_STRINGIFY(x)
|
||||
|
||||
/* Version String - dynamically constructed from components */
|
||||
#define RAS_DECODE_VERSION_STRING \
|
||||
RAS_DECODE_TOSTRING(RAS_DECODE_VERSION_MAJOR) "." \
|
||||
RAS_DECODE_TOSTRING(RAS_DECODE_VERSION_MINOR) "." \
|
||||
RAS_DECODE_TOSTRING(RAS_DECODE_VERSION_PATCH)
|
||||
|
||||
/**
|
||||
* @brief Structure containing version information
|
||||
@@ -92,4 +99,4 @@ extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ACA_VERSION_H */
|
||||
#endif /* RAS_DECODE_VERSION_H */
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BOOT_DECODE_H
|
||||
#define BOOT_DECODE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include "json_util.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Boot message structure representing OAM boot message
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t value; ///< 64-bit boot message value
|
||||
} OamBootMsg;
|
||||
|
||||
/**
|
||||
* @brief Decoder function pointer type
|
||||
* @param msg Boot message to decode
|
||||
* @return JsonValue containing decoded information or NULL on failure
|
||||
*/
|
||||
typedef JsonValue* (*boot_decoder_func_t)(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Decoder mapping entry
|
||||
*/
|
||||
typedef struct {
|
||||
uint8_t encoding; ///< Error encoding value
|
||||
boot_decoder_func_t decoder; ///< Decoder function
|
||||
} boot_decoder_entry_t;
|
||||
|
||||
/**
|
||||
* @brief Boot message constants
|
||||
*/
|
||||
#define BOOT_ERROR_PRESENT_MARKER 0xA4
|
||||
#define BOOT_IN_BOOT_MARKER 0xBA
|
||||
#define BOOT_SUCCESS_ENCODING 0xBA
|
||||
|
||||
/**
|
||||
* @brief Error encoding constants
|
||||
*/
|
||||
#define BOOT_ENCODING_HBM_TRAINING 0x01
|
||||
#define BOOT_ENCODING_FW_LOAD 0x04
|
||||
#define BOOT_ENCODING_WAFL_LINK 0x05
|
||||
#define BOOT_ENCODING_XGMI_LINK 0x06
|
||||
#define BOOT_ENCODING_USR_CP_LINK 0x07
|
||||
#define BOOT_ENCODING_USR_DP_LINK 0x08
|
||||
#define BOOT_ENCODING_HBM_MEM_TEST 0x09
|
||||
#define BOOT_ENCODING_HBM_BIST_TEST 0x0A
|
||||
#define BOOT_ENCODING_BOOT_CTRL_GEN_V0 0x0B
|
||||
#define BOOT_ENCODING_BOOT_CTRL_GEN_V1 0x0C
|
||||
#define BOOT_ENCODING_DATA_ABORT 0x0D
|
||||
|
||||
/**
|
||||
* @brief HBM stack decoder constants
|
||||
*/
|
||||
#define HBM_STACK_0 0x01
|
||||
#define HBM_STACK_1 0x02
|
||||
/* Sentinel for an unrecognized HBM stack value. Parenthesized so the negative
 * literal stays a single operand wherever the macro is expanded. */
#define HBM_STACK_UNKNOWN (-1)
|
||||
|
||||
/**
|
||||
* @brief Extract specific byte from 64-bit value
|
||||
* @param value 64-bit value
|
||||
* @param byte_index Byte index (0-7)
|
||||
* @return Extracted byte value
|
||||
*/
|
||||
static inline uint8_t extract_byte(uint64_t value, int byte_index) {
    /*
     * A 64-bit value only has bytes 0..7. Shifting by a negative amount or by
     * 64 bits or more is undefined behavior in C, so out-of-range indices
     * return 0 instead of performing the shift.
     */
    if (byte_index < 0 || byte_index > 7) {
        return 0;
    }

    /* Byte 0 is the least significant byte of value. */
    return (uint8_t)((value >> (byte_index * 8)) & 0xFF);
}
|
||||
|
||||
/**
|
||||
* @brief Build a bit mask with the lowest num_bits bits set
|
||||
* @param num_bits Number of low-order bits to set in the mask
|
||||
* @return Bit mask
|
||||
*/
|
||||
static inline uint32_t extract_bits(int num_bits) {
    /* No bits requested (or a nonsensical negative count): empty mask. */
    if (num_bits <= 0) {
        return 0;
    }

    /*
     * Shifting a 32-bit value by 32 or more is undefined behavior, so a
     * request for the full width (or wider) saturates to an all-ones mask.
     */
    if (num_bits >= 32) {
        return UINT32_MAX;
    }

    /* Mask with the lowest num_bits bits set, e.g. 8 -> 0xFF. */
    return (uint32_t)(((uint32_t)1 << num_bits) - 1U);
}
|
||||
|
||||
/**
|
||||
* @brief Get boot version from boot message
|
||||
* @param msg Boot message
|
||||
* @return Boot version (0 or 1)
|
||||
*/
|
||||
int get_boot_version(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Get error encoding from boot message
|
||||
* @param msg Boot message
|
||||
* @return Error encoding value
|
||||
*/
|
||||
int get_error_encoding(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Check if error is present in boot message
|
||||
* @param msg Boot message
|
||||
* @return true if error present, false otherwise
|
||||
*/
|
||||
bool error_present(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Check if in boot mode
|
||||
* @param msg Boot message
|
||||
* @return true if in boot mode, false otherwise
|
||||
*/
|
||||
bool in_boot(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Get socket number from boot message
|
||||
* @param msg Boot message
|
||||
* @param version Boot version
|
||||
* @return Socket number
|
||||
*/
|
||||
int get_socket(OamBootMsg *msg, int version);
|
||||
|
||||
/**
|
||||
* @brief Get AID number from boot message
|
||||
* @param msg Boot message
|
||||
* @param version Boot version
|
||||
* @return AID number
|
||||
*/
|
||||
int get_aid(OamBootMsg *msg, int version);
|
||||
|
||||
/**
|
||||
* @brief Decode HBM stack value
|
||||
* @param stack Stack value
|
||||
* @return Decoded stack number or HBM_STACK_UNKNOWN
|
||||
*/
|
||||
int decode_hbm_stack(uint8_t stack);
|
||||
|
||||
/**
|
||||
* @brief Create JSON array of failed links
|
||||
* @param byte_value Byte containing link status bits
|
||||
* @param max_links Maximum number of links to check
|
||||
* @return JsonValue array or NULL on failure
|
||||
*/
|
||||
JsonValue* create_failed_links_array(uint8_t byte_value, int max_links);
|
||||
|
||||
/**
|
||||
* @brief Create hex string representation
|
||||
* @param value Value to convert
|
||||
* @param width Width of hex string (with padding)
|
||||
* @return Dynamically allocated hex string or NULL on failure
|
||||
*/
|
||||
char* create_hex_string(uint64_t value, int width);
|
||||
|
||||
// Decoder functions for Version 0
|
||||
JsonValue* decode_hbm_training_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_fw_load_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_wafl_link_training_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_xgmi_link_training_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_usr_cp_link_training_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_usr_dp_link_training_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_hbm_mem_test_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_hbm_bist_test_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_boot_controller_generic_v0(OamBootMsg *msg);
|
||||
|
||||
// Decoder functions for Version 1
|
||||
JsonValue* decode_hbm_training_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_fw_load_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_wafl_link_training_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_xgmi_link_training_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_usr_cp_link_training_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_usr_dp_link_training_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_hbm_mem_test_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_hbm_bist_test_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_boot_controller_generic_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_data_abort_v1(OamBootMsg *msg);
|
||||
JsonValue* decode_boot_success_v1(OamBootMsg *msg);
|
||||
|
||||
// Unhandled error decoders
|
||||
JsonValue* decode_unhandled_error_v0(OamBootMsg *msg);
|
||||
JsonValue* decode_unhandled_error_v1(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Get appropriate decoder function for boot message
|
||||
* @param msg Boot message
|
||||
* @return Decoder function pointer or NULL if no decoder found
|
||||
*/
|
||||
boot_decoder_func_t get_decoder_function(OamBootMsg *msg);
|
||||
|
||||
/**
|
||||
* @brief Orchestrate decoding of multiple boot messages
|
||||
* @param oam_boot_msgs Array of boot message values
|
||||
* @param count Number of boot messages
|
||||
* @return JsonValue object containing decoded results or NULL on failure
|
||||
*/
|
||||
JsonValue* boot_decode_orchestrator(uint64_t *oam_boot_msgs, size_t count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BOOT_DECODE_H */
|
||||
-1
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef JSON_PRINTER_H
|
||||
#define JSON_PRINTER_H
|
||||
|
||||
#include "json_util.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Print a JSON value to stdout in formatted form
|
||||
* @param value JSON value to print
|
||||
*/
|
||||
void print_json_value(JsonValue *value);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* JSON_PRINTER_H */
|
||||
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef JSON_UTIL_H
|
||||
#define JSON_UTIL_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief JSON value types enumeration
|
||||
*/
|
||||
typedef enum {
|
||||
JSON_NULL,
|
||||
JSON_BOOL,
|
||||
JSON_NUMBER,
|
||||
JSON_STRING,
|
||||
JSON_OBJECT,
|
||||
JSON_ARRAY
|
||||
} JsonType;
|
||||
|
||||
typedef struct JsonValue JsonValue;
|
||||
typedef struct JsonPair JsonPair;
|
||||
|
||||
/**
|
||||
* @brief JSON key-value pair structure for objects
|
||||
*/
|
||||
struct JsonPair {
|
||||
char *key;
|
||||
JsonValue *value;
|
||||
JsonPair *next;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief JSON value structure
|
||||
*/
|
||||
struct JsonValue {
|
||||
JsonType type;
|
||||
union {
|
||||
bool boolean;
|
||||
double number;
|
||||
char *string;
|
||||
JsonPair *object; // Linked list of key-value pairs
|
||||
struct {
|
||||
JsonValue **items;
|
||||
size_t count;
|
||||
size_t capacity;
|
||||
} array;
|
||||
} data;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Create a null JSON value
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_null(void);
|
||||
|
||||
/**
|
||||
* @brief Create a boolean JSON value
|
||||
* @param b Boolean value
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_bool(bool b);
|
||||
|
||||
/**
|
||||
* @brief Create a number JSON value
|
||||
* @param num Numeric value
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_number(double num);
|
||||
|
||||
/**
|
||||
* @brief Create a string JSON value
|
||||
* @param str String value (will be copied)
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_string(const char *str);
|
||||
|
||||
/**
|
||||
* @brief Create an empty JSON object
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_object(void);
|
||||
|
||||
/**
|
||||
* @brief Create an empty JSON array
|
||||
* @return Pointer to new JsonValue or NULL on failure
|
||||
*/
|
||||
JsonValue* json_create_array(void);
|
||||
|
||||
/**
|
||||
* @brief Add a key-value pair to a JSON object
|
||||
* @param obj JSON object to modify
|
||||
* @param key Key string (will be copied)
|
||||
* @param value Value to add
|
||||
*/
|
||||
void json_object_set(JsonValue *obj, const char *key, JsonValue *value);
|
||||
|
||||
/**
|
||||
* @brief Get a value by key from a JSON object
|
||||
* @param obj JSON object to search
|
||||
* @param key Key to search for
|
||||
* @return Pointer to JsonValue or NULL if not found
|
||||
*/
|
||||
JsonValue* json_object_get(JsonValue *obj, const char *key);
|
||||
|
||||
/**
|
||||
* @brief Check if a key exists in a JSON object
|
||||
* @param obj JSON object to check
|
||||
* @param key Key to check for
|
||||
* @return true if key exists, false otherwise
|
||||
*/
|
||||
bool json_object_has_key(JsonValue *obj, const char *key);
|
||||
|
||||
/**
|
||||
* @brief Add a value to a JSON array
|
||||
* @param arr JSON array to modify
|
||||
* @param value Value to add
|
||||
* @return true on success, false on failure
|
||||
*/
|
||||
bool json_array_push(JsonValue *arr, JsonValue *value);
|
||||
|
||||
/**
|
||||
* @brief Get a value by index from a JSON array
|
||||
* @param arr JSON array to access
|
||||
* @param index Array index
|
||||
* @return Pointer to JsonValue or NULL if index out of bounds
|
||||
*/
|
||||
JsonValue* json_array_get(JsonValue *arr, size_t index);
|
||||
|
||||
/**
|
||||
* @brief Get the size of a JSON array
|
||||
* @param arr JSON array
|
||||
* @return Number of elements in array, or 0 if not an array
|
||||
*/
|
||||
size_t json_array_size(JsonValue *arr);
|
||||
|
||||
/**
|
||||
* @brief Free a JSON value and all its contents
|
||||
* @param val JSON value to free
|
||||
*/
|
||||
void json_free(JsonValue *val);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* JSON_UTIL_H */
|
||||
+20
-9
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -21,11 +20,13 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ACA_API_H
|
||||
#define ACA_API_H
|
||||
#ifndef RAS_DECODE_API_H
|
||||
#define RAS_DECODE_API_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "aca_version.h"
|
||||
#include "json_util.h"
|
||||
|
||||
/**
|
||||
* @brief Structure containing decoded error information
|
||||
@@ -39,12 +40,13 @@ typedef struct
|
||||
const char *instance_ref; /**< Reference to instance name string */
|
||||
int oam; /**< OAM value */
|
||||
int aid; /**< AID value */
|
||||
int afid; /**< AFID value (AMD Field ID) */
|
||||
uint64_t raw_status; /**< Raw status register value */
|
||||
uint64_t raw_addr; /**< Raw address register value */
|
||||
uint64_t raw_ipid; /**< Raw IPID register value */
|
||||
uint64_t raw_synd; /**< Raw syndrome register value */
|
||||
uint8_t scrub; /**< Scrub bit from status */
|
||||
uint8_t poison; /**< Poison bit from status */
|
||||
uint8_t deferred; /**< Deferred bit from status */
|
||||
uint8_t error_code_ext; /**< Extended error code from status */
|
||||
} aca_error_info_t;
|
||||
|
||||
@@ -54,18 +56,27 @@ typedef struct
|
||||
* @param[in] array_len Size of register array in elements
|
||||
* @param[in] flag Decoder flags
|
||||
* @param[in] hw_revision Hardware revision number
|
||||
* @param[in] register_context_type Register context type (16-bit): 1 for ACA decode, 9 for boot decode
|
||||
* @return AFID value or -1 if decoding fails
|
||||
*/
|
||||
int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision);
|
||||
int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type);
|
||||
|
||||
/**
|
||||
* @brief Decodes and returns complete error information from a register array
|
||||
* @brief Decodes and returns complete error information from a register array as JSON
|
||||
* @param[in] register_array Pointer to an array of 64-bit register values
|
||||
* @param[in] array_len Size of register array in elements
|
||||
* @param[in] flag Decoder flags
|
||||
* @param[in] hw_revision Hardware revision number
|
||||
* @return Complete error information structure
|
||||
* @param[in] register_context_type Register context type (16-bit): 1 for ACA decode, 9 for boot decode
|
||||
* @return JsonValue* containing complete error information, or NULL on failure
|
||||
*/
|
||||
aca_error_info_t decode_error_info(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision);
|
||||
JsonValue* decode_error_info(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type);
|
||||
|
||||
#endif // ACA_API_H
|
||||
/**
|
||||
* @brief Decodes the AFID from a JSON error object based on error category, type, and severity
|
||||
* @param[in] error_json Pointer to JSON object containing error information
|
||||
* @return AFID value or -1 if decoding fails or JSON is NULL
|
||||
*/
|
||||
int decode_error_info_afid(JsonValue *error_json);
|
||||
|
||||
#endif // RAS_DECODE_API_H
|
||||
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file ras_decode_constants.h
|
||||
* @brief Shared constants for ACA error decoding
|
||||
*
|
||||
* This file contains string constants and numerical constants that are used
|
||||
* across multiple source files to improve maintainability and prevent typos.
|
||||
*/
|
||||
|
||||
#ifndef RAS_DECODE_CONSTANTS_H
#define RAS_DECODE_CONSTANTS_H

/*
 * Severity strings.
 * RAS_DECODE_SEVERITY_UNKNOWN doubles as the generic "could not decode"
 * marker: the decoder compares category and type strings against it as well.
 */
#define RAS_DECODE_SEVERITY_UNKNOWN               "UNKNOWN"
#define RAS_DECODE_SEVERITY_FATAL                 "Fatal"
#define RAS_DECODE_SEVERITY_CORRECTED             "Corrected"
#define RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL "Uncorrected, Non-fatal"
#define RAS_DECODE_SEVERITY_FAIL_TO_INIT          "Fail-to-init"
#define RAS_DECODE_SEVERITY_ALL_CAPS              "ALL"

/* Error category strings */
#define RAS_DECODE_CATEGORY_HBM_ERRORS              "HBM Errors"
#define RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS  "Device Internal Errors"
#define RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS "Off-Package Link Errors"
#define RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS        "Boot-Time Errors"
#define RAS_DECODE_CATEGORY_CPER_FORMAT             "CPER Format"
#define RAS_DECODE_CATEGORY_UNIDENTIFIED_ERRORS     "Unidentified Errors"

/* Error type strings shared by several categories */
#define RAS_DECODE_ERROR_TYPE_ALL_OTHERS                    "All Others"
#define RAS_DECODE_ERROR_TYPE_ALL                           "All"
#define RAS_DECODE_ERROR_TYPE_DECODE_INAPPLICABLE           "Decode Inapplicable"
#define RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD "Bad Page Retirement Threshold"
#define RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION            "Hardware Assertion (HWA)"
#define RAS_DECODE_ERROR_TYPE_WATCHDOG_TIMEOUT              "Watchdog Timeout (WDT)"
#define RAS_DECODE_ERROR_TYPE_ON_DIE_ECC                    "On-die ECC"
#define RAS_DECODE_ERROR_TYPE_END_TO_END_CRC                "End-to-end CRC"
#define RAS_DECODE_ERROR_TYPE_WAFL                          "WAFL"
#define RAS_DECODE_ERROR_TYPE_XGMI                          "XGMI"

/* Error type strings used for boot-time errors */
#define RAS_DECODE_ERROR_TYPE_FW_LOAD                    "FW Load"
#define RAS_DECODE_ERROR_TYPE_HBM_BIST_TEST              "HBM BIST Test"
#define RAS_DECODE_ERROR_TYPE_HBM_MEMORY_TEST            "HBM Memory Test"
#define RAS_DECODE_ERROR_TYPE_HBM_TRAINING               "HBM Training"
#define RAS_DECODE_ERROR_TYPE_UNHANDLED                  "Unhandled"
#define RAS_DECODE_ERROR_TYPE_UNKNOWN_ERROR              "Unknown"
#define RAS_DECODE_ERROR_TYPE_USR_CP_LINK_TRAINING       "USR CP Link Training"
#define RAS_DECODE_ERROR_TYPE_USR_DP_LINK_TRAINING       "USR DP Link Training"
#define RAS_DECODE_ERROR_TYPE_WAFL_LINK_TRAINING         "WAFL Link Training"
#define RAS_DECODE_ERROR_TYPE_XGMI_LINK_TRAINING         "XGMI Link Training"
#define RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT "Boot Controller Data Abort"
#define RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_GENERIC    "Boot Controller Generic"
#define RAS_DECODE_ERROR_TYPE_BOOT_SUCCESS               "Boot Success"

/* Error type strings used for link errors */
#define RAS_DECODE_ERROR_TYPE_PCIE_AER "PCIe AER"

/*
 * Error type strings used for CPER format errors.
 * The emitted text still says "ACA Data"; only the macro names carry the
 * RAS_DECODE spelling.
 */
#define RAS_DECODE_ERROR_TYPE_MALFORMED_CPER             "Malformed CPER"
#define RAS_DECODE_ERROR_TYPE_INCOMPLETE_RAS_DECODE_DATA "Incomplete ACA Data"
#define RAS_DECODE_ERROR_TYPE_INVALID_RAS_DECODE_DATA    "Invalid ACA Data"
#define RAS_DECODE_ERROR_TYPE_UNIDENTIFIED_ERROR         "Unidentified Error"

/* Protocol strings; the second variant deliberately keeps a trailing space */
#define RAS_DECODE_PROTOCOL_CPER            "CPER"
#define RAS_DECODE_PROTOCOL_CPER_WITH_SPACE "CPER "

/* Bank name strings */
#define RAS_DECODE_BANK_UMC        "umc"
#define RAS_DECODE_BANK_PSP        "psp"
#define RAS_DECODE_BANK_CS         "cs"
#define RAS_DECODE_BANK_PIE        "pie"
#define RAS_DECODE_BANK_PCS_XGMI   "pcs_xgmi"
#define RAS_DECODE_BANK_KPX_SERDES "kpx_serdes"
#define RAS_DECODE_BANK_KPX_WAFL   "kpx_wafl"

/*
 * Numerical constants.
 * The *_ARRAY_SIZE_*_BYTES values are counts of 64-bit registers, not byte
 * counts: 4 * 8 = 32 bytes, 16 * 8 = 128 bytes, 8 * 8 = 64 bytes.
 */
#define RAS_DECODE_FLAG_THRESHOLD_EXCEEDED       0x8
#define RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES  4
#define RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES 16
#define BOOT_REGISTER_ARRAY_SIZE_64_BYTES        8

/* Bounds of the extended error code range */
#define RAS_DECODE_ERROR_CODE_EXT_MIN 0x3A
#define RAS_DECODE_ERROR_CODE_EXT_MAX 0x3E

/* Instance ID values for XCD and AID error decoding */
#define RAS_DECODE_INSTANCE_ID_XCD0_400 0x36430400
#define RAS_DECODE_INSTANCE_ID_XCD1_400 0x38430400
#define RAS_DECODE_INSTANCE_ID_XCD0_401 0x36430401
#define RAS_DECODE_INSTANCE_ID_XCD1_401 0x38430401
#define RAS_DECODE_INSTANCE_ID_AID_400  0x3B30400
#define RAS_DECODE_INSTANCE_ID_AID_401  0x3B30401

/* IDs returned when the inputs are invalid or no table row matches */
#define RAS_DECODE_ERROR_INVALID_RAS_DECODE_DATA_ID 33
#define RAS_DECODE_ERROR_UNIDENTIFIED_ERROR_ID      34

#endif /* RAS_DECODE_CONSTANTS_H */
|
||||
@@ -44,13 +44,33 @@ set(INC_LIST
|
||||
"${PROJECT_SOURCE_DIR}/rocm_smi/include/rocm_smi/rocm_smi.h"
|
||||
"${PROJECT_SOURCE_DIR}/rocm_smi/include/rocm_smi/rocm_smi_utils.h")
|
||||
|
||||
set(ACA_SRC_DIR "aca-decode")
|
||||
set(SRC_LIST ${SRC_LIST} ${ACA_SRC_DIR}/aca_api.c ${ACA_SRC_DIR}/aca_decode.c ${ACA_SRC_DIR}/aca_fields.c
|
||||
${ACA_SRC_DIR}/aca_tables.c ${ACA_SRC_DIR}/error_map.c)
|
||||
set(ACA_INC_DIR "${PROJECT_SOURCE_DIR}/include/aca-decode")
|
||||
set(INC_LIST ${INC_LIST} ${ACA_INC_DIR}/aca_decode.h ${ACA_INC_DIR}/aca_fields.h ${ACA_INC_DIR}/aca_tables.h
|
||||
${ACA_INC_DIR}/error_map.h)
|
||||
|
||||
set(RAS_DECODE "ras-decode")
|
||||
set(ACA_SRC_DIR "${PROJECT_SOURCE_DIR}/src/${RAS_DECODE}")
|
||||
set(SRC_LIST ${SRC_LIST}
|
||||
${ACA_SRC_DIR}/aca_decode.c
|
||||
${ACA_SRC_DIR}/aca_fields.c
|
||||
${ACA_SRC_DIR}/aca_tables.c
|
||||
${ACA_SRC_DIR}/aca_version.c
|
||||
${ACA_SRC_DIR}/boot_decode.c
|
||||
${ACA_SRC_DIR}/error_map.c
|
||||
${ACA_SRC_DIR}/json_printer.c
|
||||
${ACA_SRC_DIR}/json_util.c
|
||||
# ${ACA_SRC_DIR}/main.c
|
||||
${ACA_SRC_DIR}/ras_decode_api.c
|
||||
)
|
||||
set(ACA_INC_DIR "${PROJECT_SOURCE_DIR}/include/${RAS_DECODE}")
|
||||
set(INC_LIST ${INC_LIST}
|
||||
${ACA_INC_DIR}/aca_decode.h
|
||||
${ACA_INC_DIR}/aca_fields.h
|
||||
${ACA_INC_DIR}/aca_tables.h
|
||||
${ACA_INC_DIR}/aca_version.h
|
||||
${ACA_INC_DIR}/boot_decode.h
|
||||
${ACA_INC_DIR}/error_map.h
|
||||
${ACA_INC_DIR}/json_printer.h
|
||||
${ACA_INC_DIR}/json_util.h
|
||||
${ACA_INC_DIR}/ras_decode_api.h
|
||||
${ACA_INC_DIR}/ras_decode_constants.h
|
||||
)
|
||||
if(ENABLE_ESMI_LIB)
|
||||
list(APPEND INC_LIST ${ESMI_INC_DIR}/e_smi/e_smi.h)
|
||||
list(APPEND INC_LIST ${ESMI_INC_DIR}/e_smi/e_smi_monitor.h)
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "aca_decode.h"
|
||||
#include "aca_constants.h"
|
||||
|
||||
int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision)
|
||||
{
|
||||
if (!register_array)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
aca_raw_data_t raw_data;
|
||||
|
||||
if (array_len == ACA_REGISTER_ARRAY_SIZE_32_BYTES) // 32 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[0];
|
||||
raw_data.aca_addr = register_array[1];
|
||||
raw_data.aca_ipid = register_array[2];
|
||||
raw_data.aca_synd = register_array[3];
|
||||
}
|
||||
else if (array_len == ACA_REGISTER_ARRAY_SIZE_128_BYTES) // 128 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[1];
|
||||
raw_data.aca_addr = register_array[2];
|
||||
raw_data.aca_ipid = register_array[5];
|
||||
raw_data.aca_synd = register_array[6];
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
return -1; // Unsupported size
|
||||
}
|
||||
|
||||
raw_data.flags = flag;
|
||||
raw_data.hw_revision = hw_revision;
|
||||
|
||||
aca_error_info_t error_info = aca_decode(&raw_data);
|
||||
return error_info.afid;
|
||||
}
|
||||
|
||||
aca_error_info_t decode_error_info(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision)
|
||||
{
|
||||
aca_raw_data_t raw_data = {0};
|
||||
aca_error_info_t error_info = {0};
|
||||
|
||||
if (!register_array)
|
||||
{
|
||||
return error_info;
|
||||
} if (array_len == ACA_REGISTER_ARRAY_SIZE_32_BYTES) // 32 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[0];
|
||||
raw_data.aca_addr = register_array[1];
|
||||
raw_data.aca_ipid = register_array[2];
|
||||
raw_data.aca_synd = register_array[3];
|
||||
}
|
||||
else if (array_len == ACA_REGISTER_ARRAY_SIZE_128_BYTES) // 128 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[1];
|
||||
raw_data.aca_addr = register_array[2];
|
||||
raw_data.aca_ipid = register_array[5];
|
||||
raw_data.aca_synd = register_array[6];
|
||||
}
|
||||
else
|
||||
{
|
||||
return error_info; // Return zero-initialized structure for unsupported size
|
||||
}
|
||||
|
||||
raw_data.flags = flag;
|
||||
raw_data.hw_revision = hw_revision;
|
||||
|
||||
return aca_decode(&raw_data);
|
||||
}
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "error_map.h"
|
||||
#include "aca_constants.h"
|
||||
#include <string.h>
|
||||
|
||||
#define AFID_VERSION "0.7"
|
||||
|
||||
static const error_map_entry_t error_map[] = {
|
||||
{1, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_FW_LOAD, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{2, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_BIST_TEST, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{3, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_MEMORY_TEST, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{4, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{5, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_UNHANDLED, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{6, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_UNKNOWN_ERROR, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{7, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_USR_CP_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{8, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_USR_DP_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{9, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_WAFL_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{10, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_XGMI_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{11, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{12, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_BOOT_CONTROLLER_GENERIC, ACA_PROTOCOL_CPER_WITH_SPACE, ACA_SEVERITY_FAIL_TO_INIT},
|
||||
{13, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_PCIE_AER, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
|
||||
{14, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_PCIE_AER, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{15, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_WAFL, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
|
||||
{16, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_WAFL, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{17, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_XGMI, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
|
||||
{18, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_XGMI, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{19, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{20, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ON_DIE_ECC, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{21, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_END_TO_END_CRC, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{22, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ON_DIE_ECC, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{23, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_END_TO_END_CRC, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{24, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ALL, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
|
||||
{25, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{26, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_HARDWARE_ASSERTION, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{27, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{28, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{29, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
|
||||
{30, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
|
||||
{31, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_MALFORMED_CPER, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
|
||||
{32, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_INCOMPLETE_ACA_DATA, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
|
||||
{33, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_INVALID_ACA_DATA, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
|
||||
{34, ACA_CATEGORY_UNIDENTIFIED_ERRORS, ACA_ERROR_TYPE_UNIDENTIFIED_ERROR, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS}};
|
||||
|
||||
static const size_t NUM_ERROR_ENTRIES = sizeof(error_map) / sizeof(error_map[0]);
|
||||
|
||||
int get_error_id(const char *error_category, const char *error_type, const char *error_severity)
|
||||
{
|
||||
if (!error_category || !error_type || !error_severity ||
|
||||
strcmp(error_category, ACA_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_type, ACA_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_severity, ACA_SEVERITY_UNKNOWN) == 0)
|
||||
{
|
||||
return ACA_ERROR_INVALID_ACA_DATA_ID; // Return ID for "Invalid Error" if any input is "UNKNOWN" or NULL
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < NUM_ERROR_ENTRIES; i++)
|
||||
{
|
||||
if (strcmp(error_map[i].error_category, error_category) == 0 &&
|
||||
strcmp(error_map[i].error_type, error_type) == 0 &&
|
||||
strcmp(error_map[i].error_severity, error_severity) == 0)
|
||||
{
|
||||
return (int)error_map[i].id;
|
||||
}
|
||||
}
|
||||
|
||||
return ACA_ERROR_UNIDENTIFIED_ERROR_ID; // Return ID for "Unidentified Errors" if no match found
|
||||
}
|
||||
@@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file main.c
|
||||
* @brief Demo program showing how to use the ACA decoder
|
||||
*
|
||||
* This is a demonstration program that shows how to use the ACA decoder
|
||||
* with sample raw data to decode ACA error information.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <aca_api.h>
|
||||
#include <aca_version.h>
|
||||
#include <aca_constants.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
// Function prototype
|
||||
void print_error_info(const aca_error_info_t *info);
|
||||
void print_version_info(void);
|
||||
|
||||
// Function to print error info in JSON format
|
||||
void print_error_info(const aca_error_info_t *info)
|
||||
{
|
||||
printf("{\n");
|
||||
printf(" \"bank\": \"%s\",\n", info->bank_ref);
|
||||
printf(" \"error_location\": {\n");
|
||||
printf(" \"oam\": \"%d\",\n", info->oam);
|
||||
printf(" \"aid\": \"%d\",\n", info->aid);
|
||||
printf(" \"instance\": \"%s\"\n", info->instance_ref);
|
||||
printf(" },\n");
|
||||
printf(" \"severity\": \"%s\",\n", info->severity_ref);
|
||||
printf(" \"afid\": \"%d\",\n", info->afid);
|
||||
printf(" \"scrub\": \"%u\",\n", info->scrub);
|
||||
printf(" \"err_ext\": \"%u\",\n", info->error_code_ext);
|
||||
printf(" \"error_category\": \"%s\",\n", info->category_ref);
|
||||
printf(" \"error_type\": \"%s\",\n", info->error_type_ref);
|
||||
printf(" \"address\": \"0x%" PRIx64 "\",\n", info->raw_addr);
|
||||
printf(" \"syndrome\": \"0x%" PRIx64 "\"\n", info->raw_synd);
|
||||
printf("}\n");
|
||||
}
|
||||
|
||||
// Function to print version information
|
||||
void print_version_info(void)
|
||||
{
|
||||
printf("=== ACA Decoder Library Version Information ===\n");
|
||||
printf("Version: %s\n", aca_get_version_string());
|
||||
printf("Major: %d\n", aca_get_version_major());
|
||||
printf("Minor: %d\n", aca_get_version_minor());
|
||||
printf("Patch: %d\n", aca_get_version_patch());
|
||||
|
||||
aca_version_info_t version_info = aca_get_version_info();
|
||||
printf("Complete version info:\n");
|
||||
printf(" Major: %d\n", version_info.major);
|
||||
printf(" Minor: %d\n", version_info.minor);
|
||||
printf(" Patch: %d\n", version_info.patch);
|
||||
printf(" String: %s\n", version_info.string);
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// Display version information
|
||||
print_version_info();
|
||||
|
||||
// Sample usage of decode_afid with 32-byte register array (HBM FATAL ERROR, expected output is 4)
|
||||
uint64_t register_array_32[ACA_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbaa000000004081b, 0x0, 0x209600090f00, 0x5d000000};
|
||||
int afid_32 = decode_afid(register_array_32, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
|
||||
printf("Decoded AFID (32-byte array): %d\n", afid_32);
|
||||
|
||||
// Sample usage of decode_afid with 32-byte register array (GC FATAL ERROR, expected output is 3)
|
||||
uint64_t register_array_test[ACA_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbea00000003b0000, 0x100000029, 0x1200136430400, 0x20b};
|
||||
int afid_test = decode_afid(register_array_test, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
|
||||
printf("Decoded AFID (test array): %d\n", afid_test);
|
||||
|
||||
// Sample usage of decode_afid with 128-byte register array (HBM CORRECTED ERROR, expected output is 1)
|
||||
uint64_t register_array_128[ACA_REGISTER_ARRAY_SIZE_128_BYTES] = {
|
||||
0xffff,
|
||||
0xdc2040000000011b,
|
||||
0x0,
|
||||
0xd008000801000000,
|
||||
0x25000001ff,
|
||||
0x209600191f00,
|
||||
0xa000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0xd008000801000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0};
|
||||
int afid_128 = decode_afid(register_array_128, ACA_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1);
|
||||
printf("Decoded AFID (128-byte array): %d\n", afid_128);
|
||||
|
||||
// sample for bad page
|
||||
uint64_t register_array_bad_page[ACA_REGISTER_ARRAY_SIZE_128_BYTES] = {
|
||||
0x1,
|
||||
0xb000000000000137,
|
||||
0x0,
|
||||
0x0,
|
||||
0x1ff00000002,
|
||||
0x9600000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0};
|
||||
|
||||
// when flag is 0b1000, it indicates that the error threshold has been exceeded
|
||||
// and is always a HBM error. The expected output is 19.
|
||||
int afid_bad_page = decode_afid(register_array_bad_page, ACA_REGISTER_ARRAY_SIZE_128_BYTES, ACA_FLAG_THRESHOLD_EXCEEDED, 1);
|
||||
printf("Decoded AFID (bad page): %d\n", afid_bad_page);
|
||||
|
||||
const aca_error_info_t error_info_32 = decode_error_info(register_array_32, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
|
||||
print_error_info(&error_info_32);
|
||||
|
||||
const aca_error_info_t error_info_128 = decode_error_info(register_array_128, ACA_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1);
|
||||
print_error_info(&error_info_128);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -29,7 +29,7 @@
|
||||
#include <sstream>
|
||||
|
||||
extern "C" {
|
||||
#include "aca-decode/aca_decode.h"
|
||||
#include "ras-decode/aca_decode.h"
|
||||
}
|
||||
#include "amd_smi/impl/amd_smi_cper.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
@@ -254,16 +254,16 @@ static int cper_dump_sec_desc(const struct cper_sec_desc *desc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aca_decode_fatal(const cper_sec_crashdump_data &data, uint32_t flag, uint16_t hw_revision)
|
||||
static int aca_decode_fatal(const cper_sec_crashdump_data &data, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type)
|
||||
{
|
||||
const uint64_t *register_array = reinterpret_cast<const uint64_t *>(&data.dump.fatal_err);
|
||||
return decode_afid(register_array, sizeof(data.dump.fatal_err)/sizeof(uint64_t), flag, hw_revision);
|
||||
return decode_afid(register_array, sizeof(data.dump.fatal_err)/sizeof(uint64_t), flag, hw_revision, register_context_type);
|
||||
}
|
||||
|
||||
static int aca_decode_corrected_error(const uint32_t *reg_dump, size_t num_bytes, uint32_t flag, uint16_t hw_revision)
|
||||
static int aca_decode_corrected_error(const uint32_t *reg_dump, size_t num_bytes, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type)
|
||||
{
|
||||
const uint64_t *register_array = reinterpret_cast<const uint64_t *>(reg_dump);
|
||||
return decode_afid(register_array, num_bytes, flag, hw_revision);
|
||||
return decode_afid(register_array, num_bytes, flag, hw_revision, register_context_type);
|
||||
}
|
||||
|
||||
static int cper_dump_nonstd_err(const struct cper_sec_nonstd_err *nonstd_err, const cper_sec_desc *section)
|
||||
@@ -299,7 +299,7 @@ exit:
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
return aca_decode_corrected_error(body->err_ctx.reg_dump, sizeof(body->err_ctx.reg_dump)/sizeof(uint64_t),
|
||||
section->flags_mask, section->revision_major);
|
||||
section->flags_mask, section->revision_major, body->err_ctx.reg_ctx_type);
|
||||
}
|
||||
|
||||
static int cper_dump_cr_fatal(const struct cper_sec_crashdump *crashdump, const cper_sec_desc *section)
|
||||
@@ -320,7 +320,7 @@ static int cper_dump_cr_fatal(const struct cper_sec_crashdump *crashdump, const
|
||||
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
return aca_decode_fatal(crashdump->data, section->flags_mask, section->revision_major);
|
||||
return aca_decode_fatal(crashdump->data, section->flags_mask, section->revision_major, crashdump->data.reg_ctx_type);
|
||||
}
|
||||
|
||||
static int cper_dump_cr_boot(const struct cper_sec_crashdump *crashdump, const cper_sec_desc *section)
|
||||
@@ -335,7 +335,7 @@ static int cper_dump_cr_boot(const struct cper_sec_crashdump *crashdump, const c
|
||||
ss << "~~~~CRASH DUMP - BOOT TIME~~~\n\n";
|
||||
LOG_DEBUG(ss);
|
||||
|
||||
return aca_decode_fatal(crashdump->data, section->flags_mask, section->revision_major);
|
||||
return aca_decode_fatal(crashdump->data, section->flags_mask, section->revision_major, crashdump->data.reg_ctx_type);
|
||||
}
|
||||
|
||||
} //namespace
|
||||
|
||||
+135
-60
@@ -32,8 +32,11 @@
|
||||
#include "aca_decode.h"
|
||||
#include "aca_tables.h"
|
||||
#include "error_map.h"
|
||||
#include "aca_constants.h"
|
||||
#include "ras_decode_constants.h"
|
||||
#include "json_util.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/**
|
||||
* @brief Gets the bank name based on hardware ID and ACA type
|
||||
@@ -61,18 +64,18 @@ aca_decoder_get_bank(const aca_decoder_t *decoder, const char **bank_name)
|
||||
static const char *get_error_severity(const aca_status_fields_t *status)
|
||||
{
|
||||
if (status->poison)
|
||||
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
return RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
if (status->pcc)
|
||||
return ACA_SEVERITY_FATAL;
|
||||
return RAS_DECODE_SEVERITY_FATAL;
|
||||
if (!status->pcc && status->uc && status->tcc)
|
||||
return ACA_SEVERITY_FATAL;
|
||||
return RAS_DECODE_SEVERITY_FATAL;
|
||||
if (!status->pcc && status->uc && !status->tcc)
|
||||
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
return RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
if (!status->pcc && !status->uc && !status->tcc && status->deferred)
|
||||
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
return RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL;
|
||||
if (!status->pcc && !status->uc && !status->tcc && !status->deferred)
|
||||
return ACA_SEVERITY_CORRECTED;
|
||||
return ACA_SEVERITY_UNKNOWN;
|
||||
return RAS_DECODE_SEVERITY_CORRECTED;
|
||||
return RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -85,31 +88,31 @@ static const char *get_error_category(const char *bank, const char *error_type)
|
||||
{
|
||||
if (!bank || !error_type)
|
||||
{
|
||||
return ACA_SEVERITY_UNKNOWN;
|
||||
return RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
}
|
||||
|
||||
if (strcmp(bank, ACA_BANK_UMC) == 0)
|
||||
if (strcmp(bank, RAS_DECODE_BANK_UMC) == 0)
|
||||
{
|
||||
if (strcmp(error_type, ACA_ERROR_TYPE_ON_DIE_ECC) == 0 ||
|
||||
if (strcmp(error_type, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC) == 0 ||
|
||||
strcmp(error_type, "WriteDataPoisonErr") == 0 ||
|
||||
strcmp(error_type, "AddressCommandParityErr") == 0 ||
|
||||
strcmp(error_type, "WriteDataCrcErr") == 0 ||
|
||||
strcmp(error_type, "EcsErr") == 0 ||
|
||||
strcmp(error_type, "RdCrcErr") == 0 ||
|
||||
strcmp(error_type, ACA_ERROR_TYPE_END_TO_END_CRC) == 0)
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC) == 0)
|
||||
{
|
||||
return ACA_CATEGORY_HBM_ERRORS;
|
||||
return RAS_DECODE_CATEGORY_HBM_ERRORS;
|
||||
}
|
||||
}
|
||||
else if (strcmp(bank, ACA_BANK_PCS_XGMI) == 0 ||
|
||||
strcmp(bank, ACA_BANK_KPX_SERDES) == 0 ||
|
||||
strcmp(bank, ACA_BANK_KPX_WAFL) == 0 ||
|
||||
(strcmp(bank, ACA_BANK_PSP) == 0 && strcmp(error_type, ACA_ERROR_TYPE_WAFL) == 0))
|
||||
else if (strcmp(bank, RAS_DECODE_BANK_PCS_XGMI) == 0 ||
|
||||
strcmp(bank, RAS_DECODE_BANK_KPX_SERDES) == 0 ||
|
||||
strcmp(bank, RAS_DECODE_BANK_KPX_WAFL) == 0 ||
|
||||
(strcmp(bank, RAS_DECODE_BANK_PSP) == 0 && strcmp(error_type, RAS_DECODE_ERROR_TYPE_WAFL) == 0))
|
||||
{
|
||||
return ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS;
|
||||
return RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS;
|
||||
}
|
||||
|
||||
return ACA_CATEGORY_DEVICE_INTERNAL_ERRORS;
|
||||
return RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -125,55 +128,55 @@ static int get_service_error_type(const char *error_category, const char *error_
|
||||
const char *error_severity, const char **service_error_type)
|
||||
{
|
||||
if (!error_category || !error_type || !error_severity || !service_error_type ||
|
||||
strcmp(error_category, ACA_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_type, ACA_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_severity, ACA_SEVERITY_UNKNOWN) == 0)
|
||||
strcmp(error_category, RAS_DECODE_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_type, RAS_DECODE_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_UNKNOWN) == 0)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
if (strcmp(error_type, ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD) == 0)
|
||||
if (strcmp(error_type, RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD) == 0)
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
|
||||
return 0;
|
||||
}
|
||||
if ((strcmp(error_category, ACA_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, ACA_SEVERITY_CORRECTED) == 0))
|
||||
if ((strcmp(error_category, RAS_DECODE_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, RAS_DECODE_SEVERITY_CORRECTED) == 0))
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_ALL;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_ALL;
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(error_type, "RdCrcErr") == 0)
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_END_TO_END_CRC;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_END_TO_END_CRC;
|
||||
return 0;
|
||||
}
|
||||
if ((strcmp(error_category, ACA_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, ACA_SEVERITY_FATAL) == 0) &&
|
||||
(strcmp(error_type, ACA_ERROR_TYPE_ON_DIE_ECC) != 0) && (strcmp(error_type, ACA_ERROR_TYPE_END_TO_END_CRC) != 0))
|
||||
if ((strcmp(error_category, RAS_DECODE_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, RAS_DECODE_SEVERITY_FATAL) == 0) &&
|
||||
(strcmp(error_type, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC) != 0) && (strcmp(error_type, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC) != 0))
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_ALL_OTHERS;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_ALL_OTHERS;
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(error_category, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS) == 0)
|
||||
if (strcmp(error_category, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS) == 0)
|
||||
{
|
||||
if ((strcmp(error_severity, ACA_SEVERITY_UNCORRECTED_NON_FATAL) == 0 ||
|
||||
strcmp(error_severity, ACA_SEVERITY_CORRECTED) == 0 ||
|
||||
strcmp(error_severity, ACA_SEVERITY_FATAL) == 0) &&
|
||||
strcmp(error_type, ACA_ERROR_TYPE_HARDWARE_ASSERTION) != 0 &&
|
||||
strcmp(error_type, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT) != 0)
|
||||
if ((strcmp(error_severity, RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_CORRECTED) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_FATAL) == 0) &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION) != 0 &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_WATCHDOG_TIMEOUT) != 0)
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_ALL_OTHERS;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_ALL_OTHERS;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (strcmp(error_category, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS) == 0)
|
||||
if (strcmp(error_category, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS) == 0)
|
||||
{
|
||||
if (strcmp(error_bank, ACA_BANK_PCS_XGMI) == 0)
|
||||
if (strcmp(error_bank, RAS_DECODE_BANK_PCS_XGMI) == 0)
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_XGMI;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_XGMI;
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(error_bank, ACA_BANK_KPX_WAFL) == 0)
|
||||
if (strcmp(error_bank, RAS_DECODE_BANK_KPX_WAFL) == 0)
|
||||
{
|
||||
*service_error_type = ACA_ERROR_TYPE_WAFL;
|
||||
*service_error_type = RAS_DECODE_ERROR_TYPE_WAFL;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@@ -199,13 +202,15 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
|
||||
info->raw_synd = decoder->aca_synd;
|
||||
|
||||
info->scrub = decoder->status.scrub;
|
||||
info->poison = decoder->status.poison;
|
||||
info->deferred = decoder->status.deferred;
|
||||
info->error_code_ext = decoder->status.error_code_ext;
|
||||
|
||||
|
||||
result = aca_decoder_get_bank(decoder, &bank);
|
||||
if (result < 0)
|
||||
{
|
||||
bank = ACA_SEVERITY_UNKNOWN;
|
||||
bank = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
}
|
||||
info->bank_ref = bank;
|
||||
|
||||
@@ -215,13 +220,13 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
|
||||
}
|
||||
else
|
||||
{
|
||||
info->instance_ref = ACA_ERROR_TYPE_DECODE_INAPPLICABLE;
|
||||
info->instance_ref = RAS_DECODE_ERROR_TYPE_DECODE_INAPPLICABLE;
|
||||
}
|
||||
|
||||
// 0b1000 indicate error threshold has been exceeded, and is always fatal
|
||||
if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
|
||||
if (decoder->flags & RAS_DECODE_FLAG_THRESHOLD_EXCEEDED)
|
||||
{
|
||||
info->severity_ref = ACA_SEVERITY_FATAL;
|
||||
info->severity_ref = RAS_DECODE_SEVERITY_FATAL;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -242,31 +247,31 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
|
||||
info->aid = -1; // Invalid value
|
||||
}
|
||||
|
||||
if (decoder->status.error_code_ext >= ACA_ERROR_CODE_EXT_MIN && decoder->status.error_code_ext <= ACA_ERROR_CODE_EXT_MAX)
|
||||
if (decoder->status.error_code_ext >= RAS_DECODE_ERROR_CODE_EXT_MIN && decoder->status.error_code_ext <= RAS_DECODE_ERROR_CODE_EXT_MAX)
|
||||
{
|
||||
uint32_t instance_id = decoder->ipid.instance_id_lo;
|
||||
uint32_t error_info = decoder->synd.error_information & 0xFF;
|
||||
|
||||
if ((instance_id == ACA_INSTANCE_ID_XCD0_400 || instance_id == ACA_INSTANCE_ID_XCD1_400 ||
|
||||
instance_id == ACA_INSTANCE_ID_XCD0_401 || instance_id == ACA_INSTANCE_ID_XCD1_401) &&
|
||||
if ((instance_id == RAS_DECODE_INSTANCE_ID_XCD0_400 || instance_id == RAS_DECODE_INSTANCE_ID_XCD1_400 ||
|
||||
instance_id == RAS_DECODE_INSTANCE_ID_XCD0_401 || instance_id == RAS_DECODE_INSTANCE_ID_XCD1_401) &&
|
||||
find_error_in_table(xcd_error_table, NUM_XCD_ERRORS, error_info, &error_type) == 0)
|
||||
{
|
||||
info->error_type_ref = error_type;
|
||||
}
|
||||
else if ((instance_id == ACA_INSTANCE_ID_AID_400 || instance_id == ACA_INSTANCE_ID_AID_401) &&
|
||||
else if ((instance_id == RAS_DECODE_INSTANCE_ID_AID_400 || instance_id == RAS_DECODE_INSTANCE_ID_AID_401) &&
|
||||
find_error_in_table(aid_error_table, NUM_AID_ERRORS, error_info, &error_type) == 0)
|
||||
{
|
||||
info->error_type_ref = error_type;
|
||||
}
|
||||
else
|
||||
{
|
||||
info->error_type_ref = ACA_SEVERITY_UNKNOWN;
|
||||
info->error_type_ref = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
}
|
||||
}
|
||||
// 0b1000 indicate error threshold has been exceeded
|
||||
else if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
|
||||
else if (decoder->flags & RAS_DECODE_FLAG_THRESHOLD_EXCEEDED)
|
||||
{
|
||||
info->error_type_ref = ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
|
||||
info->error_type_ref = RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -276,14 +281,14 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
|
||||
}
|
||||
else
|
||||
{
|
||||
info->error_type_ref = ACA_SEVERITY_UNKNOWN;
|
||||
info->error_type_ref = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
// 0b1000 indicate error threshold has been exceeded, and is always a HBM error
|
||||
if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
|
||||
if (decoder->flags & RAS_DECODE_FLAG_THRESHOLD_EXCEEDED)
|
||||
{
|
||||
info->category_ref = ACA_CATEGORY_HBM_ERRORS;
|
||||
info->category_ref = RAS_DECODE_CATEGORY_HBM_ERRORS;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -295,8 +300,6 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
|
||||
{
|
||||
service_error = info->error_type_ref;
|
||||
}
|
||||
|
||||
info->afid = get_error_id(info->category_ref, service_error, info->severity_ref);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -324,8 +327,17 @@ static void aca_decoder_init(aca_decoder_t *decoder, uint16_t hw_revision, uint3
|
||||
aca_synd_init(&decoder->synd, synd_reg);
|
||||
}
|
||||
|
||||
aca_error_info_t aca_decode(const aca_raw_data_t *raw_data)
|
||||
/**
|
||||
* @brief Main decode function that processes raw ACA error data and returns JSON
|
||||
* @param[in] raw_data Pointer to structure containing raw ACA error data
|
||||
* @return JsonValue* containing the decoded error information, or NULL on failure
|
||||
*/
|
||||
JsonValue* aca_decode(const aca_raw_data_t *raw_data)
|
||||
{
|
||||
if (!raw_data) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
aca_decoder_t decoder = {0};
|
||||
aca_error_info_t info = {0};
|
||||
|
||||
@@ -337,5 +349,68 @@ aca_error_info_t aca_decode(const aca_raw_data_t *raw_data)
|
||||
raw_data->aca_synd);
|
||||
|
||||
aca_decoder_get_error_info(&decoder, &info);
|
||||
return info;
|
||||
|
||||
// Create the main JSON object
|
||||
JsonValue *json_obj = json_create_object();
|
||||
if (!json_obj) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Add bank
|
||||
json_object_set(json_obj, "bank", json_create_string(info.bank_ref));
|
||||
|
||||
// Create error_location object
|
||||
JsonValue *error_location = json_create_object();
|
||||
if (error_location) {
|
||||
char oam_str[16], aid_str[16];
|
||||
snprintf(oam_str, sizeof(oam_str), "%d", info.oam);
|
||||
snprintf(aid_str, sizeof(aid_str), "%d", info.aid);
|
||||
|
||||
json_object_set(error_location, "oam", json_create_string(oam_str));
|
||||
json_object_set(error_location, "aid", json_create_string(aid_str));
|
||||
json_object_set(error_location, "instance", json_create_string(info.instance_ref));
|
||||
|
||||
json_object_set(json_obj, "error_location", error_location);
|
||||
}
|
||||
|
||||
// Add severity
|
||||
json_object_set(json_obj, "severity", json_create_string(info.severity_ref));
|
||||
|
||||
// Add scrub as string
|
||||
char scrub_str[16];
|
||||
snprintf(scrub_str, sizeof(scrub_str), "%u", info.scrub);
|
||||
json_object_set(json_obj, "scrub", json_create_string(scrub_str));
|
||||
|
||||
// Add poison as string
|
||||
char poison_str[16];
|
||||
snprintf(poison_str, sizeof(poison_str), "%u", info.poison);
|
||||
json_object_set(json_obj, "poison", json_create_string(poison_str));
|
||||
|
||||
// Add deferred as string
|
||||
char deferred_str[16];
|
||||
snprintf(deferred_str, sizeof(deferred_str), "%u", info.deferred);
|
||||
json_object_set(json_obj, "deferred", json_create_string(deferred_str));
|
||||
|
||||
// Add err_ext as string
|
||||
char err_ext_str[16];
|
||||
snprintf(err_ext_str, sizeof(err_ext_str), "%u", info.error_code_ext);
|
||||
json_object_set(json_obj, "err_ext", json_create_string(err_ext_str));
|
||||
|
||||
// Add error_category
|
||||
json_object_set(json_obj, "error_category", json_create_string(info.category_ref));
|
||||
|
||||
// Add error_type
|
||||
json_object_set(json_obj, "error_type", json_create_string(info.error_type_ref));
|
||||
|
||||
// Add address as hex string
|
||||
char address_str[32];
|
||||
snprintf(address_str, sizeof(address_str), "0x%" PRIx64, info.raw_addr);
|
||||
json_object_set(json_obj, "address", json_create_string(address_str));
|
||||
|
||||
// Add syndrome as hex string
|
||||
char syndrome_str[32];
|
||||
snprintf(syndrome_str, sizeof(syndrome_str), "0x%" PRIx64, info.raw_synd);
|
||||
json_object_set(json_obj, "syndrome", json_create_string(syndrome_str));
|
||||
|
||||
return json_obj;
|
||||
}
|
||||
+1
-2
@@ -1,4 +1,3 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
@@ -21,7 +20,7 @@
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
/**
|
||||
* @file aca_fields.c
|
||||
* @brief Implementation of ACA register field handling
|
||||
*
|
||||
+16
-12
@@ -33,7 +33,7 @@
|
||||
*/
|
||||
|
||||
#include "aca_tables.h"
|
||||
#include "aca_constants.h"
|
||||
#include "ras_decode_constants.h"
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
@@ -80,19 +80,19 @@ const aca_error_type_t error_table[] = {
|
||||
{"cs", 0xe, "FTI_ND_ILL_REQ"},
|
||||
{"cs", 0xf, "FTI_ND_ADDR_VIOL"},
|
||||
{"cs", 0x10, "FTI_ND_SEC_VIOL"},
|
||||
{"cs", 0x11, ACA_ERROR_TYPE_HARDWARE_ASSERTION},
|
||||
{"cs", 0x11, RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION},
|
||||
{"cs", 0x12, "ST_PRT_ERR"},
|
||||
{"cs", 0x13, "ST_ECC_ERR"},
|
||||
{"cs", 0x14, "ST_TXN_ERR"},
|
||||
{"pie", 0x0, ACA_ERROR_TYPE_HARDWARE_ASSERTION},
|
||||
{"pie", 0x0, RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION},
|
||||
{"pie", 0x1, "CSW"},
|
||||
{"pie", 0x2, "GMI"},
|
||||
{"pie", 0x3, "FTI_DAT_STAT"},
|
||||
{"pie", 0x4, "DEF"},
|
||||
{"pie", 0x5, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT},
|
||||
{"pie", 0x5, RAS_DECODE_ERROR_TYPE_WATCHDOG_TIMEOUT},
|
||||
{"pie", 0x6, "CNLI"},
|
||||
{"pie", 0x7, "RSLVFCI"},
|
||||
{"umc", 0x0, ACA_ERROR_TYPE_ON_DIE_ECC},
|
||||
{"umc", 0x0, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC},
|
||||
{"umc", 0x1, "WriteDataPoisonErr"},
|
||||
{"umc", 0x2, "SdpParityErr"},
|
||||
{"umc", 0x4, "AddressCommandParityErr"},
|
||||
@@ -103,7 +103,7 @@ const aca_error_type_t error_table[] = {
|
||||
{"umc", 0xb, "RdCrcErr"},
|
||||
{"umc", 0xd, "MpFwErr"},
|
||||
{"umc", 0xe, "MpParErr"},
|
||||
{"umc", 0xf, ACA_ERROR_TYPE_END_TO_END_CRC},
|
||||
{"umc", 0xf, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC},
|
||||
{"psp", 0x0, "Mp0HighSramError"},
|
||||
{"psp", 0x1, "Mp0LowSramError"},
|
||||
{"psp", 0x2, "Mp0IDataBank0Error"},
|
||||
@@ -127,7 +127,7 @@ const aca_error_type_t error_table[] = {
|
||||
{"psp", 0x3b, "SRAM_EDC"},
|
||||
{"psp", 0x3c, "SMN_Parity"},
|
||||
{"psp", 0x3d, "SMN_Timeout"},
|
||||
{"psp", 0x3f, ACA_ERROR_TYPE_WAFL},
|
||||
{"psp", 0x3f, RAS_DECODE_ERROR_TYPE_WAFL},
|
||||
{"smu", 0x0, "Mp5HighSramError"},
|
||||
{"smu", 0x1, "Mp5LowSramError"},
|
||||
{"smu", 0x2, "Mp5DCacheAError"},
|
||||
@@ -452,7 +452,11 @@ static const aca_instance_entry_t instance_table[] = {
|
||||
{"umc", 0x193F00, "ch7 umc0"},
|
||||
{"umc", 0x393F00, "ch7 umc1"},
|
||||
{"umc", 0x593F00, "ch7 umc2"},
|
||||
{"umc", 0x793F00, "ch7 umc3"}};
|
||||
{"umc", 0x793F00, "ch7 umc3"},
|
||||
{"pcs_xgmi", 0x11A09200, "serdes a pcs0"},
|
||||
{"pcs_xgmi", 0x12109200, "serdes b pcs7"},
|
||||
{"pcs_xgmi", 0x12209200, "serdes b pcs8"},
|
||||
{"pcs_xgmi", 0x11B09200, "xgmi pcs"}};
|
||||
|
||||
const size_t NUM_OAM_AID_ENTRIES = sizeof(oam_aid_table) / sizeof(oam_aid_table[0]);
|
||||
const size_t NUM_BANKS = sizeof(bank_table) / sizeof(bank_table[0]);
|
||||
@@ -478,7 +482,7 @@ int find_bank_name(uint16_t hw_id, uint16_t aca_type, const char **bank_name)
|
||||
}
|
||||
}
|
||||
|
||||
*bank_name = ACA_SEVERITY_UNKNOWN;
|
||||
*bank_name = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -499,7 +503,7 @@ int find_error_type_by_bank(const char *bank, uint32_t error_code, const char **
|
||||
}
|
||||
}
|
||||
|
||||
*error_type = ACA_SEVERITY_UNKNOWN;
|
||||
*error_type = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -520,7 +524,7 @@ int find_error_in_table(const aca_error_entry_t *table, size_t table_size,
|
||||
}
|
||||
}
|
||||
|
||||
*error_type = ACA_SEVERITY_UNKNOWN;
|
||||
*error_type = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -556,6 +560,6 @@ int find_instance_name(const char *bank, uint32_t instance_id_lo, const char **i
|
||||
}
|
||||
}
|
||||
|
||||
*instance_name = ACA_SEVERITY_UNKNOWN;
|
||||
*instance_name = RAS_DECODE_SEVERITY_UNKNOWN;
|
||||
return 1;
|
||||
}
|
||||
+8
-8
@@ -26,32 +26,32 @@
|
||||
|
||||
int aca_get_version_major(void)
|
||||
{
|
||||
return ACA_VERSION_MAJOR;
|
||||
return RAS_DECODE_VERSION_MAJOR;
|
||||
}
|
||||
|
||||
int aca_get_version_minor(void)
|
||||
{
|
||||
return ACA_VERSION_MINOR;
|
||||
return RAS_DECODE_VERSION_MINOR;
|
||||
}
|
||||
|
||||
int aca_get_version_patch(void)
|
||||
{
|
||||
return ACA_VERSION_PATCH;
|
||||
return RAS_DECODE_VERSION_PATCH;
|
||||
}
|
||||
|
||||
const char *aca_get_version_string(void)
|
||||
{
|
||||
return ACA_VERSION_STRING;
|
||||
return RAS_DECODE_VERSION_STRING;
|
||||
}
|
||||
|
||||
aca_version_info_t aca_get_version_info(void)
|
||||
{
|
||||
aca_version_info_t info;
|
||||
|
||||
info.major = ACA_VERSION_MAJOR;
|
||||
info.minor = ACA_VERSION_MINOR;
|
||||
info.patch = ACA_VERSION_PATCH;
|
||||
info.string = ACA_VERSION_STRING;
|
||||
info.major = RAS_DECODE_VERSION_MAJOR;
|
||||
info.minor = RAS_DECODE_VERSION_MINOR;
|
||||
info.patch = RAS_DECODE_VERSION_PATCH;
|
||||
info.string = RAS_DECODE_VERSION_STRING;
|
||||
|
||||
return info;
|
||||
}
|
||||
@@ -0,0 +1,862 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "boot_decode.h"
|
||||
#include "ras_decode_constants.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Boot decoder mapping tables
|
||||
static const boot_decoder_entry_t boot_decoder_map_v0[] = {
|
||||
{BOOT_ENCODING_HBM_TRAINING, decode_hbm_training_v0},
|
||||
{BOOT_ENCODING_FW_LOAD, decode_fw_load_v0},
|
||||
{BOOT_ENCODING_WAFL_LINK, decode_wafl_link_training_v0},
|
||||
{BOOT_ENCODING_XGMI_LINK, decode_xgmi_link_training_v0},
|
||||
{BOOT_ENCODING_USR_CP_LINK, decode_usr_cp_link_training_v0},
|
||||
{BOOT_ENCODING_USR_DP_LINK, decode_usr_dp_link_training_v0},
|
||||
{BOOT_ENCODING_HBM_MEM_TEST, decode_hbm_mem_test_v0},
|
||||
{BOOT_ENCODING_HBM_BIST_TEST, decode_hbm_bist_test_v0},
|
||||
{BOOT_ENCODING_BOOT_CTRL_GEN_V0, decode_boot_controller_generic_v0},
|
||||
{0, NULL} // Sentinel
|
||||
};
|
||||
|
||||
static const boot_decoder_entry_t boot_decoder_map_v1[] = {
|
||||
{BOOT_ENCODING_HBM_TRAINING, decode_hbm_training_v1},
|
||||
{BOOT_ENCODING_FW_LOAD, decode_fw_load_v1},
|
||||
{BOOT_ENCODING_WAFL_LINK, decode_wafl_link_training_v1},
|
||||
{BOOT_ENCODING_XGMI_LINK, decode_xgmi_link_training_v1},
|
||||
{BOOT_ENCODING_USR_CP_LINK, decode_usr_cp_link_training_v1},
|
||||
{BOOT_ENCODING_USR_DP_LINK, decode_usr_dp_link_training_v1},
|
||||
{BOOT_ENCODING_HBM_MEM_TEST, decode_hbm_mem_test_v1},
|
||||
{BOOT_ENCODING_HBM_BIST_TEST, decode_hbm_bist_test_v1},
|
||||
{BOOT_ENCODING_BOOT_CTRL_GEN_V1, decode_boot_controller_generic_v1},
|
||||
{BOOT_ENCODING_DATA_ABORT, decode_data_abort_v1},
|
||||
{BOOT_SUCCESS_ENCODING, decode_boot_success_v1},
|
||||
{0, NULL} // Sentinel
|
||||
};
|
||||
|
||||
/**
 * @brief Read the format version field of a boot message.
 *
 * The field is byte 1 of msg->value shifted right by five bits.
 *
 * @param msg Boot message to inspect; may be NULL.
 * @return The version field, or 0 when msg is NULL.
 */
int get_boot_version(OamBootMsg *msg)
{
    return (msg != NULL) ? (extract_byte(msg->value, 1) >> 5) : 0;
}
|
||||
|
||||
/**
 * @brief Read the error-encoding field of a boot message.
 *
 * The field is byte 1 of msg->value ANDed with extract_bits(5)
 * (presumably a mask of the low five bits -- confirm against the helper).
 *
 * @param msg Boot message to inspect; may be NULL.
 * @return The encoding field, or 0 when msg is NULL.
 */
int get_error_encoding(OamBootMsg *msg)
{
    return (msg != NULL) ? (int)(extract_byte(msg->value, 1) & extract_bits(5)) : 0;
}
|
||||
|
||||
/**
 * @brief Tell whether a boot message carries the error-present marker.
 *
 * @param msg Boot message to inspect; may be NULL.
 * @return true when byte 0 of msg->value equals BOOT_ERROR_PRESENT_MARKER;
 *         false otherwise, including when msg is NULL.
 */
bool error_present(OamBootMsg *msg)
{
    return msg != NULL && extract_byte(msg->value, 0) == BOOT_ERROR_PRESENT_MARKER;
}
|
||||
|
||||
/**
 * @brief Tell whether a boot message carries the in-boot marker.
 *
 * @param msg Boot message to inspect; may be NULL.
 * @return true when byte 0 of msg->value equals BOOT_IN_BOOT_MARKER;
 *         false otherwise, including when msg is NULL.
 */
bool in_boot(OamBootMsg *msg)
{
    return msg != NULL && extract_byte(msg->value, 0) == BOOT_IN_BOOT_MARKER;
}
|
||||
|
||||
/**
 * @brief Extract the socket number from a boot message.
 *
 * Version 0 messages keep it in byte 4 of msg->value. Any other version
 * takes byte 2, shifts it right by four bits and ANDs the result with
 * extract_bits(4).
 *
 * @param msg     Boot message to inspect; may be NULL.
 * @param version Message format version selecting the layout.
 * @return The socket number, or 0 when msg is NULL.
 */
int get_socket(OamBootMsg *msg, int version)
{
    if (msg == NULL)
    {
        return 0;
    }

    if (version != 0)
    {
        return (int)((extract_byte(msg->value, 2) >> 4) & extract_bits(4));
    }

    return extract_byte(msg->value, 4);
}
|
||||
|
||||
/**
 * @brief Extract the AID number from a boot message.
 *
 * Version 0 messages keep it in byte 5 of msg->value. Any other version
 * takes byte 2 ANDed with extract_bits(4).
 *
 * @param msg     Boot message to inspect; may be NULL.
 * @param version Message format version selecting the layout.
 * @return The AID number, or 0 when msg is NULL.
 */
int get_aid(OamBootMsg *msg, int version)
{
    if (msg == NULL)
    {
        return 0;
    }

    if (version != 0)
    {
        return (int)(extract_byte(msg->value, 2) & extract_bits(4));
    }

    return extract_byte(msg->value, 5);
}
|
||||
|
||||
int decode_hbm_stack(uint8_t stack)
|
||||
{
|
||||
switch (stack)
|
||||
{
|
||||
case HBM_STACK_0:
|
||||
return 0;
|
||||
case HBM_STACK_1:
|
||||
return 1;
|
||||
default:
|
||||
return HBM_STACK_UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * @brief Build a JSON array listing the set bit positions of a link mask.
 *
 * Bit i of byte_value being set adds the number i to the array, for every
 * i below max_links.
 *
 * @param byte_value Bit mask with one bit per link.
 * @param max_links  Number of low-order bits to examine. Values above 8 are
 *                   treated as 8; zero or negative values yield an empty
 *                   array.
 * @return The array from json_create_array(), or NULL if it could not be
 *         created.
 */
JsonValue *create_failed_links_array(uint8_t byte_value, int max_links)
{
    enum { LINK_MASK_BITS = 8 }; /* a uint8_t mask has exactly eight bits */

    JsonValue *array = json_create_array();
    if (!array)
    {
        return NULL;
    }

    /*
     * Bits at position 8 and above are always zero, so clamping changes no
     * result; it only keeps the shift count below the width of int, where a
     * larger count would be undefined behaviour.
     */
    int link_count = (max_links > LINK_MASK_BITS) ? LINK_MASK_BITS : max_links;

    for (int i = 0; i < link_count; i++)
    {
        if ((byte_value >> i) & 0x1)
        {
            JsonValue *link_num = json_create_number(i);
            if (link_num)
            {
                json_array_push(array, link_num);
            }
        }
    }

    return array;
}
|
||||
|
||||
/**
 * @brief Format a value as an upper-case hexadecimal string with "0x" prefix.
 *
 * The digits are zero-padded to at least width characters. Values that need
 * more digits than width are written in full rather than truncated.
 *
 * @param value Value to format.
 * @param width Minimum number of hex digits; must not be negative.
 * @return A malloc()-allocated, NUL-terminated string that the caller must
 *         free(), or NULL if width is negative, formatting fails or memory
 *         cannot be allocated.
 */
char *create_hex_string(uint64_t value, int width)
{
    if (width < 0)
    {
        return NULL;
    }

    const unsigned long long printable = (unsigned long long)value;

    /* Measure first: width is only a minimum, the value may need more digits. */
    int needed = snprintf(NULL, 0, "0x%0*llX", width, printable);
    if (needed < 0)
    {
        return NULL;
    }

    size_t buffer_size = (size_t)needed + 1U; /* room for the NUL terminator */
    char *hex_str = malloc(buffer_size);
    if (!hex_str)
    {
        return NULL;
    }

    if (snprintf(hex_str, buffer_size, "0x%0*llX", width, printable) != needed)
    {
        free(hex_str);
        return NULL;
    }

    return hex_str;
}
|
||||
|
||||
// Version 0 decoder implementations
|
||||
/**
 * @brief Decode a version-0 HBM training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid", "hbm_stack" (byte 3 of
 * msg->value passed through decode_hbm_stack) and "hbm_channel" (byte 2).
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_hbm_training_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t stack_code = extract_byte(msg->value, 3);
    const uint8_t channel = extract_byte(msg->value, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));
    json_object_set(report, "hbm_stack", json_create_number(decode_hbm_stack(stack_code)));
    json_object_set(report, "hbm_channel", json_create_number(channel));

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 firmware-load message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "fw_id". The firmware
 * id is the 16-bit value with byte 3 of msg->value as its high byte and
 * byte 2 as its low byte, rendered as "0x" plus four upper-case hex digits.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_fw_load_v0(OamBootMsg *msg)
{
    if (!msg)
    {
        return NULL;
    }

    JsonValue *result = json_create_object();
    if (!result)
    {
        return NULL;
    }

    int version = get_boot_version(msg);
    uint8_t byte3 = extract_byte(msg->value, 3);
    uint8_t byte2 = extract_byte(msg->value, 2);
    uint16_t fw_id = (uint16_t)((byte3 << 8) | byte2);

    /*
     * Format on the stack: "0x" + 4 digits + NUL always fits, so there is no
     * allocation that can fail and no fabricated "0x0000" reported in its
     * place.
     */
    char fw_id_str[8];
    snprintf(fw_id_str, sizeof(fw_id_str), "0x%04X", (unsigned int)fw_id);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_FW_LOAD));
    json_object_set(result, "socket", json_create_number(get_socket(msg, version)));
    json_object_set(result, "aid", json_create_number(get_aid(msg, version)));
    json_object_set(result, "fw_id", json_create_string(fw_id_str));

    return result;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 WAFL link training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "failed_links", the
 * latter built from the low two bits of byte 2 of msg->value.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_wafl_link_training_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t link_mask = extract_byte(msg->value, 2);
    JsonValue *links = create_failed_links_array(link_mask, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_WAFL_LINK_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    /* Fall back to an empty array when the link list could not be built. */
    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(report, "failed_links", links);

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 XGMI link training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "failed_links", the
 * latter built from all eight bits of byte 2 of msg->value.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_xgmi_link_training_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t link_mask = extract_byte(msg->value, 2);
    JsonValue *links = create_failed_links_array(link_mask, 8);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_XGMI_LINK_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    /* Fall back to an empty array when the link list could not be built. */
    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(report, "failed_links", links);

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 USR CP link training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "failed_links", the
 * latter built from the low two bits of byte 2 of msg->value.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_usr_cp_link_training_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t link_mask = extract_byte(msg->value, 2);
    JsonValue *links = create_failed_links_array(link_mask, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_USR_CP_LINK_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    /* Fall back to an empty array when the link list could not be built. */
    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(report, "failed_links", links);

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 USR DP link training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "failed_links", the
 * latter built from the low four bits of byte 2 of msg->value.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_usr_dp_link_training_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t link_mask = extract_byte(msg->value, 2);
    JsonValue *links = create_failed_links_array(link_mask, 4);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_USR_DP_LINK_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    /* Fall back to an empty array when the link list could not be built. */
    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(report, "failed_links", links);

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 HBM memory test message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid", "hbm_stack" (byte 3 of
 * msg->value passed through decode_hbm_stack) and "hbm_channel" (byte 2).
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_hbm_mem_test_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t stack_code = extract_byte(msg->value, 3);
    const uint8_t channel = extract_byte(msg->value, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_MEMORY_TEST));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));
    json_object_set(report, "hbm_stack", json_create_number(decode_hbm_stack(stack_code)));
    json_object_set(report, "hbm_channel", json_create_number(channel));

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 HBM BIST test message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid", "hbm_stack" (byte 3 of
 * msg->value passed through decode_hbm_stack) and "hbm_channel" (byte 2).
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_hbm_bist_test_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t stack_code = extract_byte(msg->value, 3);
    const uint8_t channel = extract_byte(msg->value, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_BIST_TEST));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));
    json_object_set(report, "hbm_stack", json_create_number(decode_hbm_stack(stack_code)));
    json_object_set(report, "hbm_channel", json_create_number(channel));

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-0 generic boot controller message into a JSON object.
 *
 * The object receives "error_type", "socket" and "aid".
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_boot_controller_generic_v0(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_GENERIC));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    return report;
}
|
||||
|
||||
// Version 1 decoder implementations
|
||||
/**
 * @brief Decode a version-1 HBM training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid", "hbm_stack" (byte 5 of
 * msg->value passed through decode_hbm_stack) and "hbm_channel" (byte 4).
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_hbm_training_v1(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t stack_code = extract_byte(msg->value, 5);
    const uint8_t channel = extract_byte(msg->value, 4);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));
    json_object_set(report, "hbm_stack", json_create_number(decode_hbm_stack(stack_code)));
    json_object_set(report, "hbm_channel", json_create_number(channel));

    return report;
}
|
||||
|
||||
/**
 * @brief Decode a version-1 firmware-load message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "fw_id". The firmware
 * id is the 16-bit value with byte 5 of msg->value as its high byte and
 * byte 4 as its low byte, rendered as "0x" plus four upper-case hex digits.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_fw_load_v1(OamBootMsg *msg)
{
    if (!msg)
    {
        return NULL;
    }

    JsonValue *result = json_create_object();
    if (!result)
    {
        return NULL;
    }

    int version = get_boot_version(msg);
    uint8_t byte5 = extract_byte(msg->value, 5);
    uint8_t byte4 = extract_byte(msg->value, 4);
    uint16_t fw_id = (uint16_t)((byte5 << 8) | byte4);

    /*
     * Format on the stack: "0x" + 4 digits + NUL always fits, so there is no
     * allocation that can fail and no fabricated "0x0000" reported in its
     * place.
     */
    char fw_id_str[8];
    snprintf(fw_id_str, sizeof(fw_id_str), "0x%04X", (unsigned int)fw_id);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_FW_LOAD));
    json_object_set(result, "socket", json_create_number(get_socket(msg, version)));
    json_object_set(result, "aid", json_create_number(get_aid(msg, version)));
    json_object_set(result, "fw_id", json_create_string(fw_id_str));

    return result;
}
|
||||
|
||||
/**
 * @brief Decode a version-1 WAFL link training message into a JSON object.
 *
 * The object receives "error_type", "socket", "aid" and "failed_links", the
 * latter built from the low two bits of byte 4 of msg->value.
 *
 * @param msg Boot message to decode; may be NULL.
 * @return The populated object, or NULL if msg is NULL or the object could
 *         not be created.
 */
JsonValue *decode_wafl_link_training_v1(OamBootMsg *msg)
{
    if (msg == NULL)
    {
        return NULL;
    }

    JsonValue *report = json_create_object();
    if (report == NULL)
    {
        return NULL;
    }

    const int fmt_version = get_boot_version(msg);
    const uint8_t link_mask = extract_byte(msg->value, 4);
    JsonValue *links = create_failed_links_array(link_mask, 2);

    json_object_set(report, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_WAFL_LINK_TRAINING));
    json_object_set(report, "socket", json_create_number(get_socket(msg, fmt_version)));
    json_object_set(report, "aid", json_create_number(get_aid(msg, fmt_version)));

    /* Fall back to an empty array when the link list could not be built. */
    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(report, "failed_links", links);

    return report;
}
|
||||
|
||||
/**
 * Decode a version-1 XGMI link-training boot message into a JSON object.
 *
 * "failed_links" holds the array returned by
 * create_failed_links_array(byte 4, 8); an empty array is substituted when
 * that helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_xgmi_link_training_v1(OamBootMsg *msg)
{
    JsonValue *obj = msg ? json_create_object() : NULL;
    if (obj == NULL)
    {
        return NULL;
    }

    const int boot_version = get_boot_version(msg);
    const uint8_t links_byte = extract_byte(msg->value, 4);
    JsonValue *links = create_failed_links_array(links_byte, 8);

    json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_XGMI_LINK_TRAINING));
    json_object_set(obj, "socket", json_create_number(get_socket(msg, boot_version)));
    json_object_set(obj, "aid", json_create_number(get_aid(msg, boot_version)));

    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(obj, "failed_links", links);

    return obj;
}
|
||||
|
||||
/**
 * Decode a version-1 USR CP link-training boot message into a JSON object.
 *
 * "failed_links" holds the array returned by
 * create_failed_links_array(byte 4, 2); an empty array is substituted when
 * that helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_usr_cp_link_training_v1(OamBootMsg *msg)
{
    JsonValue *obj = msg ? json_create_object() : NULL;
    if (obj == NULL)
    {
        return NULL;
    }

    const int boot_version = get_boot_version(msg);
    const uint8_t links_byte = extract_byte(msg->value, 4);
    JsonValue *links = create_failed_links_array(links_byte, 2);

    json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_USR_CP_LINK_TRAINING));
    json_object_set(obj, "socket", json_create_number(get_socket(msg, boot_version)));
    json_object_set(obj, "aid", json_create_number(get_aid(msg, boot_version)));

    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(obj, "failed_links", links);

    return obj;
}
|
||||
|
||||
/**
 * Decode a version-1 USR DP link-training boot message into a JSON object.
 *
 * "failed_links" holds the array returned by
 * create_failed_links_array(byte 4, 4); an empty array is substituted when
 * that helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_usr_dp_link_training_v1(OamBootMsg *msg)
{
    JsonValue *obj = msg ? json_create_object() : NULL;
    if (obj == NULL)
    {
        return NULL;
    }

    const int boot_version = get_boot_version(msg);
    const uint8_t links_byte = extract_byte(msg->value, 4);
    JsonValue *links = create_failed_links_array(links_byte, 4);

    json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_USR_DP_LINK_TRAINING));
    json_object_set(obj, "socket", json_create_number(get_socket(msg, boot_version)));
    json_object_set(obj, "aid", json_create_number(get_aid(msg, boot_version)));

    if (links == NULL)
    {
        links = json_create_array();
    }
    json_object_set(obj, "failed_links", links);

    return obj;
}
|
||||
|
||||
/**
 * Decode a version-1 HBM memory-test boot message into a JSON object.
 *
 * The object carries "error_type", "socket", "aid", "hbm_stack" (message
 * byte 5 passed through decode_hbm_stack) and "hbm_channel" (message byte 4).
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_hbm_mem_test_v1(OamBootMsg *msg)
{
    JsonValue *obj = msg ? json_create_object() : NULL;
    if (obj == NULL)
    {
        return NULL;
    }

    const int boot_version = get_boot_version(msg);
    const uint8_t stack_byte = extract_byte(msg->value, 5);
    const uint8_t channel_byte = extract_byte(msg->value, 4);

    /* Fields are appended in this order, which is also the output order. */
    json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_MEMORY_TEST));
    json_object_set(obj, "socket", json_create_number(get_socket(msg, boot_version)));
    json_object_set(obj, "aid", json_create_number(get_aid(msg, boot_version)));
    json_object_set(obj, "hbm_stack", json_create_number(decode_hbm_stack(stack_byte)));
    json_object_set(obj, "hbm_channel", json_create_number(channel_byte));

    return obj;
}
|
||||
|
||||
/**
 * Decode a version-1 HBM BIST-test boot message into a JSON object.
 *
 * The object carries "error_type", "socket", "aid", "hbm_stack" (message
 * byte 5 passed through decode_hbm_stack) and "hbm_channel" (message byte 4).
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_hbm_bist_test_v1(OamBootMsg *msg)
{
    JsonValue *obj = msg ? json_create_object() : NULL;
    if (obj == NULL)
    {
        return NULL;
    }

    const int boot_version = get_boot_version(msg);
    const uint8_t stack_byte = extract_byte(msg->value, 5);
    const uint8_t channel_byte = extract_byte(msg->value, 4);

    /* Fields are appended in this order, which is also the output order. */
    json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_HBM_BIST_TEST));
    json_object_set(obj, "socket", json_create_number(get_socket(msg, boot_version)));
    json_object_set(obj, "aid", json_create_number(get_aid(msg, boot_version)));
    json_object_set(obj, "hbm_stack", json_create_number(decode_hbm_stack(stack_byte)));
    json_object_set(obj, "hbm_channel", json_create_number(channel_byte));

    return obj;
}
|
||||
|
||||
/**
 * Decode a version-1 generic boot-controller message into a JSON object.
 *
 * "last_successful_boot_step_number" is message byte 4 rendered by
 * create_hex_string(byte, 2). "fw_boot_status" is the 32-bit value assembled
 * from bytes 7, 6, 5 and 0 (most to least significant) rendered by
 * create_hex_string(value, 8). Fallback texts "0x00" / "0x00000000" are used
 * when the hex helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_boot_controller_generic_v1(OamBootMsg *msg)
{
    if (!msg)
        return NULL;

    JsonValue *result = json_create_object();
    if (!result)
        return NULL;

    int version = get_boot_version(msg);
    uint8_t byte4 = extract_byte(msg->value, 4);
    uint8_t byte0 = extract_byte(msg->value, 0);
    uint8_t byte5 = extract_byte(msg->value, 5);
    uint8_t byte6 = extract_byte(msg->value, 6);
    uint8_t byte7 = extract_byte(msg->value, 7);

    char *boot_step_str = create_hex_string(byte4, 2);

    /* Widen before shifting: a uint8_t promotes to signed int, and shifting a
     * value >= 0x80 left by 24 would overflow it (undefined behaviour). */
    uint32_t boot_status = ((uint32_t)byte7 << 24) | ((uint32_t)byte6 << 16) |
                           ((uint32_t)byte5 << 8) | (uint32_t)byte0;
    char *boot_status_str = create_hex_string(boot_status, 8);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_GENERIC));
    json_object_set(result, "socket", json_create_number(get_socket(msg, version)));
    json_object_set(result, "aid", json_create_number(get_aid(msg, version)));
    json_object_set(result, "last_successful_boot_step_number",
                    json_create_string(boot_step_str ? boot_step_str : "0x00"));
    json_object_set(result, "fw_boot_status",
                    json_create_string(boot_status_str ? boot_status_str : "0x00000000"));

    /* json_create_string copies its argument, so the temporaries are freed here. */
    free(boot_step_str);
    free(boot_status_str);
    return result;
}
|
||||
|
||||
/**
 * Decode a version-1 boot-controller data-abort message into a JSON object.
 *
 * "last_successful_boot_step_number" is message byte 3 rendered by
 * create_hex_string(byte, 2). "exception_address" is the 32-bit value
 * assembled from bytes 7, 6, 5 and 4 (most to least significant) rendered by
 * create_hex_string(value, 8). Fallback texts "0x00" / "0x00000000" are used
 * when the hex helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_data_abort_v1(OamBootMsg *msg)
{
    if (!msg)
        return NULL;

    JsonValue *result = json_create_object();
    if (!result)
        return NULL;

    int version = get_boot_version(msg);
    uint8_t byte3 = extract_byte(msg->value, 3);
    uint8_t byte4 = extract_byte(msg->value, 4);
    uint8_t byte5 = extract_byte(msg->value, 5);
    uint8_t byte6 = extract_byte(msg->value, 6);
    uint8_t byte7 = extract_byte(msg->value, 7);

    char *boot_step_str = create_hex_string(byte3, 2);

    /* Widen before shifting: a uint8_t promotes to signed int, and shifting a
     * value >= 0x80 left by 24 would overflow it (undefined behaviour). */
    uint32_t exception_addr = ((uint32_t)byte7 << 24) | ((uint32_t)byte6 << 16) |
                              ((uint32_t)byte5 << 8) | (uint32_t)byte4;
    char *exception_addr_str = create_hex_string(exception_addr, 8);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT));
    json_object_set(result, "socket", json_create_number(get_socket(msg, version)));
    json_object_set(result, "aid", json_create_number(get_aid(msg, version)));
    json_object_set(result, "last_successful_boot_step_number",
                    json_create_string(boot_step_str ? boot_step_str : "0x00"));
    json_object_set(result, "exception_address",
                    json_create_string(exception_addr_str ? exception_addr_str : "0x00000000"));

    /* json_create_string copies its argument, so the temporaries are freed here. */
    free(boot_step_str);
    free(exception_addr_str);
    return result;
}
|
||||
|
||||
/**
 * Decode a version-1 boot-success message into a JSON object.
 *
 * Unlike the error decoders, no "socket"/"aid" fields are emitted.
 * "last_successful_boot_step_number" is message byte 4 rendered by
 * create_hex_string(byte, 2). "fw_boot_status" is the 32-bit value assembled
 * from bytes 7, 6, 5 and 0 (most to least significant) rendered by
 * create_hex_string(value, 8). Fallback texts "0x00" / "0x00000000" are used
 * when the hex helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_boot_success_v1(OamBootMsg *msg)
{
    if (!msg)
        return NULL;

    JsonValue *result = json_create_object();
    if (!result)
        return NULL;

    uint8_t byte4 = extract_byte(msg->value, 4);
    uint8_t byte0 = extract_byte(msg->value, 0);
    uint8_t byte5 = extract_byte(msg->value, 5);
    uint8_t byte6 = extract_byte(msg->value, 6);
    uint8_t byte7 = extract_byte(msg->value, 7);

    char *boot_step_str = create_hex_string(byte4, 2);

    /* Widen before shifting: a uint8_t promotes to signed int, and shifting a
     * value >= 0x80 left by 24 would overflow it (undefined behaviour). */
    uint32_t boot_status = ((uint32_t)byte7 << 24) | ((uint32_t)byte6 << 16) |
                           ((uint32_t)byte5 << 8) | (uint32_t)byte0;
    char *boot_status_str = create_hex_string(boot_status, 8);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_BOOT_SUCCESS));
    json_object_set(result, "last_successful_boot_step_number",
                    json_create_string(boot_step_str ? boot_step_str : "0x00"));
    json_object_set(result, "fw_boot_status",
                    json_create_string(boot_status_str ? boot_status_str : "0x00000000"));

    /* json_create_string copies its argument, so the temporaries are freed here. */
    free(boot_step_str);
    free(boot_status_str);
    return result;
}
|
||||
|
||||
// Unhandled error decoders
|
||||
/**
 * Build the JSON object for an unhandled version-0 boot error.
 *
 * The message content is not inspected; the object contains only
 * "error_type" set to RAS_DECODE_ERROR_TYPE_UNHANDLED.
 *
 * @param msg  Unused.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         the object cannot be created.
 */
JsonValue *decode_unhandled_error_v0(OamBootMsg *msg)
{
    (void)msg; /* parameter kept for the common decoder signature */

    JsonValue *obj = json_create_object();
    if (obj != NULL)
    {
        json_object_set(obj, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_UNHANDLED));
    }
    return obj;
}
|
||||
|
||||
/**
 * Build the JSON object for an unhandled version-1 boot error.
 *
 * "last_successful_boot_step_number" is message byte 4 rendered by
 * create_hex_string(byte, 2). "fw_boot_status" is the 32-bit value assembled
 * from bytes 7, 6, 5 and 0 (most to least significant) rendered by
 * create_hex_string(value, 8). Fallback texts "0x00" / "0x00000000" are used
 * when the hex helper returns NULL.
 *
 * @param msg  Boot message to decode.
 * @return A newly created JSON object (release with json_free), or NULL when
 *         msg is NULL or the object cannot be created.
 */
JsonValue *decode_unhandled_error_v1(OamBootMsg *msg)
{
    if (!msg)
        return NULL;

    JsonValue *result = json_create_object();
    if (!result)
        return NULL;

    uint8_t byte4 = extract_byte(msg->value, 4);
    uint8_t byte0 = extract_byte(msg->value, 0);
    uint8_t byte5 = extract_byte(msg->value, 5);
    uint8_t byte6 = extract_byte(msg->value, 6);
    uint8_t byte7 = extract_byte(msg->value, 7);

    char *boot_step_str = create_hex_string(byte4, 2);

    /* Widen before shifting: a uint8_t promotes to signed int, and shifting a
     * value >= 0x80 left by 24 would overflow it (undefined behaviour). */
    uint32_t boot_status = ((uint32_t)byte7 << 24) | ((uint32_t)byte6 << 16) |
                           ((uint32_t)byte5 << 8) | (uint32_t)byte0;
    char *boot_status_str = create_hex_string(boot_status, 8);

    json_object_set(result, "error_type", json_create_string(RAS_DECODE_ERROR_TYPE_UNHANDLED));
    json_object_set(result, "last_successful_boot_step_number",
                    json_create_string(boot_step_str ? boot_step_str : "0x00"));
    json_object_set(result, "fw_boot_status",
                    json_create_string(boot_status_str ? boot_status_str : "0x00000000"));

    /* json_create_string copies its argument, so the temporaries are freed here. */
    free(boot_step_str);
    free(boot_status_str);
    return result;
}
|
||||
|
||||
/**
 * Select the decoder for a boot message.
 *
 * A message whose byte 0 equals BOOT_IN_BOOT_MARKER and whose boot version is
 * 1 is handled by decode_boot_success_v1. Every other message is looked up by
 * its error encoding in boot_decoder_map_v0 (version 0) or
 * boot_decoder_map_v1 (any other version); the maps are terminated by an
 * entry whose decoder is NULL.
 *
 * @param msg  Boot message to classify.
 * @return The matching decoder, or NULL when msg is NULL or no map entry
 *         matches the encoding.
 */
boot_decoder_func_t get_decoder_function(OamBootMsg *msg)
{
    if (!msg)
        return NULL;

    /* Read the version once; the original re-declared it in an inner scope. */
    const uint8_t byte0 = extract_byte(msg->value, 0);
    const int version = get_boot_version(msg);

    if (byte0 == BOOT_IN_BOOT_MARKER && version == 1)
    {
        return decode_boot_success_v1;
    }

    const int encoding = get_error_encoding(msg);
    const boot_decoder_entry_t *decoder_map = (version == 0) ? boot_decoder_map_v0 : boot_decoder_map_v1;

    for (int i = 0; decoder_map[i].decoder != NULL; i++)
    {
        if (decoder_map[i].encoding == encoding)
        {
            return decoder_map[i].decoder;
        }
    }

    return NULL; // No decoder found
}
|
||||
|
||||
JsonValue *boot_decode_orchestrator(uint64_t *oam_boot_msgs, size_t count)
|
||||
{
|
||||
if (!oam_boot_msgs || count == 0)
|
||||
return NULL;
|
||||
|
||||
JsonValue *results = json_create_object();
|
||||
if (!results)
|
||||
return NULL;
|
||||
|
||||
// Convert to OamBootMsg structures
|
||||
OamBootMsg *msgs = malloc(count * sizeof(OamBootMsg));
|
||||
if (!msgs)
|
||||
{
|
||||
json_free(results);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
msgs[i].value = oam_boot_msgs[i];
|
||||
}
|
||||
|
||||
// Check error markers across all messages
|
||||
size_t messages_with_markers = 0;
|
||||
bool *has_marker = malloc(count * sizeof(bool));
|
||||
if (!has_marker) {
|
||||
free(msgs);
|
||||
json_free(results);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Count messages with error markers (0xA4) or boot markers (0xBA)
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
has_marker[i] = error_present(&msgs[i]) || in_boot(&msgs[i]);
|
||||
if (has_marker[i]) {
|
||||
messages_with_markers++;
|
||||
}
|
||||
}
|
||||
|
||||
// Determine decoding strategy based on the presence of error markers
|
||||
bool decode_all_as_unhandled = (messages_with_markers == 0);
|
||||
bool decode_only_marked = (messages_with_markers > 0 && messages_with_markers < count);
|
||||
bool decode_all_normally = (messages_with_markers == count);
|
||||
|
||||
// Check if all decoders are NULL (for unhandled error handling)
|
||||
bool all_decoders_none = true;
|
||||
if (!decode_all_as_unhandled) {
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
if (has_marker[i] && get_decoder_function(&msgs[i]) != NULL)
|
||||
{
|
||||
all_decoders_none = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process each message
|
||||
for (size_t i = 0; i < count; i++)
|
||||
{
|
||||
char msg_key[32];
|
||||
snprintf(msg_key, sizeof(msg_key), "msg%zu", i);
|
||||
|
||||
// Skip messages without markers if we're in selective decode mode
|
||||
if (decode_only_marked && !has_marker[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
JsonValue *msg_result = json_create_object();
|
||||
if (!msg_result)
|
||||
continue;
|
||||
|
||||
boot_decoder_func_t decoder_func = NULL;
|
||||
|
||||
if (decode_all_as_unhandled)
|
||||
{
|
||||
// Rule 3: No messages have markers, decode all as UNHANDLED
|
||||
decoder_func = decode_unhandled_error_v1;
|
||||
}
|
||||
else if (has_marker[i] || decode_all_normally)
|
||||
{
|
||||
// Rule 1 & 2: Decode messages with markers (or all if all have markers)
|
||||
if (all_decoders_none)
|
||||
{
|
||||
// Use unhandled error decoders
|
||||
int encoding = get_error_encoding(&msgs[i]);
|
||||
decoder_func = (encoding == 0) ? decode_unhandled_error_v0 : decode_unhandled_error_v1;
|
||||
}
|
||||
else
|
||||
{
|
||||
decoder_func = get_decoder_function(&msgs[i]);
|
||||
}
|
||||
}
|
||||
// If no decoder function is found, skip this message
|
||||
|
||||
if (decoder_func)
|
||||
{
|
||||
JsonValue *decoded = decoder_func(&msgs[i]);
|
||||
if (decoded)
|
||||
{
|
||||
// Copy all fields from decoded result to msg_result
|
||||
for (JsonPair *pair = decoded->data.object; pair != NULL; pair = pair->next)
|
||||
{
|
||||
// Create a copy of the value for the new object
|
||||
JsonValue *value_copy = NULL;
|
||||
switch (pair->value->type)
|
||||
{
|
||||
case JSON_STRING:
|
||||
value_copy = json_create_string(pair->value->data.string);
|
||||
break;
|
||||
case JSON_NUMBER:
|
||||
value_copy = json_create_number(pair->value->data.number);
|
||||
break;
|
||||
case JSON_BOOL:
|
||||
value_copy = json_create_bool(pair->value->data.boolean);
|
||||
break;
|
||||
case JSON_NULL:
|
||||
value_copy = json_create_null();
|
||||
break;
|
||||
case JSON_ARRAY:
|
||||
// For arrays, we need to copy each element
|
||||
value_copy = json_create_array();
|
||||
if (value_copy)
|
||||
{
|
||||
for (size_t j = 0; j < pair->value->data.array.count; j++)
|
||||
{
|
||||
JsonValue *elem = pair->value->data.array.items[j];
|
||||
JsonValue *elem_copy = NULL;
|
||||
if (elem->type == JSON_NUMBER)
|
||||
{
|
||||
elem_copy = json_create_number(elem->data.number);
|
||||
}
|
||||
if (elem_copy)
|
||||
{
|
||||
json_array_push(value_copy, elem_copy);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (value_copy)
|
||||
{
|
||||
json_object_set(msg_result, pair->key, value_copy);
|
||||
}
|
||||
}
|
||||
json_free(decoded);
|
||||
}
|
||||
}
|
||||
|
||||
json_object_set(results, msg_key, msg_result);
|
||||
}
|
||||
|
||||
free(msgs);
|
||||
free(has_marker);
|
||||
return results;
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "error_map.h"
|
||||
#include "ras_decode_constants.h"
|
||||
#include <string.h>
|
||||
|
||||
#define AFID_VERSION "0.7"
|
||||
|
||||
/*
 * Lookup table used by get_error_id(). Each entry is, in order:
 * numeric id, error category, error type, protocol constant, error severity.
 * get_error_id() matches on category, type and severity and returns the id.
 * Ids run sequentially from 1 to 34.
 * NOTE(review): entry 12 is the only one using
 * RAS_DECODE_PROTOCOL_CPER_WITH_SPACE instead of RAS_DECODE_PROTOCOL_CPER -
 * confirm this is intentional.
 */
static const error_map_entry_t error_map[] = {
|
||||
{1, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_FW_LOAD, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{2, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_HBM_BIST_TEST, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{3, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_HBM_MEMORY_TEST, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{4, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_HBM_TRAINING, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{5, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_UNHANDLED, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{6, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_UNKNOWN_ERROR, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{7, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_USR_CP_LINK_TRAINING, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{8, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_USR_DP_LINK_TRAINING, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{9, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_WAFL_LINK_TRAINING, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{10, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_XGMI_LINK_TRAINING, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{11, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{12, RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, RAS_DECODE_ERROR_TYPE_BOOT_CONTROLLER_GENERIC, RAS_DECODE_PROTOCOL_CPER_WITH_SPACE, RAS_DECODE_SEVERITY_FAIL_TO_INIT},
|
||||
{13, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_PCIE_AER, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_CORRECTED},
|
||||
{14, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_PCIE_AER, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{15, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_WAFL, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_CORRECTED},
|
||||
{16, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_WAFL, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{17, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_XGMI, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_CORRECTED},
|
||||
{18, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS, RAS_DECODE_ERROR_TYPE_XGMI, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{19, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{20, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{21, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{22, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{23, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{24, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_ALL, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_CORRECTED},
|
||||
{25, RAS_DECODE_CATEGORY_HBM_ERRORS, RAS_DECODE_ERROR_TYPE_ALL_OTHERS, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{26, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS, RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{27, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS, RAS_DECODE_ERROR_TYPE_WATCHDOG_TIMEOUT, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{28, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS, RAS_DECODE_ERROR_TYPE_ALL_OTHERS, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL},
|
||||
{29, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS, RAS_DECODE_ERROR_TYPE_ALL_OTHERS, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_CORRECTED},
|
||||
{30, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS, RAS_DECODE_ERROR_TYPE_ALL_OTHERS, RAS_DECODE_PROTOCOL_CPER, RAS_DECODE_SEVERITY_FATAL},
|
||||
{31, RAS_DECODE_CATEGORY_CPER_FORMAT, RAS_DECODE_ERROR_TYPE_MALFORMED_CPER, RAS_DECODE_PROTOCO_CPER_PLACEHOLDER
|
||||
|
||||
int get_error_id(const char *error_category, const char *error_type, const char *error_severity)
|
||||
{
|
||||
if (!error_category || !error_type || !error_severity ||
|
||||
strcmp(error_category, RAS_DECODE_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_type, RAS_DECODE_SEVERITY_UNKNOWN) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_UNKNOWN) == 0)
|
||||
{
|
||||
return RAS_DECODE_ERROR_INVALID_RAS_DECODE_DATA_ID; // Return ID for "Invalid Error" if any input is "UNKNOWN" or NULL
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < NUM_ERROR_ENTRIES; i++)
|
||||
{
|
||||
if (strcmp(error_map[i].error_category, error_category) == 0 &&
|
||||
strcmp(error_map[i].error_type, error_type) == 0 &&
|
||||
strcmp(error_map[i].error_severity, error_severity) == 0)
|
||||
{
|
||||
return (int)error_map[i].id;
|
||||
}
|
||||
}
|
||||
|
||||
return RAS_DECODE_ERROR_UNIDENTIFIED_ERROR_ID; // Return ID for "Unidentified Errors" if no match found
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "json_printer.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/* Print text as a double-quoted JSON string on stdout, escaping quotes,
 * backslashes and control characters. A NULL pointer prints as "". */
static void print_json_escaped_string(const char *text)
{
    const unsigned char *p = (const unsigned char *)(text ? text : "");

    putchar('"');
    for (; *p != '\0'; p++) {
        switch (*p) {
            case '"':  fputs("\\\"", stdout); break;
            case '\\': fputs("\\\\", stdout); break;
            case '\b': fputs("\\b", stdout); break;
            case '\f': fputs("\\f", stdout); break;
            case '\n': fputs("\\n", stdout); break;
            case '\r': fputs("\\r", stdout); break;
            case '\t': fputs("\\t", stdout); break;
            default:
                if (*p < 0x20) {
                    printf("\\u%04x", (unsigned)*p);
                } else {
                    putchar(*p);
                }
                break;
        }
    }
    putchar('"');
}

/*
 * Recursively print a JSON value on stdout.
 *
 * Numbers are printed with no fractional digits ("%.0f"). Object members are
 * printed one per line, indented by (indent + 3) single-space steps; arrays
 * are printed on one line. Strings and keys are JSON-escaped. A NULL value
 * prints nothing.
 *
 * value:  value to print.
 * indent: indentation level of the enclosing object.
 */
static void print_json_value_internal(JsonValue *value, int indent) {
    if (!value) return;

    switch (value->type) {
        case JSON_NULL:
            printf("null");
            break;
        case JSON_BOOL:
            printf("%s", value->data.boolean ? "true" : "false");
            break;
        case JSON_NUMBER:
            printf("%.0f", value->data.number);
            break;
        case JSON_STRING:
            print_json_escaped_string(value->data.string);
            break;
        case JSON_OBJECT: {
            printf("{\n");
            JsonPair *pair = value->data.object;
            bool first = true;
            while (pair) {
                if (!first) printf(",\n");
                for (int i = 0; i < indent + 3; i++) printf(" ");
                print_json_escaped_string(pair->key);
                printf(": ");
                print_json_value_internal(pair->value, indent + 3);
                pair = pair->next;
                first = false;
            }
            printf("\n");
            for (int i = 0; i < indent; i++) printf(" ");
            printf("}");
            break;
        }
        case JSON_ARRAY: {
            printf("[");
            for (size_t i = 0; i < value->data.array.count; i++) {
                if (i > 0) printf(", ");
                print_json_value_internal(value->data.array.items[i], indent);
            }
            printf("]");
            break;
        }
        default:
            /* Unknown type tag: print nothing. */
            break;
    }
}
|
||||
|
||||
/* Print a JSON value on stdout starting at indentation level 0, followed by
 * a newline. The newline is written even when value is NULL. */
void print_json_value(JsonValue *value) {
    const int top_level_indent = 0;

    print_json_value_internal(value, top_level_indent);
    putchar('\n');
}
|
||||
@@ -0,0 +1,205 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "json_util.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define JSON_ARRAY_INITIAL_CAPACITY 16
|
||||
|
||||
JsonValue* json_create_null(void) {
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
val->type = JSON_NULL;
|
||||
return val;
|
||||
}
|
||||
|
||||
JsonValue* json_create_bool(bool b) {
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
val->type = JSON_BOOL;
|
||||
val->data.boolean = b;
|
||||
return val;
|
||||
}
|
||||
|
||||
JsonValue* json_create_number(double num) {
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
val->type = JSON_NUMBER;
|
||||
val->data.number = num;
|
||||
return val;
|
||||
}
|
||||
|
||||
JsonValue* json_create_string(const char *str) {
|
||||
if (!str) return NULL;
|
||||
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
|
||||
val->type = JSON_STRING;
|
||||
val->data.string = strdup(str);
|
||||
if (!val->data.string) {
|
||||
free(val);
|
||||
return NULL;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
JsonValue* json_create_object(void) {
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
val->type = JSON_OBJECT;
|
||||
val->data.object = NULL;
|
||||
return val;
|
||||
}
|
||||
|
||||
JsonValue* json_create_array(void) {
|
||||
JsonValue *val = calloc(1, sizeof(JsonValue));
|
||||
if (!val) return NULL;
|
||||
|
||||
val->type = JSON_ARRAY;
|
||||
val->data.array.items = malloc(sizeof(JsonValue*) * JSON_ARRAY_INITIAL_CAPACITY);
|
||||
if (!val->data.array.items) {
|
||||
free(val);
|
||||
return NULL;
|
||||
}
|
||||
val->data.array.count = 0;
|
||||
val->data.array.capacity = JSON_ARRAY_INITIAL_CAPACITY;
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
 * Store value under key in a JSON object.
 *
 * If the key already exists its previous value is freed and replaced;
 * otherwise a new pair (with a private copy of key) is appended, so pairs
 * keep insertion order. On success the object owns value.
 *
 * Nothing is stored (and value is left untouched) when obj is NULL or not a
 * JSON_OBJECT, key or value is NULL, or an allocation fails.
 */
void json_object_set(JsonValue *obj, const char *key, JsonValue *value) {
    if (!obj || obj->type != JSON_OBJECT || !key || !value) return;

    /* Walk the list once; `link` ends up pointing at the tail's next slot. */
    JsonPair **link = &obj->data.object;
    for (; *link != NULL; link = &(*link)->next) {
        JsonPair *current = *link;
        if (strcmp(current->key, key) == 0) {
            /* Re-setting the value already stored must not free it. */
            if (current->value != value) {
                json_free(current->value);
            }
            current->value = value;
            return;
        }
    }

    // Key doesn't exist, create new pair
    JsonPair *pair = malloc(sizeof *pair);
    if (!pair) return;

    pair->key = strdup(key);
    if (!pair->key) {
        free(pair);
        return;
    }

    pair->value = value;
    pair->next = NULL;
    *link = pair;
}
|
||||
|
||||
/* Return the value stored under key in a JSON object, or NULL when obj is
 * NULL or not a JSON_OBJECT, key is NULL, or the key is absent. The returned
 * pointer is the stored value itself, not a copy. */
JsonValue* json_object_get(JsonValue *obj, const char *key) {
    if (obj == NULL || key == NULL || obj->type != JSON_OBJECT) {
        return NULL;
    }

    for (JsonPair *entry = obj->data.object; entry != NULL; entry = entry->next) {
        if (strcmp(entry->key, key) == 0) {
            return entry->value;
        }
    }
    return NULL;
}
|
||||
|
||||
/* Report whether json_object_get() finds a value for key in obj. */
bool json_object_has_key(JsonValue *obj, const char *key) {
    JsonValue *found = json_object_get(obj, key);
    return found != NULL;
}
|
||||
|
||||
/*
 * Append value to a JSON array, growing the storage when it is full.
 *
 * On success the array owns value and true is returned. Returns false, leaving
 * the array unchanged and value untouched, when arr is NULL or not a
 * JSON_ARRAY, value is NULL, or the storage cannot be grown.
 */
bool json_array_push(JsonValue *arr, JsonValue *value) {
    if (!arr || arr->type != JSON_ARRAY || !value) return false;

    // Resize array if needed
    if (arr->data.array.count >= arr->data.array.capacity) {
        const size_t old_capacity = arr->data.array.capacity;
        /* Doubling zero would never grow, so start from the initial size. */
        const size_t new_capacity = old_capacity ? old_capacity * 2
                                                 : JSON_ARRAY_INITIAL_CAPACITY;

        /* Reject wrap-around of the capacity or of the byte size. */
        if (new_capacity <= old_capacity ||
            new_capacity > (size_t)-1 / sizeof(JsonValue*)) {
            return false;
        }

        JsonValue **new_items = realloc(arr->data.array.items,
                                        sizeof(JsonValue*) * new_capacity);
        if (!new_items) return false;

        arr->data.array.items = new_items;
        arr->data.array.capacity = new_capacity;
    }

    arr->data.array.items[arr->data.array.count] = value;
    arr->data.array.count++;
    return true;
}
|
||||
|
||||
/* Return the element at index, or NULL when arr is NULL, is not a
 * JSON_ARRAY, or index is past the last element. */
JsonValue* json_array_get(JsonValue *arr, size_t index) {
    if (arr == NULL || arr->type != JSON_ARRAY) {
        return NULL;
    }
    return (index < arr->data.array.count) ? arr->data.array.items[index] : NULL;
}
|
||||
|
||||
/* Return the number of elements in a JSON array; 0 when arr is NULL or is
 * not a JSON_ARRAY. */
size_t json_array_size(JsonValue *arr) {
    const bool is_array = (arr != NULL) && (arr->type == JSON_ARRAY);
    return is_array ? arr->data.array.count : 0;
}
|
||||
|
||||
/**
 * Release a JSON value together with everything it holds.
 *
 * - JSON_STRING: the string buffer is freed.
 * - JSON_OBJECT: every pair's key is freed, its value is released
 *   recursively, and the pair node itself is freed.
 * - JSON_ARRAY: every stored element is released recursively, then the item
 *   table is freed.
 * - Any other type holds no separately freed payload.
 *
 * Finally the value node itself is freed. Passing NULL is a no-op.
 *
 * @param val  Value to release; may be NULL.
 */
void json_free(JsonValue *val) {
    if (val == NULL) {
        return;
    }

    if (val->type == JSON_STRING) {
        free(val->data.string);
    } else if (val->type == JSON_OBJECT) {
        JsonPair *pair = val->data.object;
        while (pair != NULL) {
            /* Remember the successor before the node is released. */
            JsonPair *following = pair->next;
            free(pair->key);
            json_free(pair->value);
            free(pair);
            pair = following;
        }
    } else if (val->type == JSON_ARRAY) {
        for (size_t idx = 0; idx < val->data.array.count; idx++) {
            json_free(val->data.array.items[idx]);
        }
        free(val->data.array.items);
    }

    free(val);
}
|
||||
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file main.c
|
||||
* @brief Demo program showing how to use the ACA decoder
|
||||
*
|
||||
* This is a demonstration program that shows how to use the ACA decoder
|
||||
* with sample raw data to decode ACA error information.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ras_decode_api.h>
|
||||
#include <aca_version.h>
|
||||
#include <ras_decode_constants.h>
|
||||
#include <aca_decode.h>
|
||||
#include <json_printer.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
// Function prototypes
|
||||
void print_version_info(void);
|
||||
void demonstrate_json_decoding(void);
|
||||
|
||||
void print_version_info(void)
|
||||
{
|
||||
printf("=== ACA Decoder Library Version Information ===\n");
|
||||
printf("Version: %s\n", aca_get_version_string());
|
||||
printf("Major: %d\n", aca_get_version_major());
|
||||
printf("Minor: %d\n", aca_get_version_minor());
|
||||
printf("Patch: %d\n", aca_get_version_patch());
|
||||
|
||||
aca_version_info_t version_info = aca_get_version_info();
|
||||
printf("Complete version info:\n");
|
||||
printf(" Major: %d\n", version_info.major);
|
||||
printf(" Minor: %d\n", version_info.minor);
|
||||
printf(" Patch: %d\n", version_info.patch);
|
||||
printf(" String: %s\n", version_info.string);
|
||||
printf("===============================================\n\n");
|
||||
}
|
||||
|
||||
// Function to demonstrate JSON decoding functionality
|
||||
void demonstrate_json_decoding(void)
|
||||
{
|
||||
printf("=== ACA Decoder - JSON Output Examples ===\n");
|
||||
|
||||
// Example 1: HBM FATAL ERROR (32-byte array)
|
||||
uint64_t register_array_32[RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbaa000000004081b, 0x0, 0x209600090f00, 0x5d000000};
|
||||
printf("\n--- HBM FATAL ERROR (32-byte array) ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(register_array_32, RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1, 1));
|
||||
|
||||
JsonValue *json_result_32 = decode_error_info(register_array_32, RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1, 1);
|
||||
if (json_result_32) {
|
||||
print_json_value(json_result_32);
|
||||
json_free(json_result_32);
|
||||
}
|
||||
|
||||
// Example 2: GC FATAL ERROR
|
||||
uint64_t register_array_test[RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbea00000003b0000, 0x100000029, 0x1200136430400, 0x20b};
|
||||
printf("\n--- GC FATAL ERROR ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(register_array_test, RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1, 1));
|
||||
|
||||
JsonValue *json_result_test = decode_error_info(register_array_test, RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1, 1);
|
||||
if (json_result_test) {
|
||||
print_json_value(json_result_test);
|
||||
json_free(json_result_test);
|
||||
}
|
||||
|
||||
// Example 3: HBM CORRECTED ERROR (128-byte array)
|
||||
uint64_t register_array_128[RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES] = {
|
||||
0xffff,
|
||||
0xdc2040000000011b,
|
||||
0x0,
|
||||
0xd008000801000000,
|
||||
0x25000001ff,
|
||||
0x209600191f00,
|
||||
0xa000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0xd008000801000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0};
|
||||
|
||||
printf("\n--- HBM CORRECTED ERROR (128-byte array) ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(register_array_128, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1, 1));
|
||||
|
||||
JsonValue *json_result_128 = decode_error_info(register_array_128, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1, 1);
|
||||
if (json_result_128) {
|
||||
print_json_value(json_result_128);
|
||||
json_free(json_result_128);
|
||||
}
|
||||
|
||||
// Example 4: PCS XGMI Error
|
||||
uint64_t register_array_pcs_xgmi[RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES] = {
|
||||
0xffffffff,
|
||||
0x9820000000060150,
|
||||
0x0,
|
||||
0xd008000200000000,
|
||||
0x27000001f9,
|
||||
0xe05012109201,
|
||||
0xaf812d4a000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0};
|
||||
|
||||
printf("\n--- PCS XGMI Error ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(register_array_pcs_xgmi, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1, 1));
|
||||
|
||||
JsonValue *json_result_pcs = decode_error_info(register_array_pcs_xgmi, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1, 1);
|
||||
if (json_result_pcs) {
|
||||
print_json_value(json_result_pcs);
|
||||
json_free(json_result_pcs);
|
||||
}
|
||||
|
||||
// Example 5: Bad page (threshold exceeded flag)
|
||||
uint64_t register_array_bad_page[RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES] = {
|
||||
0x1,
|
||||
0xb000000000000137,
|
||||
0x0,
|
||||
0x0,
|
||||
0x1ff00000002,
|
||||
0x9600000000,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0,
|
||||
0x0};
|
||||
|
||||
printf("\n--- Bad Page (Threshold Exceeded) ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(register_array_bad_page, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, RAS_DECODE_FLAG_THRESHOLD_EXCEEDED, 1, 1));
|
||||
|
||||
JsonValue *json_result_bad_page = decode_error_info(register_array_bad_page, RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES, RAS_DECODE_FLAG_THRESHOLD_EXCEEDED, 1, 1);
|
||||
if (json_result_bad_page) {
|
||||
print_json_value(json_result_bad_page);
|
||||
json_free(json_result_bad_page);
|
||||
}
|
||||
|
||||
// Example 6: Boot Error Demo
|
||||
uint64_t boot_messages[8] = {
|
||||
0x3c000228a4, // Oam0bootmsg
|
||||
0x3c001228a4, // Oam1bootmsg
|
||||
0x3c002228a4, // Oam2bootmsg
|
||||
0x3c003128a4, // Oam3bootmsg
|
||||
0x3c004328a4, // Oam4bootmsg
|
||||
0x3c005228a4, // Oam5bootmsg
|
||||
0x3c006228a4, // Oam6bootmsg
|
||||
0x3c007228a4 // Oam7bootmsg
|
||||
};
|
||||
|
||||
printf("\n--- Boot Error Demo ---\n");
|
||||
printf("Decoded AFID: %d\n", decode_afid(boot_messages, sizeof(boot_messages)/sizeof(boot_messages[0]), 0, 1, 9));
|
||||
|
||||
JsonValue *json_result_boot = decode_error_info(boot_messages, sizeof(boot_messages)/sizeof(boot_messages[0]), 0, 1, 9);
|
||||
if (json_result_boot) {
|
||||
print_json_value(json_result_boot);
|
||||
json_free(json_result_boot);
|
||||
} else {
|
||||
printf("Failed to decode boot messages\n");
|
||||
}
|
||||
|
||||
printf("\n===========================================\n");
|
||||
}
|
||||
|
||||
/**
 * Demo entry point: prints the library version, then runs the JSON decoding
 * examples.
 *
 * Declared as `int main(void)` so the definition is a proper prototype;
 * before C23 an empty parameter list `()` leaves the parameters unspecified.
 *
 * @return Always 0.
 */
int main(void)
{
    // Display version information
    print_version_info();

    // Demonstrate the new JSON-based ACA decoding functionality
    demonstrate_json_decoding();

    return 0;
}
|
||||
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "aca_decode.h"
|
||||
#include "ras_decode_constants.h"
|
||||
#include "boot_decode.h"
|
||||
#include "error_map.h"
|
||||
#include "json_util.h"
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type)
|
||||
{
|
||||
if (!register_array)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Use decode_error_info to get the JSON result
|
||||
JsonValue *json_result = decode_error_info(register_array, array_len, flag, hw_revision, register_context_type);
|
||||
if (!json_result) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Use the decode_error_info_afid function to extract AFID
|
||||
int afid = decode_error_info_afid(json_result);
|
||||
|
||||
json_free(json_result);
|
||||
return afid;
|
||||
}
|
||||
|
||||
JsonValue* decode_error_info(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision, uint16_t register_context_type)
|
||||
{
|
||||
if (!register_array)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Check register context type parameter
|
||||
if (register_context_type == 9)
|
||||
{
|
||||
// For boot decode, use boot_decode_orchestrator with register_array and array_len
|
||||
// Flag is not used in boot decode
|
||||
return boot_decode_orchestrator((uint64_t*)register_array, array_len);
|
||||
}
|
||||
else if (register_context_type == 1)
|
||||
{
|
||||
// For ACA decode, use existing logic
|
||||
aca_raw_data_t raw_data = {0};
|
||||
|
||||
if (array_len == RAS_DECODE_REGISTER_ARRAY_SIZE_32_BYTES) // 32 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[0];
|
||||
raw_data.aca_addr = register_array[1];
|
||||
raw_data.aca_ipid = register_array[2];
|
||||
raw_data.aca_synd = register_array[3];
|
||||
}
|
||||
else if (array_len == RAS_DECODE_REGISTER_ARRAY_SIZE_128_BYTES) // 128 bytes
|
||||
{
|
||||
raw_data.aca_status = register_array[1];
|
||||
raw_data.aca_addr = register_array[2];
|
||||
raw_data.aca_ipid = register_array[5];
|
||||
raw_data.aca_synd = register_array[6];
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL; // Unsupported size
|
||||
}
|
||||
|
||||
raw_data.flags = flag;
|
||||
raw_data.hw_revision = hw_revision;
|
||||
|
||||
return aca_decode(&raw_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
return NULL; // Invalid register context type
|
||||
}
|
||||
}
|
||||
|
||||
int decode_error_info_afid(JsonValue *error_json)
|
||||
{
|
||||
if (!error_json || error_json->type != JSON_OBJECT) {
|
||||
return -1; // Invalid AFID for null or invalid JSON
|
||||
}
|
||||
|
||||
// Check if this is MCA error
|
||||
JsonValue *category_value = json_object_get(error_json, "error_category");
|
||||
JsonValue *type_value = json_object_get(error_json, "error_type");
|
||||
JsonValue *severity_value = json_object_get(error_json, "severity");
|
||||
|
||||
if (category_value && type_value && severity_value &&
|
||||
category_value->type == JSON_STRING && type_value->type == JSON_STRING && severity_value->type == JSON_STRING) {
|
||||
const char *error_category = category_value->data.string;
|
||||
const char *error_type = type_value->data.string;
|
||||
const char *error_severity = severity_value->data.string;
|
||||
|
||||
// Check for the specific case: HBM Errors + Bad Page Retirement Threshold + Fatal
|
||||
if (strcmp(error_category, RAS_DECODE_CATEGORY_HBM_ERRORS) == 0 &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD) == 0 &&
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_FATAL) == 0) {
|
||||
|
||||
// Use the error_type directly as service_error for this case
|
||||
return get_error_id(error_category, error_type, error_severity);
|
||||
}
|
||||
|
||||
// For other cases, we need to determine the service_error_type based on the logic
|
||||
// from get_service_error_type function
|
||||
const char *service_error = NULL;
|
||||
|
||||
// Extract bank if needed for service error type determination
|
||||
JsonValue *bank_value = json_object_get(error_json, "bank");
|
||||
const char *error_bank = (bank_value && bank_value->type == JSON_STRING) ? bank_value->data.string : "";
|
||||
|
||||
if (strcmp(error_type, RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD) == 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
|
||||
}
|
||||
else if (strcmp(error_category, RAS_DECODE_CATEGORY_HBM_ERRORS) == 0 && strcmp(error_severity, RAS_DECODE_SEVERITY_CORRECTED) == 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_ALL;
|
||||
}
|
||||
else if (strcmp(error_type, "RdCrcErr") == 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_END_TO_END_CRC;
|
||||
}
|
||||
else if (strcmp(error_category, RAS_DECODE_CATEGORY_HBM_ERRORS) == 0 && strcmp(error_severity, RAS_DECODE_SEVERITY_FATAL) == 0 &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_ON_DIE_ECC) != 0 && strcmp(error_type, RAS_DECODE_ERROR_TYPE_END_TO_END_CRC) != 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_ALL_OTHERS;
|
||||
}
|
||||
else if (strcmp(error_category, RAS_DECODE_CATEGORY_DEVICE_INTERNAL_ERRORS) == 0) {
|
||||
if ((strcmp(error_severity, RAS_DECODE_SEVERITY_UNCORRECTED_NON_FATAL) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_CORRECTED) == 0 ||
|
||||
strcmp(error_severity, RAS_DECODE_SEVERITY_FATAL) == 0) &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_HARDWARE_ASSERTION) != 0 &&
|
||||
strcmp(error_type, RAS_DECODE_ERROR_TYPE_WATCHDOG_TIMEOUT) != 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_ALL_OTHERS;
|
||||
}
|
||||
}
|
||||
else if (strcmp(error_category, RAS_DECODE_CATEGORY_OFF_PACKAGE_LINK_ERRORS) == 0) {
|
||||
if (strcmp(error_bank, RAS_DECODE_BANK_PCS_XGMI) == 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_XGMI;
|
||||
}
|
||||
else if (strcmp(error_bank, RAS_DECODE_BANK_KPX_WAFL) == 0) {
|
||||
service_error = RAS_DECODE_ERROR_TYPE_WAFL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!service_error) {
|
||||
service_error = error_type; // Fallback to error_type
|
||||
}
|
||||
|
||||
return get_error_id(error_category, service_error, error_severity);
|
||||
}
|
||||
|
||||
// Check if this is a boot error
|
||||
// Find the first msg<i> key to get the error_type
|
||||
JsonPair *current_pair = error_json->data.object;
|
||||
JsonValue *first_msg = NULL;
|
||||
int lowest_msg_index = INT_MAX;
|
||||
|
||||
while (current_pair) {
|
||||
if (strncmp(current_pair->key, "msg", 3) == 0) {
|
||||
// Extract the message index
|
||||
int msg_index = atoi(current_pair->key + 3);
|
||||
if (msg_index < lowest_msg_index) {
|
||||
lowest_msg_index = msg_index;
|
||||
first_msg = current_pair->value;
|
||||
}
|
||||
}
|
||||
current_pair = current_pair->next;
|
||||
}
|
||||
|
||||
if (first_msg && first_msg->type == JSON_OBJECT) {
|
||||
// This is a boot error - extract error_type from the first message
|
||||
JsonValue *boot_error_type = json_object_get(first_msg, "error_type");
|
||||
if (boot_error_type && boot_error_type->type == JSON_STRING) {
|
||||
const char *service_error = NULL;
|
||||
service_error = boot_error_type->data.string;
|
||||
|
||||
// For boot errors, always use Boot-Time Errors category and Fail-to-init severity
|
||||
return get_error_id(RAS_DECODE_CATEGORY_BOOT_TIME_ERRORS, service_error, RAS_DECODE_SEVERITY_FAIL_TO_INIT);
|
||||
}
|
||||
}
|
||||
|
||||
return -1; // Invalid AFID if neither MCA nor boot error format
|
||||
}
|
||||
@@ -44,7 +44,7 @@ fi
|
||||
|
||||
# Find all source files
|
||||
mapfile -t FILES < <(
|
||||
find . \( -name build -o -name .git -o -path "./src/aca-decode" -o -path "./esmi_ib_library" -o -path "./rocm_smi/include/rocm_smi/kfd_ioctl.h" \) -prune -o \
|
||||
find . \( -name build -o -name .git -o -path "./src/ras-decode-instinct-staging" -o -path "./esmi_ib_library" -o -path "./rocm_smi/include/rocm_smi/kfd_ioctl.h" \) -prune -o \
|
||||
\( -name "*.cc" -o -name "*.cpp" -o -name "*.c" \) -print
|
||||
)
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user