[SWDEV-547223] RAS HBM CRC Read CE failed due to missing AFID 24

Cherry-pick of aca-decode repo changeset f9e5ad5 (HEAD -> main, origin/main, origin/HEAD): Fix bug in Corrected HBM Error being decoded as AFID 34 (#5)


[ROCm/amdsmi commit: ffca095246]
Этот коммит содержится в:
Oosman Saeed
2025-08-14 20:04:16 +00:00
коммит произвёл Arif, Maisam
родитель 3779562abb
Коммит 7c83dac63d
9 изменённых файлов: 546 добавлений и 138 удалений
+119
Просмотреть файл
@@ -0,0 +1,119 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* @file aca_constants.h
* @brief Shared constants for ACA error decoding
*
* This file contains string constants and numerical constants that are used
* across multiple source files to improve maintainability and prevent typos.
*/
#ifndef ACA_CONSTANTS_H
#define ACA_CONSTANTS_H
/* Error severity constants */
#define ACA_SEVERITY_UNKNOWN "UNKNOWN"
#define ACA_SEVERITY_FATAL "Fatal"
#define ACA_SEVERITY_CORRECTED "Corrected"
#define ACA_SEVERITY_UNCORRECTED_NON_FATAL "Uncorrected, Non-fatal"
#define ACA_SEVERITY_FAIL_TO_INIT "Fail-to-init"
#define ACA_SEVERITY_ALL_CAPS "ALL"
/* Error category constants */
#define ACA_CATEGORY_HBM_ERRORS "HBM Errors"
#define ACA_CATEGORY_DEVICE_INTERNAL_ERRORS "Device Internal Errors"
#define ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS "Off-Package Link Errors"
#define ACA_CATEGORY_BOOT_TIME_ERRORS "Boot-Time Errors"
#define ACA_CATEGORY_CPER_FORMAT "CPER Format"
#define ACA_CATEGORY_UNIDENTIFIED_ERRORS "Unidentified Errors"
/* Common error type constants */
#define ACA_ERROR_TYPE_ALL_OTHERS "All Others"
#define ACA_ERROR_TYPE_ALL "All"
#define ACA_ERROR_TYPE_DECODE_INAPPLICABLE "Decode Inapplicable"
#define ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD "Bad Page Retirement Threshold"
#define ACA_ERROR_TYPE_HARDWARE_ASSERTION "Hardware Assertion (HWA)"
#define ACA_ERROR_TYPE_WATCHDOG_TIMEOUT "Watchdog Timeout (WDT)"
#define ACA_ERROR_TYPE_ON_DIE_ECC "On-die ECC"
#define ACA_ERROR_TYPE_END_TO_END_CRC "End-to-end CRC"
#define ACA_ERROR_TYPE_WAFL "WAFL"
#define ACA_ERROR_TYPE_XGMI "XGMI"
/* Boot-time error type constants */
#define ACA_ERROR_TYPE_FW_LOAD "FW Load"
#define ACA_ERROR_TYPE_HBM_BIST_TEST "HBM BIST Test"
#define ACA_ERROR_TYPE_HBM_MEMORY_TEST "HBM Memory Test"
#define ACA_ERROR_TYPE_HBM_TRAINING "HBM Training"
#define ACA_ERROR_TYPE_UNHANDLED "Unhandled"
#define ACA_ERROR_TYPE_UNKNOWN_ERROR "Unknown"
#define ACA_ERROR_TYPE_USR_CP_LINK_TRAINING "USR CP Link Training"
#define ACA_ERROR_TYPE_USR_DP_LINK_TRAINING "USR DP Link Training"
#define ACA_ERROR_TYPE_WAFL_LINK_TRAINING "WAFL Link Training"
#define ACA_ERROR_TYPE_XGMI_LINK_TRAINING "XGMI Link Training"
#define ACA_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT "Boot Controller Data Abort"
#define ACA_ERROR_TYPE_BOOT_CONTROLLER_GENERIC "Boot Controller Generic"
/* Link error type constants */
#define ACA_ERROR_TYPE_PCIE_AER "PCIe AER"
/* CPER format error type constants */
#define ACA_ERROR_TYPE_MALFORMED_CPER "Malformed CPER"
#define ACA_ERROR_TYPE_INCOMPLETE_ACA_DATA "Incomplete ACA Data"
#define ACA_ERROR_TYPE_INVALID_ACA_DATA "Invalid ACA Data"
#define ACA_ERROR_TYPE_UNIDENTIFIED_ERROR "Unidentified Error"
/* Protocol constants */
#define ACA_PROTOCOL_CPER "CPER"
#define ACA_PROTOCOL_CPER_WITH_SPACE "CPER "
/* Bank name strings */
#define ACA_BANK_UMC "umc"
#define ACA_BANK_PSP "psp"
#define ACA_BANK_CS "cs"
#define ACA_BANK_PIE "pie"
#define ACA_BANK_PCS_XGMI "pcs_xgmi"
#define ACA_BANK_KPX_SERDES "kpx_serdes"
#define ACA_BANK_KPX_WAFL "kpx_wafl"
/* Numerical constants */
#define ACA_FLAG_THRESHOLD_EXCEEDED 0x8
#define ACA_REGISTER_ARRAY_SIZE_32_BYTES 4
#define ACA_REGISTER_ARRAY_SIZE_128_BYTES 16
/* Error code ranges */
#define ACA_ERROR_CODE_EXT_MIN 0x3A
#define ACA_ERROR_CODE_EXT_MAX 0x3E
/* Instance ID values for XCD and AID error decoding */
#define ACA_INSTANCE_ID_XCD0_400 0x36430400
#define ACA_INSTANCE_ID_XCD1_400 0x38430400
#define ACA_INSTANCE_ID_XCD0_401 0x36430401
#define ACA_INSTANCE_ID_XCD1_401 0x38430401
#define ACA_INSTANCE_ID_AID_400 0x3B30400
#define ACA_INSTANCE_ID_AID_401 0x3B30401
/* Error return codes */
#define ACA_ERROR_INVALID_ACA_DATA_ID 33
#define ACA_ERROR_UNIDENTIFIED_ERROR_ID 34
#endif /* ACA_CONSTANTS_H */
+95
Просмотреть файл
@@ -0,0 +1,95 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef ACA_VERSION_H
#define ACA_VERSION_H
#ifdef __cplusplus
extern "C"
{
#endif
/**
* @brief ACA Decoder Library Version Information
*
* This header defines version constants and functions for the ACA Decoder library.
* Version follows Semantic Versioning (SemVer) specification: MAJOR.MINOR.PATCH
*
* - MAJOR: Incremented for incompatible API changes
* - MINOR: Incremented for backward-compatible functionality additions
* - PATCH: Incremented for backward-compatible bug fixes
*/
/*
 * Version Components
 *
 * Keep these as plain decimal integer literals (no parentheses, suffixes or
 * expressions): ACA_VERSION_STRING below is generated from their spelling.
 */
#define ACA_VERSION_MAJOR 1 /**< Major version number */
#define ACA_VERSION_MINOR 0 /**< Minor version number */
#define ACA_VERSION_PATCH 0 /**< Patch version number */
/*
 * Version String
 *
 * Derived from the numeric components so the string and the numbers can never
 * drift apart when one of them is bumped. The two-level helper is required so
 * that the component macros are expanded before '#' stringizes them.
 * With the values above this expands to "1" "." "0" "." "0", i.e. "1.0.0".
 */
#define ACA_VERSION_STRINGIFY_(x) #x
#define ACA_VERSION_STRINGIFY(x) ACA_VERSION_STRINGIFY_(x)
#define ACA_VERSION_STRING                       \
    ACA_VERSION_STRINGIFY(ACA_VERSION_MAJOR) "." \
    ACA_VERSION_STRINGIFY(ACA_VERSION_MINOR) "." \
    ACA_VERSION_STRINGIFY(ACA_VERSION_PATCH)
/**
 * @brief Library version bundled into a single value.
 *
 * Holds the three numeric components together with their textual form, as
 * returned by aca_get_version_info().
 */
typedef struct {
    int major;          /**< MAJOR component */
    int minor;          /**< MINOR component */
    int patch;          /**< PATCH component */
    const char *string; /**< Text form of the version (e.g., "1.0.0") */
} aca_version_info_t;
/**
* @brief Get the major version number
*
* The implementation returns the ACA_VERSION_MAJOR value it was compiled with.
*
* @return Major version number
*/
int aca_get_version_major(void);
/**
* @brief Get the minor version number
*
* The implementation returns the ACA_VERSION_MINOR value it was compiled with.
*
* @return Minor version number
*/
int aca_get_version_minor(void);
/**
* @brief Get the patch version number
*
* The implementation returns the ACA_VERSION_PATCH value it was compiled with.
*
* @return Patch version number
*/
int aca_get_version_patch(void);
/**
* @brief Get the version string
*
* The implementation returns ACA_VERSION_STRING, a string literal: the pointer
* stays valid for the lifetime of the program and must not be modified or
* freed by the caller.
*
* @return Pointer to version string (e.g., "1.0.0")
*/
const char *aca_get_version_string(void);
/**
* @brief Get complete version information
*
* The structure is returned by value; its string member points at the same
* string literal that aca_get_version_string() returns.
*
* @return Structure containing all version information
*/
aca_version_info_t aca_get_version_info(void);
#ifdef __cplusplus
}
#endif
#endif /* ACA_VERSION_H */
+15 -28
Просмотреть файл
@@ -1,4 +1,3 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
@@ -21,8 +20,8 @@
* THE SOFTWARE.
*/
#include "aca_decode.h"
#include <utils.h>
#include "aca_decode.h"
#include "aca_constants.h"
int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag, uint16_t hw_revision)
{
@@ -33,20 +32,21 @@ int decode_afid(const uint64_t *register_array, size_t array_len, uint32_t flag,
aca_raw_data_t raw_data;
if (array_len == 4) // 32 bytes
if (array_len == ACA_REGISTER_ARRAY_SIZE_32_BYTES) // 32 bytes
{
raw_data.aca_status = register_array[0];
raw_data.aca_addr = register_array[1];
raw_data.aca_ipid = register_array[2];
raw_data.aca_synd = register_array[3];
}
else if (array_len == 16) // 128 bytes
else if (array_len == ACA_REGISTER_ARRAY_SIZE_128_BYTES) // 128 bytes
{
raw_data.aca_status = register_array[1];
raw_data.aca_addr = register_array[2];
raw_data.aca_ipid = register_array[5];
raw_data.aca_synd = register_array[6];
}
else
{
return -1; // Unsupported size
@@ -67,32 +67,19 @@ aca_error_info_t decode_error_info(const uint64_t *register_array, size_t array_
if (!register_array)
{
return error_info;
}
// Create a copy of the register array to avoid modifying the original
uint64_t converted_array[16];
if (array_len > 16) {
return error_info;
}
// Copy and convert the array
for (size_t i = 0; i < array_len; i++) {
converted_array[i] = le64_to_be64(register_array[i]);
}
if (array_len == 4) // 32 bytes
} if (array_len == ACA_REGISTER_ARRAY_SIZE_32_BYTES) // 32 bytes
{
raw_data.aca_status = converted_array[0];
raw_data.aca_addr = converted_array[1];
raw_data.aca_ipid = converted_array[2];
raw_data.aca_synd = converted_array[3];
raw_data.aca_status = register_array[0];
raw_data.aca_addr = register_array[1];
raw_data.aca_ipid = register_array[2];
raw_data.aca_synd = register_array[3];
}
else if (array_len == 16) // 128 bytes
else if (array_len == ACA_REGISTER_ARRAY_SIZE_128_BYTES) // 128 bytes
{
raw_data.aca_status = converted_array[1];
raw_data.aca_addr = converted_array[2];
raw_data.aca_ipid = converted_array[5];
raw_data.aca_synd = converted_array[6];
raw_data.aca_status = register_array[1];
raw_data.aca_addr = register_array[2];
raw_data.aca_ipid = register_array[5];
raw_data.aca_synd = register_array[6];
}
else
{
+59 -59
Просмотреть файл
@@ -1,4 +1,3 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
@@ -33,6 +32,7 @@
#include "aca_decode.h"
#include "aca_tables.h"
#include "error_map.h"
#include "aca_constants.h"
#include <string.h>
/**
@@ -61,18 +61,18 @@ aca_decoder_get_bank(const aca_decoder_t *decoder, const char **bank_name)
static const char *get_error_severity(const aca_status_fields_t *status)
{
if (status->poison)
return "Uncorrected, Non-fatal";
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
if (status->pcc)
return "Fatal";
return ACA_SEVERITY_FATAL;
if (!status->pcc && status->uc && status->tcc)
return "Fatal";
return ACA_SEVERITY_FATAL;
if (!status->pcc && status->uc && !status->tcc)
return "Uncorrected, Non-fatal";
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
if (!status->pcc && !status->uc && !status->tcc && status->deferred)
return "Uncorrected, Non-fatal";
return ACA_SEVERITY_UNCORRECTED_NON_FATAL;
if (!status->pcc && !status->uc && !status->tcc && !status->deferred)
return "Corrected";
return "UNKNOWN";
return ACA_SEVERITY_CORRECTED;
return ACA_SEVERITY_UNKNOWN;
}
/**
@@ -85,31 +85,31 @@ static const char *get_error_category(const char *bank, const char *error_type)
{
if (!bank || !error_type)
{
return "UNKNOWN";
return ACA_SEVERITY_UNKNOWN;
}
if (strcmp(bank, "umc") == 0)
if (strcmp(bank, ACA_BANK_UMC) == 0)
{
if (strcmp(error_type, "On-die ECC") == 0 ||
if (strcmp(error_type, ACA_ERROR_TYPE_ON_DIE_ECC) == 0 ||
strcmp(error_type, "WriteDataPoisonErr") == 0 ||
strcmp(error_type, "AddressCommandParityErr") == 0 ||
strcmp(error_type, "WriteDataCrcErr") == 0 ||
strcmp(error_type, "EcsErr") == 0 ||
strcmp(error_type, "RdCrcErr") == 0 ||
strcmp(error_type, "End-to-end CRC") == 0)
strcmp(error_type, ACA_ERROR_TYPE_END_TO_END_CRC) == 0)
{
return "HBM Errors";
return ACA_CATEGORY_HBM_ERRORS;
}
}
else if (strcmp(bank, "pcs_xgmi") == 0 ||
strcmp(bank, "kpx_serdes") == 0 ||
strcmp(bank, "kpx_wafl") == 0 ||
(strcmp(bank, "psp") == 0 && strcmp(error_type, "WAFL") == 0))
else if (strcmp(bank, ACA_BANK_PCS_XGMI) == 0 ||
strcmp(bank, ACA_BANK_KPX_SERDES) == 0 ||
strcmp(bank, ACA_BANK_KPX_WAFL) == 0 ||
(strcmp(bank, ACA_BANK_PSP) == 0 && strcmp(error_type, ACA_ERROR_TYPE_WAFL) == 0))
{
return "Off-Package Link Errors";
return ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS;
}
return "Device Internal Errors";
return ACA_CATEGORY_DEVICE_INTERNAL_ERRORS;
}
/**
@@ -125,55 +125,55 @@ static int get_service_error_type(const char *error_category, const char *error_
const char *error_severity, const char **service_error_type)
{
if (!error_category || !error_type || !error_severity || !service_error_type ||
strcmp(error_category, "UNKNOWN") == 0 ||
strcmp(error_type, "UNKNOWN") == 0 ||
strcmp(error_severity, "UNKNOWN") == 0)
strcmp(error_category, ACA_SEVERITY_UNKNOWN) == 0 ||
strcmp(error_type, ACA_SEVERITY_UNKNOWN) == 0 ||
strcmp(error_severity, ACA_SEVERITY_UNKNOWN) == 0)
{
return -1;
}
if (strcmp(error_type, "Bad Page Retirement Threshold") == 0)
if (strcmp(error_type, ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD) == 0)
{
*service_error_type = "Bad Page Retirement Threshold";
*service_error_type = ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
return 0;
}
if ((strcmp(error_category, ACA_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, ACA_SEVERITY_CORRECTED) == 0))
{
*service_error_type = ACA_ERROR_TYPE_ALL;
return 0;
}
if (strcmp(error_type, "RdCrcErr") == 0)
{
*service_error_type = "End-to-end CRC";
*service_error_type = ACA_ERROR_TYPE_END_TO_END_CRC;
return 0;
}
if ((strcmp(error_category, "HBM Errors") == 0) && (strcmp(error_severity, "Corrected") == 0))
if ((strcmp(error_category, ACA_CATEGORY_HBM_ERRORS) == 0) && (strcmp(error_severity, ACA_SEVERITY_FATAL) == 0) &&
(strcmp(error_type, ACA_ERROR_TYPE_ON_DIE_ECC) != 0) && (strcmp(error_type, ACA_ERROR_TYPE_END_TO_END_CRC) != 0))
{
*service_error_type = "All";
*service_error_type = ACA_ERROR_TYPE_ALL_OTHERS;
return 0;
}
if ((strcmp(error_category, "HBM Errors") == 0) && (strcmp(error_severity, "Fatal") == 0) &&
(strcmp(error_type, "On-die ECC") != 0) && (strcmp(error_type, "End-to-end CRC") != 0))
if (strcmp(error_category, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS) == 0)
{
*service_error_type = "All Others";
return 0;
}
if (strcmp(error_category, "Device Internal Errors") == 0)
{
if ((strcmp(error_severity, "Uncorrected, Non-fatal") == 0 ||
strcmp(error_severity, "Corrected") == 0 ||
strcmp(error_severity, "Fatal") == 0) &&
strcmp(error_type, "Hardware Assertion (HWA)") != 0 &&
strcmp(error_type, "Watchdog Timeout (WDT)") != 0)
if ((strcmp(error_severity, ACA_SEVERITY_UNCORRECTED_NON_FATAL) == 0 ||
strcmp(error_severity, ACA_SEVERITY_CORRECTED) == 0 ||
strcmp(error_severity, ACA_SEVERITY_FATAL) == 0) &&
strcmp(error_type, ACA_ERROR_TYPE_HARDWARE_ASSERTION) != 0 &&
strcmp(error_type, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT) != 0)
{
*service_error_type = "All Others";
*service_error_type = ACA_ERROR_TYPE_ALL_OTHERS;
return 0;
}
}
if (strcmp(error_category, "Off-Package Link Errors") == 0)
if (strcmp(error_category, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS) == 0)
{
if (strcmp(error_bank, "pcs_xgmi") == 0)
if (strcmp(error_bank, ACA_BANK_PCS_XGMI) == 0)
{
*service_error_type = "XGMI";
*service_error_type = ACA_ERROR_TYPE_XGMI;
return 0;
}
if (strcmp(error_bank, "kpx_wafl") == 0)
if (strcmp(error_bank, ACA_BANK_KPX_WAFL) == 0)
{
*service_error_type = "WAFL";
*service_error_type = ACA_ERROR_TYPE_WAFL;
return 0;
}
}
@@ -205,7 +205,7 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
result = aca_decoder_get_bank(decoder, &bank);
if (result < 0)
{
bank = "UNKNOWN";
bank = ACA_SEVERITY_UNKNOWN;
}
info->bank_ref = bank;
@@ -215,13 +215,13 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
}
else
{
info->instance_ref = "Decode Inapplicable";
info->instance_ref = ACA_ERROR_TYPE_DECODE_INAPPLICABLE;
}
// 0b1000 indicate error threshold has been exceeded, and is always fatal
if (decoder->flags & 0x8)
if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
{
info->severity_ref = "Fatal";
info->severity_ref = ACA_SEVERITY_FATAL;
}
else
{
@@ -242,31 +242,31 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
info->aid = -1; // Invalid value
}
if (decoder->status.error_code_ext >= 0x3A && decoder->status.error_code_ext <= 0x3E)
if (decoder->status.error_code_ext >= ACA_ERROR_CODE_EXT_MIN && decoder->status.error_code_ext <= ACA_ERROR_CODE_EXT_MAX)
{
uint32_t instance_id = decoder->ipid.instance_id_lo;
uint32_t error_info = decoder->synd.error_information & 0xFF;
if ((instance_id == 0x36430400 || instance_id == 0x38430400 ||
instance_id == 0x36430401 || instance_id == 0x38430401) &&
if ((instance_id == ACA_INSTANCE_ID_XCD0_400 || instance_id == ACA_INSTANCE_ID_XCD1_400 ||
instance_id == ACA_INSTANCE_ID_XCD0_401 || instance_id == ACA_INSTANCE_ID_XCD1_401) &&
find_error_in_table(xcd_error_table, NUM_XCD_ERRORS, error_info, &error_type) == 0)
{
info->error_type_ref = error_type;
}
else if ((instance_id == 0x3B30400 || instance_id == 0x3B30401) &&
else if ((instance_id == ACA_INSTANCE_ID_AID_400 || instance_id == ACA_INSTANCE_ID_AID_401) &&
find_error_in_table(aid_error_table, NUM_AID_ERRORS, error_info, &error_type) == 0)
{
info->error_type_ref = error_type;
}
else
{
info->error_type_ref = "UNKNOWN";
info->error_type_ref = ACA_SEVERITY_UNKNOWN;
}
}
// 0b1000 indicate error threshold has been exceeded
else if (decoder->flags & 0x8)
else if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
{
info->error_type_ref = "Bad Page Retirement Threshold";
info->error_type_ref = ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD;
}
else
{
@@ -276,14 +276,14 @@ static void aca_decoder_get_error_info(const aca_decoder_t *decoder, aca_error_i
}
else
{
info->error_type_ref = "UNKNOWN";
info->error_type_ref = ACA_SEVERITY_UNKNOWN;
}
}
// 0b1000 indicate error threshold has been exceeded, and is always a HBM error
if (decoder->flags & 0x8)
if (decoder->flags & ACA_FLAG_THRESHOLD_EXCEEDED)
{
info->category_ref = "HBM Errors";
info->category_ref = ACA_CATEGORY_HBM_ERRORS;
}
else
{
+11 -11
Просмотреть файл
@@ -1,4 +1,3 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
@@ -34,6 +33,7 @@
*/
#include "aca_tables.h"
#include "aca_constants.h"
#include <stdint.h>
#include <stddef.h>
#include <string.h>
@@ -80,19 +80,19 @@ const aca_error_type_t error_table[] = {
{"cs", 0xe, "FTI_ND_ILL_REQ"},
{"cs", 0xf, "FTI_ND_ADDR_VIOL"},
{"cs", 0x10, "FTI_ND_SEC_VIOL"},
{"cs", 0x11, "Hardware Assertion (HWA)"},
{"cs", 0x11, ACA_ERROR_TYPE_HARDWARE_ASSERTION},
{"cs", 0x12, "ST_PRT_ERR"},
{"cs", 0x13, "ST_ECC_ERR"},
{"cs", 0x14, "ST_TXN_ERR"},
{"pie", 0x0, "Hardware Assertion (HWA)"},
{"pie", 0x0, ACA_ERROR_TYPE_HARDWARE_ASSERTION},
{"pie", 0x1, "CSW"},
{"pie", 0x2, "GMI"},
{"pie", 0x3, "FTI_DAT_STAT"},
{"pie", 0x4, "DEF"},
{"pie", 0x5, "Watchdog Timeout (WDT)"},
{"pie", 0x5, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT},
{"pie", 0x6, "CNLI"},
{"pie", 0x7, "RSLVFCI"},
{"umc", 0x0, "On-die ECC"},
{"umc", 0x0, ACA_ERROR_TYPE_ON_DIE_ECC},
{"umc", 0x1, "WriteDataPoisonErr"},
{"umc", 0x2, "SdpParityErr"},
{"umc", 0x4, "AddressCommandParityErr"},
@@ -103,7 +103,7 @@ const aca_error_type_t error_table[] = {
{"umc", 0xb, "RdCrcErr"},
{"umc", 0xd, "MpFwErr"},
{"umc", 0xe, "MpParErr"},
{"umc", 0xf, "End-to-end CRC"},
{"umc", 0xf, ACA_ERROR_TYPE_END_TO_END_CRC},
{"psp", 0x0, "Mp0HighSramError"},
{"psp", 0x1, "Mp0LowSramError"},
{"psp", 0x2, "Mp0IDataBank0Error"},
@@ -127,7 +127,7 @@ const aca_error_type_t error_table[] = {
{"psp", 0x3b, "SRAM_EDC"},
{"psp", 0x3c, "SMN_Parity"},
{"psp", 0x3d, "SMN_Timeout"},
{"psp", 0x3f, "WAFL"},
{"psp", 0x3f, ACA_ERROR_TYPE_WAFL},
{"smu", 0x0, "Mp5HighSramError"},
{"smu", 0x1, "Mp5LowSramError"},
{"smu", 0x2, "Mp5DCacheAError"},
@@ -478,7 +478,7 @@ int find_bank_name(uint16_t hw_id, uint16_t aca_type, const char **bank_name)
}
}
*bank_name = "UNKNOWN";
*bank_name = ACA_SEVERITY_UNKNOWN;
return 1;
}
@@ -499,7 +499,7 @@ int find_error_type_by_bank(const char *bank, uint32_t error_code, const char **
}
}
*error_type = "UNKNOWN";
*error_type = ACA_SEVERITY_UNKNOWN;
return 1;
}
@@ -520,7 +520,7 @@ int find_error_in_table(const aca_error_entry_t *table, size_t table_size,
}
}
*error_type = "UNKNOWN";
*error_type = ACA_SEVERITY_UNKNOWN;
return 1;
}
@@ -556,6 +556,6 @@ int find_instance_name(const char *bank, uint32_t instance_id_lo, const char **i
}
}
*instance_name = "UNKNOWN";
*instance_name = ACA_SEVERITY_UNKNOWN;
return 1;
}
+57
Просмотреть файл
@@ -0,0 +1,57 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "aca_version.h"
/* Implementation of version functions */
/**
 * Returns the major version component this file was compiled with
 * (ACA_VERSION_MAJOR from aca_version.h).
 */
int aca_get_version_major(void)
{
return ACA_VERSION_MAJOR;
}
/**
 * Returns the minor version component this file was compiled with
 * (ACA_VERSION_MINOR from aca_version.h).
 */
int aca_get_version_minor(void)
{
return ACA_VERSION_MINOR;
}
/**
 * Returns the patch version component this file was compiled with
 * (ACA_VERSION_PATCH from aca_version.h).
 */
int aca_get_version_patch(void)
{
return ACA_VERSION_PATCH;
}
/**
 * Returns the version text (ACA_VERSION_STRING from aca_version.h).
 * The result points at a string literal, so the caller must neither modify
 * nor free it.
 */
const char *aca_get_version_string(void)
{
return ACA_VERSION_STRING;
}
aca_version_info_t aca_get_version_info(void)
{
aca_version_info_t info;
info.major = ACA_VERSION_MAJOR;
info.minor = ACA_VERSION_MINOR;
info.patch = ACA_VERSION_PATCH;
info.string = ACA_VERSION_STRING;
return info;
}
+40 -40
Просмотреть файл
@@ -1,4 +1,3 @@
// SPDX-License-Identifier: MIT
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
@@ -22,56 +21,57 @@
*/
#include "error_map.h"
#include "aca_constants.h"
#include <string.h>
#define AFID_VERSION "0.7"
static const error_map_entry_t error_map[] = {
{1, "Boot-Time Errors", "FW Load", "CPER", "Fail-to-init"},
{2, "Boot-Time Errors", "HBM BIST Test", "CPER", "Fail-to-init"},
{3, "Boot-Time Errors", "HBM Memory Test", "CPER", "Fail-to-init"},
{4, "Boot-Time Errors", "HBM Training", "CPER", "Fail-to-init"},
{5, "Boot-Time Errors", "Unhandled", "CPER", "Fail-to-init"},
{6, "Boot-Time Errors", "Unknown", "CPER", "Fail-to-init"},
{7, "Boot-Time Errors", "USR CP Link Training", "CPER", "Fail-to-init"},
{8, "Boot-Time Errors", "USR DP Link Training", "CPER", "Fail-to-init"},
{9, "Boot-Time Errors", "WAFL Link Training", "CPER", "Fail-to-init"},
{10, "Boot-Time Errors", "XGMI Link Training", "CPER", "Fail-to-init"},
{11, "Boot-Time Errors", "Boot Controller Data Abort", "CPER", "Fail-to-init"},
{12, "Boot-Time Errors", "Boot Controller Generic", "CPER ", "Fail-to-init"},
{13, "Off-Package Link Errors", "PCIe AER", "CPER", "Corrected"},
{14, "Off-Package Link Errors", "PCIe AER", "CPER", "Fatal"},
{15, "Off-Package Link Errors", "WAFL", "CPER", "Corrected"},
{16, "Off-Package Link Errors", "WAFL", "CPER", "Fatal"},
{17, "Off-Package Link Errors", "XGMI", "CPER", "Corrected"},
{18, "Off-Package Link Errors", "XGMI", "CPER", "Fatal"},
{19, "HBM Errors", "Bad Page Retirement Threshold", "CPER", "Fatal"},
{20, "HBM Errors", "On-die ECC", "CPER", "Fatal"},
{21, "HBM Errors", "End-to-end CRC", "CPER", "Fatal"},
{22, "HBM Errors", "On-die ECC", "CPER", "Uncorrected, Non-fatal"},
{23, "HBM Errors", "End-to-end CRC", "CPER", "Uncorrected, Non-fatal"},
{24, "HBM Errors", "All", "CPER", "Corrected"},
{25, "HBM Errors", "All Others", "CPER", "Fatal"},
{26, "Device Internal Errors", "Hardware Assertion (HWA)", "CPER", "Fatal"},
{27, "Device Internal Errors", "Watchdog Timeout (WDT)", "CPER", "Fatal"},
{28, "Device Internal Errors", "All Others", "CPER", "Uncorrected, Non-fatal"},
{29, "Device Internal Errors", "All Others", "CPER", "Corrected"},
{30, "Device Internal Errors", "All Others", "CPER", "Fatal"},
{31, "CPER Format", "Malformed CPER", "CPER", "ALL"},
{32, "CPER Format", "Incomplete ACA Data", "CPER", "ALL"},
{33, "CPER Format", "Invalid ACA Data", "CPER", "ALL"},
{34, "Unidentified Errors", "Unidentified Error", "CPER", "ALL"}};
{1, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_FW_LOAD, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{2, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_BIST_TEST, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{3, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_MEMORY_TEST, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{4, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_HBM_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{5, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_UNHANDLED, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{6, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_UNKNOWN_ERROR, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{7, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_USR_CP_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{8, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_USR_DP_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{9, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_WAFL_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{10, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_XGMI_LINK_TRAINING, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{11, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_BOOT_CONTROLLER_DATA_ABORT, ACA_PROTOCOL_CPER, ACA_SEVERITY_FAIL_TO_INIT},
{12, ACA_CATEGORY_BOOT_TIME_ERRORS, ACA_ERROR_TYPE_BOOT_CONTROLLER_GENERIC, ACA_PROTOCOL_CPER_WITH_SPACE, ACA_SEVERITY_FAIL_TO_INIT},
{13, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_PCIE_AER, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
{14, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_PCIE_AER, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{15, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_WAFL, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
{16, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_WAFL, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{17, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_XGMI, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
{18, ACA_CATEGORY_OFF_PACKAGE_LINK_ERRORS, ACA_ERROR_TYPE_XGMI, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{19, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_BAD_PAGE_RETIREMENT_THRESHOLD, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{20, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ON_DIE_ECC, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{21, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_END_TO_END_CRC, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{22, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ON_DIE_ECC, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
{23, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_END_TO_END_CRC, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
{24, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ALL, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
{25, ACA_CATEGORY_HBM_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{26, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_HARDWARE_ASSERTION, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{27, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_WATCHDOG_TIMEOUT, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{28, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_UNCORRECTED_NON_FATAL},
{29, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_CORRECTED},
{30, ACA_CATEGORY_DEVICE_INTERNAL_ERRORS, ACA_ERROR_TYPE_ALL_OTHERS, ACA_PROTOCOL_CPER, ACA_SEVERITY_FATAL},
{31, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_MALFORMED_CPER, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
{32, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_INCOMPLETE_ACA_DATA, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
{33, ACA_CATEGORY_CPER_FORMAT, ACA_ERROR_TYPE_INVALID_ACA_DATA, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS},
{34, ACA_CATEGORY_UNIDENTIFIED_ERRORS, ACA_ERROR_TYPE_UNIDENTIFIED_ERROR, ACA_PROTOCOL_CPER, ACA_SEVERITY_ALL_CAPS}};
static const size_t NUM_ERROR_ENTRIES = sizeof(error_map) / sizeof(error_map[0]);
int get_error_id(const char *error_category, const char *error_type, const char *error_severity)
{
if (!error_category || !error_type || !error_severity ||
strcmp(error_category, "UNKNOWN") == 0 ||
strcmp(error_type, "UNKNOWN") == 0 ||
strcmp(error_severity, "UNKNOWN") == 0)
strcmp(error_category, ACA_SEVERITY_UNKNOWN) == 0 ||
strcmp(error_type, ACA_SEVERITY_UNKNOWN) == 0 ||
strcmp(error_severity, ACA_SEVERITY_UNKNOWN) == 0)
{
return 33; // Return ID for "Invalid Error" if any input is "UNKNOWN" or NULL
return ACA_ERROR_INVALID_ACA_DATA_ID; // Return ID for "Invalid Error" if any input is "UNKNOWN" or NULL
}
for (size_t i = 0; i < NUM_ERROR_ENTRIES; i++)
@@ -84,5 +84,5 @@ int get_error_id(const char *error_category, const char *error_type, const char
}
}
return 34; // Return ID for "Unidentified Errors" if no match found
return ACA_ERROR_UNIDENTIFIED_ERROR_ID; // Return ID for "Unidentified Errors" if no match found
}
+148
Просмотреть файл
@@ -0,0 +1,148 @@
/*
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* @file main.c
* @brief Demo program showing how to use the ACA decoder
*
* This is a demonstration program that shows how to use the ACA decoder
* with sample raw data to decode ACA error information.
*/
#include <stdio.h>
#include <aca_api.h>
#include <aca_version.h>
#include <aca_constants.h>
#include <stdint.h>
#include <inttypes.h>
// Function prototype
void print_error_info(const aca_error_info_t *info);
void print_version_info(void);
/*
 * Print one decoded ACA error record to stdout as a JSON-style object.
 *
 * Every value, including the numeric ones, is emitted inside double quotes,
 * and string fields are printed verbatim with no JSON escaping.
 *
 * info: record to print. It is dereferenced without a NULL check, so the
 *       caller must pass a valid pointer.
 *
 * NOTE(review): the format specifiers assume oam/aid/afid are int-compatible,
 * scrub/error_code_ext are unsigned int-compatible and raw_addr/raw_synd are
 * uint64_t - confirm against the aca_error_info_t declaration.
 * NOTE(review): the *_ref members are passed to "%s" unchecked; presumably the
 * decoder always fills them with non-NULL strings - verify.
 */
void print_error_info(const aca_error_info_t *info)
{
printf("{\n");
printf(" \"bank\": \"%s\",\n", info->bank_ref);
// Nested object describing where the error was reported
printf(" \"error_location\": {\n");
printf(" \"oam\": \"%d\",\n", info->oam);
printf(" \"aid\": \"%d\",\n", info->aid);
printf(" \"instance\": \"%s\"\n", info->instance_ref);
printf(" },\n");
printf(" \"severity\": \"%s\",\n", info->severity_ref);
printf(" \"afid\": \"%d\",\n", info->afid);
printf(" \"scrub\": \"%u\",\n", info->scrub);
printf(" \"err_ext\": \"%u\",\n", info->error_code_ext);
printf(" \"error_category\": \"%s\",\n", info->category_ref);
printf(" \"error_type\": \"%s\",\n", info->error_type_ref);
// Raw register values are printed in hexadecimal with a 0x prefix
printf(" \"address\": \"0x%" PRIx64 "\",\n", info->raw_addr);
printf(" \"syndrome\": \"0x%" PRIx64 "\"\n", info->raw_synd);
printf("}\n");
}
/*
 * Dump the library version to stdout twice: once through the individual
 * accessor functions and once through the aggregate aca_get_version_info()
 * structure, framed by banner lines.
 */
void print_version_info(void)
{
    /* Collect everything up front, then print. */
    const char *text = aca_get_version_string();
    const int major_num = aca_get_version_major();
    const int minor_num = aca_get_version_minor();
    const int patch_num = aca_get_version_patch();
    const aca_version_info_t full = aca_get_version_info();

    printf("=== ACA Decoder Library Version Information ===\n");

    /* Values obtained from the per-field accessors */
    printf("Version: %s\n", text);
    printf("Major: %d\n", major_num);
    printf("Minor: %d\n", minor_num);
    printf("Patch: %d\n", patch_num);

    /* Same values obtained from the aggregate structure */
    printf("Complete version info:\n");
    printf(" Major: %d\n", full.major);
    printf(" Minor: %d\n", full.minor);
    printf(" Patch: %d\n", full.patch);
    printf(" String: %s\n", full.string);

    printf("===============================================\n\n");
}
/*
 * Demo entry point.
 *
 * Prints the library version, decodes four sample register dumps to their
 * AFID with decode_afid(), then prints the full decoded record for two of
 * them with decode_error_info(). Always returns 0.
 *
 * Register layouts used by the decoder:
 *   - 4 registers (32 bytes):   [0]=status, [1]=addr, [2]=ipid, [3]=synd
 *   - 16 registers (128 bytes): [1]=status, [2]=addr, [5]=ipid, [6]=synd
 */
int main(void)
{
    // Display version information
    print_version_info();

    // Sample usage of decode_afid with 32-byte register array (HBM fatal error).
    // NOTE(review): this comment used to state "expected output is 4", but in the
    // AFID map (error_map.c) ID 4 is the boot-time "HBM Training" entry, while
    // fatal HBM errors map to 19, 20, 21 or 25 - confirm the expected value.
    uint64_t register_array_32[ACA_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbaa000000004081b, 0x0, 0x209600090f00, 0x5d000000};
    int afid_32 = decode_afid(register_array_32, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
    printf("Decoded AFID (32-byte array): %d\n", afid_32);

    // Sample usage of decode_afid with 32-byte register array (GC fatal error).
    // NOTE(review): this comment used to state "expected output is 3", but ID 3 is
    // the boot-time "HBM Memory Test" entry; if GC errors are categorized as
    // Device Internal Errors, the fatal IDs are 26, 27 or 30 - confirm.
    uint64_t register_array_test[ACA_REGISTER_ARRAY_SIZE_32_BYTES] = {0xbea00000003b0000, 0x100000029, 0x1200136430400, 0x20b};
    int afid_test = decode_afid(register_array_test, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
    printf("Decoded AFID (test array): %d\n", afid_test);

    // Sample usage of decode_afid with 128-byte register array (HBM corrected error).
    // NOTE(review): this comment used to state "expected output is 1", but ID 1 is
    // the boot-time "FW Load" entry; the AFID map lists corrected HBM errors as
    // ID 24 - confirm the expected value.
    uint64_t register_array_128[ACA_REGISTER_ARRAY_SIZE_128_BYTES] = {
        0xffff,
        0xdc2040000000011b,
        0x0,
        0xd008000801000000,
        0x25000001ff,
        0x209600191f00,
        0xa000000,
        0x0,
        0x0,
        0x0,
        0xd008000801000000,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0};
    int afid_128 = decode_afid(register_array_128, ACA_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1);
    printf("Decoded AFID (128-byte array): %d\n", afid_128);

    // sample for bad page
    uint64_t register_array_bad_page[ACA_REGISTER_ARRAY_SIZE_128_BYTES] = {
        0x1,
        0xb000000000000137,
        0x0,
        0x0,
        0x1ff00000002,
        0x9600000000,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0,
        0x0};
    // when flag is 0b1000, it indicates that the error threshold has been exceeded
    // and is always a HBM error. The expected output is 19.
    int afid_bad_page = decode_afid(register_array_bad_page, ACA_REGISTER_ARRAY_SIZE_128_BYTES, ACA_FLAG_THRESHOLD_EXCEEDED, 1);
    printf("Decoded AFID (bad page): %d\n", afid_bad_page);

    // Full decoded records for the 32-byte and 128-byte HBM samples
    const aca_error_info_t error_info_32 = decode_error_info(register_array_32, ACA_REGISTER_ARRAY_SIZE_32_BYTES, 0, 1);
    print_error_info(&error_info_32);
    const aca_error_info_t error_info_128 = decode_error_info(register_array_128, ACA_REGISTER_ARRAY_SIZE_128_BYTES, 0, 1);
    print_error_info(&error_info_128);

    return 0;
}
+2
Просмотреть файл
@@ -29,7 +29,9 @@
#include <sstream>
#include <vector>
extern "C" {
#include "aca-decode/aca_decode.h"
}
#include "amd_smi/impl/amd_smi_cper.h"
#include "rocm_smi/rocm_smi_logger.h"