Skip to content

Instantly share code, notes, and snippets.

@kev009
Created September 27, 2024 00:30
Show Gist options
  • Save kev009/f3d129f509d2ed93a5160defcbb1966e to your computer and use it in GitHub Desktop.
Save kev009/f3d129f509d2ed93a5160defcbb1966e to your computer and use it in GitHub Desktop.
commit af2393a5280b7c7f4346d866315b7814a845d878 (HEAD -> main)
Author: Kevin Bowling <[email protected]>
Date: Wed Sep 25 22:54:34 2024 -0700
e1000: Add PCH ECC Error Handling and Stats
diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h
index f17877f3e463..26553f4b7d09 100644
--- a/sys/dev/e1000/e1000_hw.h
+++ b/sys/dev/e1000/e1000_hw.h
@@ -650,6 +650,8 @@ struct e1000_hw_stats {
u64 o2bspc;
u64 b2ospc;
u64 b2ogprc;
+ u64 pbeccsts_c;
+ u64 pbeccsts_uc;
};
struct e1000_vf_stats {
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index ad12171487f6..69b0342f20cb 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -319,6 +319,7 @@ static void em_if_vlan_filter_enable(struct e1000_softc *);
static void em_if_vlan_filter_disable(struct e1000_softc *);
static void em_if_vlan_filter_write(struct e1000_softc *);
static void em_setup_vlan_hw_support(if_ctx_t ctx);
+static int em_ecc_inject_error(SYSCTL_HANDLER_ARGS);
static int em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void em_print_nvm_info(struct e1000_softc *);
static void em_fw_version_locked(if_ctx_t);
@@ -1451,7 +1452,7 @@ em_intr(void *arg)
{
struct e1000_softc *sc = arg;
if_ctx_t ctx = sc->ctx;
- u32 reg_icr;
+ u32 reg_icr, pbeccsts;
reg_icr = E1000_READ_REG(&sc->hw, E1000_ICR);
@@ -1479,6 +1480,28 @@ em_intr(void *arg)
*/
IFDI_INTR_DISABLE(ctx);
+ /* ECC cause is only handled for MACs in [e1000_pch_lpt, igb_mac_min). */
+ if (sc->hw.mac.type >= e1000_pch_lpt && sc->hw.mac.type < igb_mac_min) {
+ if (reg_icr & E1000_ICR_ECCER) {
+ /* Uncorrectable ECC Error: fold both PBECCSTS counts into stats */
+ pbeccsts = E1000_READ_REG(&sc->hw, E1000_PBECCSTS);
+
+ sc->stats.pbeccsts_c +=
+ pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+ sc->stats.pbeccsts_uc += (pbeccsts &
+ E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+ E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+
+ /*
+ * Let admin reset the interface - we can't grab the STATE_LOCK
+ * mutex in this interrupt filter.
+ */
+ sc->ecc_intr_reset = 1;
+
+ return (FILTER_SCHEDULE_THREAD);
+ }
+ }
+
/* Link status change */
if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
em_handle_link(ctx);
@@ -1838,6 +1861,18 @@ em_if_update_admin_status(if_ctx_t ctx)
u32 link_check, thstat, ctrl;
bool automasked = false;
+ if (sc->ecc_intr_reset) {
+ device_printf(dev, "Uncorrectable ECC Error, requesting reset.\n");
+ sc->ecc_intr_reset = 0;
+ iflib_request_reset(ctx);
+ /*
+ * We need to go through admin again because update_admin_status is
+ * done after checking the context state.
+ */
+ iflib_admin_intr_deferred(ctx);
+ return;
+ }
+
link_check = thstat = ctrl = 0;
/* Get the cached link value or read phy for real */
switch (hw->phy.media_type) {
@@ -3743,6 +3778,11 @@ em_if_intr_enable(if_ctx_t ctx)
E1000_WRITE_REG(hw, EM_EIAC, sc->ims);
ims_mask |= sc->ims;
}
+
+ if (hw->mac.type >= e1000_pch_lpt && hw->mac.type < igb_mac_min) {
+ ims_mask |= E1000_IMS_ECCER;
+ }
+
E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
E1000_WRITE_FLUSH(hw);
}
@@ -4262,6 +4302,7 @@ static void
em_update_stats_counters(struct e1000_softc *sc)
{
u64 prev_xoffrxc = sc->stats.xoffrxc;
+ u32 pbeccsts;
if(sc->hw.phy.media_type == e1000_media_type_copper ||
(E1000_READ_REG(&sc->hw, E1000_STATUS) & E1000_STATUS_LU)) {
@@ -4358,6 +4399,15 @@ em_update_stats_counters(struct e1000_softc *sc)
sc->stats.tsctfc +=
E1000_READ_REG(&sc->hw, E1000_TSCTFC);
}
+
+ /* PCH Packet Buffer ECC errors */
+ if (sc->hw.mac.type >= e1000_pch_lpt && sc->hw.mac.type < igb_mac_min) {
+ pbeccsts = E1000_READ_REG(&sc->hw, E1000_PBECCSTS);
+ sc->stats.pbeccsts_c += pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+ sc->stats.pbeccsts_uc += (pbeccsts &
+ E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+ E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+ }
}
static uint64_t
@@ -4427,8 +4477,8 @@ em_add_hw_stats(struct e1000_softc *sc)
struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
struct e1000_hw_stats *stats = &sc->stats;
- struct sysctl_oid *stat_node, *queue_node, *int_node;
- struct sysctl_oid_list *stat_list, *queue_list, *int_list;
+ struct sysctl_oid *stat_node, *queue_node, *int_node, *ecc_node;
+ struct sysctl_oid_list *stat_list, *queue_list, *int_list, *ecc_list;
#define QUEUE_NAME_LEN 32
char namebuf[QUEUE_NAME_LEN];
@@ -4708,6 +4758,25 @@ em_add_hw_stats(struct e1000_softc *sc)
SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
CTLFLAG_RD, &sc->stats.icrxoc,
"Interrupt Cause Receiver Overrun Count");
+
+ if (sc->hw.mac.type >= e1000_pch_lpt && sc->hw.mac.type < igb_mac_min) {
+ ecc_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ecc_stats",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ECC Statistics");
+ ecc_list = SYSCTL_CHILDREN(ecc_node);
+
+ SYSCTL_ADD_UQUAD(ctx, ecc_list, OID_AUTO, "corrected_errors",
+ CTLFLAG_RD, &sc->stats.pbeccsts_c,
+ "Packet Buffer ECC Corrected Errors");
+
+ SYSCTL_ADD_UQUAD(ctx, ecc_list, OID_AUTO, "uncorrected_errors",
+ CTLFLAG_RD, &sc->stats.pbeccsts_uc,
+ "Packet Buffer ECC Uncorrected Errors");
+
+ SYSCTL_ADD_PROC(ctx, ecc_list, OID_AUTO, "inject_error",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0,
+ em_ecc_inject_error, "I", "Inject ECC Error");
+ }
+
}
static void
@@ -4833,6 +4902,27 @@ em_sysctl_print_fw_version(SYSCTL_HANDLER_ARGS)
* 32 words, stuff that matters is in that extent.
*
**********************************************************************/
+static int
+em_ecc_inject_error(SYSCTL_HANDLER_ARGS)
+{
+ struct e1000_softc *sc = (struct e1000_softc *)arg1;
+ int request = -1;	/* value handed to sysctl_handle_int before any write */
+ int error;
+
+ error = sysctl_handle_int(oidp, &request, 0, req);
+ /* Stop on a handler error or when no new value was supplied. */
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ if (request != 1)
+ return (0);
+
+ device_printf(sc->dev, "Injecting ECC Error\n");
+ /* NOTE(review): raw register offsets, no named macro - confirm vs. datasheet */
+ E1000_WRITE_REG(&sc->hw, 0x01010, ((1 << 24) | (1 << 23)));
+ E1000_WRITE_REG(&sc->hw, 0x01004, 0xFF01000F);
+ return (0);
+}
+
static int
em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h
index 244762e8ed0d..c725722d8b1c 100644
--- a/sys/dev/e1000/if_em.h
+++ b/sys/dev/e1000/if_em.h
@@ -507,6 +507,7 @@ struct e1000_softc {
u32 dmac;
int link_mask;
int tso_automasked;
+ int ecc_intr_reset;
u64 que_mask;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment