asedeno.scripts.mit.edu Git - linux.git/commitdiff
net: Revert devlink health changes.
author David S. Miller <davem@davemloft.net>
Fri, 25 Jan 2019 18:53:23 +0000 (10:53 -0800)
committer David S. Miller <davem@davemloft.net>
Fri, 25 Jan 2019 18:53:23 +0000 (10:53 -0800)
This reverts the devlink health changes from 1/17/2019,
Jiri wants things to be designed differently and it was
agreed that the easiest way to do this is to start from the
beginning again.

Commits reverted:

cb5ccfbe73b389470e1dc11061bb185ef4bc9aec
880ee82f0313453ec5a6cb122866ac057263066b
c7af343b4e33578b7de91786a3f639c8cfa0d97b
ff253fedab961b22117a73ab808fcfa9e6852b50
6f9d56132eb6d2603d4273cfc65bed914ec47acb
fcd852c69d776c0f46c8f79e8e431e5cc6ddc7b7
8a66704a13d9713593342e29b4f0c19762f5746b
12bd0dcefe88782ac1c9fff632958dd1b71d27e5
aba25279c10094c5c97d09c3491ca86d00b4ad5e
ce019faa70f81555fa17ebc1d5a03651f2e7e15a
b8c45a033acc607201588f7665ba84207e5149e0

And the follow-on build fix:

33a0efa4baecd689da9474ce0e8b673eb6931c60

Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/devlink-health.txt [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c [deleted file]
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
include/net/devlink.h
include/trace/events/devlink.h
include/uapi/linux/devlink.h
net/core/devlink.c

diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
deleted file mode 100644 (file)
index 1db3fbe..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-The health mechanism is targeted for Real Time Alerting, in order to know when
-something bad had happened to a PCI device
-- Provide alert debug information
-- Self healing
-- If problem needs vendor support, provide a way to gather all needed debugging
-  information.
-
-The main idea is to unify and centralize driver health reports in the
-generic devlink instance and allow the user to set different
-attributes of the health reporting and recovery procedures.
-
-The devlink health reporter:
-Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
-or unknown (driver specific).
-For each registered health reporter a driver can issue error/health reports
-asynchronously. All health reports handling is done by devlink.
-Device driver can provide specific callbacks for each "health reporter", e.g.
- - Recovery procedures
- - Diagnostics and object dump procedures
- - OOB initial parameters
-Different parts of the driver can register different types of health reporters
-with different handlers.
-
-Once an error is reported, devlink health will do the following actions:
-  * A log is being send to the kernel trace events buffer
-  * Health status and statistics are being updated for the reporter instance
-  * Object dump is being taken and saved at the reporter instance (as long as
-    there is no other dump which is already stored)
-  * Auto recovery attempt is being done. Depends on:
-    - Auto-recovery configuration
-    - Grace period vs. time passed since last recover
-
-The user interface:
-User can access/change each reporter's parameters and driver specific callbacks
-via devlink, e.g per error type (per health reporter)
- - Configure reporter's generic parameters (like: disable/enable auto recovery)
- - Invoke recovery procedure
- - Run diagnostics
- - Object dump
-
-The devlink health interface (via netlink):
-DEVLINK_CMD_HEALTH_REPORTER_GET
-  Retrieves status and configuration info per DEV and reporter.
-DEVLINK_CMD_HEALTH_REPORTER_SET
-  Allows reporter-related configuration setting.
-DEVLINK_CMD_HEALTH_REPORTER_RECOVER
-  Triggers a reporter's recovery procedure.
-DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
-  Retrieves diagnostics data from a reporter on a device.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
-  Retrieves the last stored dump. Devlink health
-  saves a single dump. If an dump is not already stored by the devlink
-  for this reporter, devlink generates a new dump.
-  dump output is defined by the reporter.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
-  Clears the last saved dump file for the specified reporter.
-
-
-                                               netlink
-                                      +--------------------------+
-                                      |                          |
-                                      |            +             |
-                                      |            |             |
-                                      +--------------------------+
-                                                   |request for ops
-                                                   |(diagnose,
- mlx5_core                             devlink     |recover,
-                                                   |dump)
-+--------+                            +--------------------------+
-|        |                            |    reporter|             |
-|        |                            |  +---------v----------+  |
-|        |   ops execution            |  |                    |  |
-|     <----------------------------------+                    |  |
-|        |                            |  |                    |  |
-|        |                            |  + ^------------------+  |
-|        |                            |    | request for ops     |
-|        |                            |    | (recover, dump)     |
-|        |                            |    |                     |
-|        |                            |  +-+------------------+  |
-|        |     health report          |  | health handler     |  |
-|        +------------------------------->                    |  |
-|        |                            |  +--------------------+  |
-|        |     health reporter create |                          |
-|        +---------------------------->                          |
-+--------+                            +--------------------------+
index 6bb2a860b15b9f08484716fd991ab8699baf25e9..9de9abacf7f6113f0383f4177cb6257d57e63bed 100644 (file)
@@ -22,7 +22,7 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 #
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
                en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
-               en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
+               en_selftest.o en/port.o en/monitor_stats.o
 
 #
 # Netdev extra
index 27e276c9bf84cb251ac23f3527eaae2851427696..8fa8fdd30b8509f73a27fe4d31b094dfceda5e5d 100644 (file)
@@ -388,7 +388,10 @@ struct mlx5e_txqsq {
        struct mlx5e_channel      *channel;
        int                        txq_ix;
        u32                        rate_limit;
-       struct work_struct         recover_work;
+       struct mlx5e_txqsq_recover {
+               struct work_struct         recover_work;
+               u64                        last_recover;
+       } recover;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_dma_info {
@@ -679,13 +682,6 @@ struct mlx5e_rss_params {
        u8      hfunc;
 };
 
-struct mlx5e_modify_sq_param {
-       int curr_state;
-       int next_state;
-       int rl_update;
-       int rl_index;
-};
-
 struct mlx5e_priv {
        /* priv data path fields - start */
        struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -741,7 +737,6 @@ struct mlx5e_priv {
 #ifdef CONFIG_MLX5_EN_TLS
        struct mlx5e_tls          *tls;
 #endif
-       struct devlink_health_reporter *tx_reporter;
 };
 
 struct mlx5e_profile {
@@ -871,11 +866,6 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
                               struct mlx5e_params *params);
 
-int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
-                   struct mlx5e_modify_sq_param *p);
-void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
-void mlx5e_tx_disable_queue(struct netdev_queue *txq);
-
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
        return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
deleted file mode 100644 (file)
index 2335c5b..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies. */
-
-#ifndef __MLX5E_EN_REPORTER_H
-#define __MLX5E_EN_REPORTER_H
-
-#include <linux/mlx5/driver.h>
-#include "en.h"
-
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
-void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
-
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
deleted file mode 100644 (file)
index d9675af..0000000
+++ /dev/null
@@ -1,356 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies. */
-
-#include <net/devlink.h>
-#include "reporter.h"
-#include "lib/eq.h"
-
-#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
-
-struct mlx5e_tx_err_ctx {
-       int (*recover)(struct mlx5e_txqsq *sq);
-       struct mlx5e_txqsq *sq;
-};
-
-static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
-{
-       unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
-
-       while (time_before(jiffies, exp_time)) {
-               if (sq->cc == sq->pc)
-                       return 0;
-
-               msleep(20);
-       }
-
-       netdev_err(sq->channel->netdev,
-                  "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
-                  sq->sqn, sq->cc, sq->pc);
-
-       return -ETIMEDOUT;
-}
-
-static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
-{
-       WARN_ONCE(sq->cc != sq->pc,
-                 "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
-                 sq->sqn, sq->cc, sq->pc);
-       sq->cc = 0;
-       sq->dma_fifo_cc = 0;
-       sq->pc = 0;
-}
-
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
-       struct mlx5_core_dev *mdev = sq->channel->mdev;
-       struct net_device *dev = sq->channel->netdev;
-       struct mlx5e_modify_sq_param msp = {0};
-       int err;
-
-       msp.curr_state = curr_state;
-       msp.next_state = MLX5_SQC_STATE_RST;
-
-       err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-       if (err) {
-               netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
-               return err;
-       }
-
-       memset(&msp, 0, sizeof(msp));
-       msp.curr_state = MLX5_SQC_STATE_RST;
-       msp.next_state = MLX5_SQC_STATE_RDY;
-
-       err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-       if (err) {
-               netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
-               return err;
-       }
-
-       return 0;
-}
-
-static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
-{
-       struct mlx5_core_dev *mdev = sq->channel->mdev;
-       struct net_device *dev = sq->channel->netdev;
-       u8 state;
-       int err;
-
-       if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-               return 0;
-
-       err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
-       if (err) {
-               netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
-                          sq->sqn, err);
-               return err;
-       }
-
-       if (state != MLX5_RQC_STATE_ERR) {
-               netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
-               return -EINVAL;
-       }
-
-       mlx5e_tx_disable_queue(sq->txq);
-
-       err = mlx5e_wait_for_sq_flush(sq);
-       if (err)
-               return err;
-
-       /* At this point, no new packets will arrive from the stack as TXQ is
-        * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
-        * pending WQEs.  SQ can safely reset the SQ.
-        */
-
-       err = mlx5e_sq_to_ready(sq, state);
-       if (err)
-               return err;
-
-       mlx5e_reset_txqsq_cc_pc(sq);
-       sq->stats->recover++;
-       mlx5e_activate_txqsq(sq);
-
-       return 0;
-}
-
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
-{
-       char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-       struct mlx5e_tx_err_ctx err_ctx = {0};
-
-       err_ctx.sq       = sq;
-       err_ctx.recover  = mlx5e_tx_reporter_err_cqe_recover;
-       sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
-
-       devlink_health_report(sq->channel->priv->tx_reporter, err_str,
-                             &err_ctx);
-}
-
-static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
-{
-       struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
-       u32 eqe_count;
-
-       netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-                  eq->core.eqn, eq->core.cons_index, eq->core.irqn);
-
-       eqe_count = mlx5_eq_poll_irq_disabled(eq);
-       if (!eqe_count) {
-               clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-               return 1;
-       }
-
-       netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
-                  eqe_count, eq->core.eqn);
-       sq->channel->stats->eq_rearm++;
-       return 0;
-}
-
-void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
-{
-       struct mlx5e_tx_err_ctx err_ctx;
-       char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-
-       err_ctx.sq       = sq;
-       err_ctx.recover  = mlx5e_tx_reporter_timeout_recover;
-       sprintf(err_str,
-               "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
-               sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
-               jiffies_to_usecs(jiffies - sq->txq->trans_start));
-       devlink_health_report(sq->channel->priv->tx_reporter, err_str,
-                             &err_ctx);
-}
-
-/* state lock cannot be grabbed within this function.
- * It can cause a dead lock or a read-after-free.
- */
-int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
-{
-       return err_ctx->recover(err_ctx->sq);
-}
-
-static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
-{
-       int err;
-
-       mutex_lock(&priv->state_lock);
-       mlx5e_close_locked(priv->netdev);
-       err = mlx5e_open_locked(priv->netdev);
-       mutex_unlock(&priv->state_lock);
-
-       return err;
-}
-
-static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
-                                    void *context)
-{
-       struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-       struct mlx5e_tx_err_ctx *err_ctx = context;
-
-       return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
-                        mlx5e_tx_reporter_recover_all(priv);
-}
-
-static int
-mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer,
-                                       u32 sqn, u8 state, u8 stopped)
-{
-       int err, i;
-       int nest = 0;
-       char name[20];
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       sprintf(name, "SQ 0x%x", sqn);
-       err = devlink_health_buffer_put_object_name(buffer, name);
-       if (err)
-               goto buffer_error;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_put_object_name(buffer, "HW state");
-       if (err)
-               goto buffer_error;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_put_value_u8(buffer, state);
-       if (err)
-               goto buffer_error;
-
-       devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */
-       nest--;
-
-       devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */
-       nest--;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_put_object_name(buffer, "stopped");
-       if (err)
-               goto buffer_error;
-
-       err = devlink_health_buffer_nest_start(buffer,
-                                              DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-       if (err)
-               goto buffer_error;
-       nest++;
-
-       err = devlink_health_buffer_put_value_u8(buffer, stopped);
-       if (err)
-               goto buffer_error;
-
-       for (i = 0; i < nest; i++)
-               devlink_health_buffer_nest_end(buffer);
-
-       return 0;
-
-buffer_error:
-       for (i = 0; i < nest; i++)
-               devlink_health_buffer_nest_cancel(buffer);
-       return err;
-}
-
-static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
-                                     struct devlink_health_buffer **buffers_array,
-                                     unsigned int buffer_size,
-                                     unsigned int num_buffers)
-{
-       struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-       unsigned int buff = 0;
-       int i = 0, err = 0;
-
-       if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN)
-               return -ENOMEM;
-
-       mutex_lock(&priv->state_lock);
-
-       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               mutex_unlock(&priv->state_lock);
-               return 0;
-       }
-
-       while (i < priv->channels.num * priv->channels.params.num_tc) {
-               struct mlx5e_txqsq *sq = priv->txq2sq[i];
-               u8 state;
-
-               err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
-               if (err)
-                       break;
-
-               err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff],
-                                                             sq->sqn, state,
-                                                             netif_xmit_stopped(sq->txq));
-               if (err) {
-                       if (++buff == num_buffers)
-                               break;
-               } else {
-                       i++;
-               }
-       }
-
-       mutex_unlock(&priv->state_lock);
-       return err;
-}
-
-static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
-               .name = "TX",
-               .recover = mlx5e_tx_reporter_recover,
-               .diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC *
-                                MLX5E_TX_REPORTER_PER_SQ_MAX_LEN,
-               .diagnose = mlx5e_tx_reporter_diagnose,
-               .dump_size = 0,
-               .dump = NULL,
-};
-
-#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
-{
-       struct mlx5_core_dev *mdev = priv->mdev;
-       struct devlink *devlink = priv_to_devlink(mdev);
-
-       priv->tx_reporter =
-               devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
-                                              MLX5_REPORTER_TX_GRACEFUL_PERIOD,
-                                              true, priv);
-       return PTR_ERR_OR_ZERO(priv->tx_reporter);
-}
-
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
-{
-       devlink_health_reporter_destroy(priv->tx_reporter);
-}
index dee0c8f3d4e968940a73804087ddae0383229990..8cfd2ec7c0a209afe424eca2a7be3301cf79223f 100644 (file)
@@ -51,7 +51,6 @@
 #include "en/xdp.h"
 #include "lib/eq.h"
 #include "en/monitor_stats.h"
-#include "en/reporter.h"
 
 struct mlx5e_rq_param {
        u32                     rqc[MLX5_ST_SZ_DW(rqc)];
@@ -1161,7 +1160,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
        return 0;
 }
 
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
+static void mlx5e_sq_recover(struct work_struct *work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
                             int txq_ix,
                             struct mlx5e_params *params,
@@ -1183,7 +1182,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
        sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
-       INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+       INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
        if (mlx5_accel_is_tls_device(c->priv->mdev))
@@ -1271,8 +1270,15 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
        return err;
 }
 
-int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
-                   struct mlx5e_modify_sq_param *p)
+struct mlx5e_modify_sq_param {
+       int curr_state;
+       int next_state;
+       bool rl_update;
+       int rl_index;
+};
+
+static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+                          struct mlx5e_modify_sq_param *p)
 {
        void *in;
        void *sqc;
@@ -1370,7 +1376,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c,
        return err;
 }
 
-void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+       WARN_ONCE(sq->cc != sq->pc,
+                 "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+                 sq->sqn, sq->cc, sq->pc);
+       sq->cc = 0;
+       sq->dma_fifo_cc = 0;
+       sq->pc = 0;
+}
+
+static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
 {
        sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
        clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
@@ -1379,7 +1395,7 @@ void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
        netif_tx_start_queue(sq->txq);
 }
 
-void mlx5e_tx_disable_queue(struct netdev_queue *txq)
+static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 {
        __netif_tx_lock_bh(txq);
        netif_tx_stop_queue(txq);
@@ -1395,7 +1411,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
        /* prevent netif_tx_wake_queue */
        napi_synchronize(&c->napi);
 
-       mlx5e_tx_disable_queue(sq->txq);
+       netif_tx_disable_queue(sq->txq);
 
        /* last doorbell out, godspeed .. */
        if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
@@ -1415,7 +1431,6 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
        struct mlx5_rate_limit rl = {0};
 
        cancel_work_sync(&sq->dim.work);
-       cancel_work_sync(&sq->recover_work);
        mlx5e_destroy_sq(mdev, sq->sqn);
        if (sq->rate_limit) {
                rl.rate = sq->rate_limit;
@@ -1425,15 +1440,105 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
        mlx5e_free_txqsq(sq);
 }
 
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+       unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+       while (time_before(jiffies, exp_time)) {
+               if (sq->cc == sq->pc)
+                       return 0;
+
+               msleep(20);
+       }
+
+       netdev_err(sq->channel->netdev,
+                  "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+                  sq->sqn, sq->cc, sq->pc);
+
+       return -ETIMEDOUT;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
 {
-       struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
-                                             recover_work);
+       struct mlx5_core_dev *mdev = sq->channel->mdev;
+       struct net_device *dev = sq->channel->netdev;
+       struct mlx5e_modify_sq_param msp = {0};
+       int err;
 
-       if (!sq->channel->priv->tx_reporter)
+       msp.curr_state = curr_state;
+       msp.next_state = MLX5_SQC_STATE_RST;
+
+       err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+       if (err) {
+               netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+               return err;
+       }
+
+       memset(&msp, 0, sizeof(msp));
+       msp.curr_state = MLX5_SQC_STATE_RST;
+       msp.next_state = MLX5_SQC_STATE_RDY;
+
+       err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+       if (err) {
+               netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+               return err;
+       }
+
+       return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work)
+{
+       struct mlx5e_txqsq_recover *recover =
+               container_of(work, struct mlx5e_txqsq_recover,
+                            recover_work);
+       struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
+                                             recover);
+       struct mlx5_core_dev *mdev = sq->channel->mdev;
+       struct net_device *dev = sq->channel->netdev;
+       u8 state;
+       int err;
+
+       err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+       if (err) {
+               netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+                          sq->sqn, err);
+               return;
+       }
+
+       if (state != MLX5_RQC_STATE_ERR) {
+               netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+               return;
+       }
+
+       netif_tx_disable_queue(sq->txq);
+
+       if (mlx5e_wait_for_sq_flush(sq))
                return;
 
-       mlx5e_tx_reporter_err_cqe(sq);
+       /* If the interval between two consecutive recovers per SQ is too
+        * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
+        * If we reached this state, there is probably a bug that needs to be
+        * fixed. let's keep the queue close and let tx timeout cleanup.
+        */
+       if (jiffies_to_msecs(jiffies - recover->last_recover) <
+           MLX5E_SQ_RECOVER_MIN_INTERVAL) {
+               netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
+                          sq->sqn);
+               return;
+       }
+
+       /* At this point, no new packets will arrive from the stack as TXQ is
+        * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+        * pending WQEs.  SQ can safely reset the SQ.
+        */
+       if (mlx5e_sq_to_ready(sq, state))
+               return;
+
+       mlx5e_reset_txqsq_cc_pc(sq);
+       sq->stats->recover++;
+       recover->last_recover = jiffies;
+       mlx5e_activate_txqsq(sq);
 }
 
 static int mlx5e_open_icosq(struct mlx5e_channel *c,
@@ -3102,7 +3207,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
        int tc;
 
-       mlx5e_tx_reporter_destroy(priv);
        for (tc = 0; tc < priv->profile->max_tc; tc++)
                mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
@@ -4074,14 +4178,31 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
        return features;
 }
 
+static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
+                                       struct mlx5e_txqsq *sq)
+{
+       struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
+       u32 eqe_count;
+
+       netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+                  eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+       eqe_count = mlx5_eq_poll_irq_disabled(eq);
+       if (!eqe_count)
+               return false;
+
+       netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
+       sq->channel->stats->eq_rearm++;
+       return true;
+}
+
 static void mlx5e_tx_timeout_work(struct work_struct *work)
 {
        struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
                                               tx_timeout_work);
-       int i;
-
-       if (!priv->tx_reporter)
-               return;
+       struct net_device *dev = priv->netdev;
+       bool reopen_channels = false;
+       int i, err;
 
        rtnl_lock();
        mutex_lock(&priv->state_lock);
@@ -4090,16 +4211,36 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
                goto unlock;
 
        for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
-               struct netdev_queue *dev_queue =
-                       netdev_get_tx_queue(priv->netdev, i);
+               struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
                struct mlx5e_txqsq *sq = priv->txq2sq[i];
 
                if (!netif_xmit_stopped(dev_queue))
                        continue;
 
-               mlx5e_tx_reporter_timeout(sq);
+               netdev_err(dev,
+                          "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+                          i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+                          jiffies_to_usecs(jiffies - dev_queue->trans_start));
+
+               /* If we recover a lost interrupt, most likely TX timeout will
+                * be resolved, skip reopening channels
+                */
+               if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
+                       clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+                       reopen_channels = true;
+               }
        }
 
+       if (!reopen_channels)
+               goto unlock;
+
+       mlx5e_close_locked(dev);
+       err = mlx5e_open_locked(dev);
+       if (err)
+               netdev_err(priv->netdev,
+                          "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+                          err);
+
 unlock:
        mutex_unlock(&priv->state_lock);
        rtnl_unlock();
@@ -4767,7 +4908,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 #ifdef CONFIG_MLX5_CORE_EN_DCB
        mlx5e_dcbnl_initialize(priv);
 #endif
-       mlx5e_tx_reporter_create(priv);
        return 0;
 }
 
index a8e052a5ce3662c4c83e26a578d90bc62bc40074..598ad7e4d5c97872c17fe4ae8387e82a2555c96e 100644 (file)
@@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
                                mlx5e_dump_error_cqe(sq,
                                                     (struct mlx5_err_cqe *)cqe);
                                queue_work(cq->channel->priv->wq,
-                                          &sq->recover_work);
+                                          &sq->recover.recover_work);
                        }
                        stats->cqe_err++;
                }
index a81a1b7a67d705e756e7536821e2be08e8711efa..67f4293bc9703f6f9b406882651aa7533783b581 100644 (file)
@@ -30,7 +30,6 @@ struct devlink {
        struct list_head param_list;
        struct list_head region_list;
        u32 snapshot_id;
-       struct list_head reporter_list;
        struct devlink_dpipe_headers *dpipe_headers;
        const struct devlink_ops *ops;
        struct device *dev;
@@ -424,36 +423,6 @@ struct devlink_region;
 
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
-struct devlink_health_buffer;
-struct devlink_health_reporter;
-
-/**
- * struct devlink_health_reporter_ops - Reporter operations
- * @name: reporter name
- * dump_size: dump buffer size allocated by the devlink
- * diagnose_size: diagnose buffer size allocated by the devlink
- * recover: callback to recover from reported error
- *          if priv_ctx is NULL, run a full recover
- * dump: callback to dump an object
- *       if priv_ctx is NULL, run a full dump
- * diagnose: callback to diagnose the current status
- */
-
-struct devlink_health_reporter_ops {
-       char *name;
-       unsigned int dump_size;
-       unsigned int diagnose_size;
-       int (*recover)(struct devlink_health_reporter *reporter,
-                      void *priv_ctx);
-       int (*dump)(struct devlink_health_reporter *reporter,
-                   struct devlink_health_buffer **buffers_array,
-                   unsigned int buffer_size, unsigned int num_buffers,
-                   void *priv_ctx);
-       int (*diagnose)(struct devlink_health_reporter *reporter,
-                       struct devlink_health_buffer **buffers_array,
-                       unsigned int buffer_size, unsigned int num_buffers);
-};
-
 struct devlink_ops {
        int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
        int (*port_type_set)(struct devlink_port *devlink_port,
@@ -615,34 +584,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
                                   u8 *data, u32 snapshot_id,
                                   devlink_snapshot_data_dest_t *data_destructor);
 
-int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-                                    int attrtype);
-void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer);
-void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer);
-int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-                                         char *name);
-int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-                                      u8 value);
-int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-                                       u32 value);
-int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-                                       u64 value);
-int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-                                          char *name);
-int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-                                        void *data, int len);
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-                              const struct devlink_health_reporter_ops *ops,
-                              u64 graceful_period, bool auto_recover,
-                              void *priv);
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
-int devlink_health_report(struct devlink_health_reporter *reporter,
-                         const char *msg, void *priv_ctx);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -903,91 +844,6 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
        return 0;
 }
 
-static inline int
-devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-                                int attrtype)
-{
-       return 0;
-}
-
-static inline void
-devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
-{
-}
-
-static inline void
-devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
-{
-}
-
-static inline int
-devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-                                     char *name)
-{
-       return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-                                  u8 value)
-{
-       return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-                                   u32 value)
-{
-       return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-                                   u64 value)
-{
-       return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-                                      char *name)
-{
-       return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-                                    void *data, int len)
-{
-       return 0;
-}
-
-static inline struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-                              const struct devlink_health_reporter_ops *ops,
-                              u64 graceful_period, bool auto_recover,
-                              void *priv)
-{
-       return NULL;
-}
-
-static inline void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
-}
-
-static inline void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
-       return NULL;
-}
-
-static inline int
-devlink_health_report(struct devlink_health_reporter *reporter,
-                     const char *msg, void *priv_ctx)
-{
-       return 0;
-}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
index 7e39d2fc7c75af37d96f1539b8ed8f4b985609cd..44acfbca12661d817a26ed6b2955ccdbbc60a9bf 100644 (file)
@@ -46,65 +46,6 @@ TRACE_EVENT(devlink_hwmsg,
                  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
 );
 
-TRACE_EVENT(devlink_health_report,
-       TP_PROTO(const struct devlink *devlink, const char *reporter_name,
-                const char *msg),
-
-       TP_ARGS(devlink, reporter_name, msg),
-
-       TP_STRUCT__entry(
-               __string(bus_name, devlink->dev->bus->name)
-               __string(dev_name, dev_name(devlink->dev))
-               __string(driver_name, devlink->dev->driver->name)
-               __string(reporter_name, msg)
-               __string(msg, msg)
-       ),
-
-       TP_fast_assign(
-               __assign_str(bus_name, devlink->dev->bus->name);
-               __assign_str(dev_name, dev_name(devlink->dev));
-               __assign_str(driver_name, devlink->dev->driver->name);
-               __assign_str(reporter_name, reporter_name);
-               __assign_str(msg, msg);
-       ),
-
-       TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
-                 __get_str(bus_name), __get_str(dev_name),
-                 __get_str(driver_name), __get_str(reporter_name),
-                 __get_str(msg))
-);
-
-TRACE_EVENT(devlink_health_recover_aborted,
-       TP_PROTO(const struct devlink *devlink, const char *reporter_name,
-                bool health_state, u64 time_since_last_recover),
-
-       TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
-
-       TP_STRUCT__entry(
-               __string(bus_name, devlink->dev->bus->name)
-               __string(dev_name, dev_name(devlink->dev))
-               __string(driver_name, devlink->dev->driver->name)
-               __string(reporter_name, reporter_name)
-               __field(bool, health_state)
-               __field(u64, time_since_last_recover)
-       ),
-
-       TP_fast_assign(
-               __assign_str(bus_name, devlink->dev->bus->name);
-               __assign_str(dev_name, dev_name(devlink->dev));
-               __assign_str(driver_name, devlink->dev->driver->name);
-               __assign_str(reporter_name, reporter_name);
-               __entry->health_state = health_state;
-               __entry->time_since_last_recover = time_since_last_recover;
-       ),
-
-       TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted",
-                 __get_str(bus_name), __get_str(dev_name),
-                 __get_str(driver_name), __get_str(reporter_name),
-                 __entry->health_state,
-                 __entry->time_since_last_recover)
-);
-
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
@@ -123,9 +64,6 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
 {
 }
 
-static inline void trace_devlink_health(const char *msg)
-{
-}
 #endif /* _TRACE_DEVLINK_H */
 
 #endif
index 6b26bb2ce4dcb0839e20ea5a91c8caea8033f5a5..6e52d3660654dd031ad1c9d7ab91fd9a64f91bee 100644 (file)
@@ -89,13 +89,6 @@ enum devlink_command {
        DEVLINK_CMD_REGION_DEL,
        DEVLINK_CMD_REGION_READ,
 
-       DEVLINK_CMD_HEALTH_REPORTER_GET,
-       DEVLINK_CMD_HEALTH_REPORTER_SET,
-       DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
-       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-       DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-       DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
-
        /* add new commands above here */
        __DEVLINK_CMD_MAX,
        DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -292,24 +285,6 @@ enum devlink_attr {
        DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
        DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
 
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT,              /* nested */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR,         /* nested */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME,         /* string */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE,        /* nested */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY,  /* nested */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,   /* u8 */
-       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,   /* dynamic */
-
-       DEVLINK_ATTR_HEALTH_REPORTER,                   /* nested */
-       DEVLINK_ATTR_HEALTH_REPORTER_NAME,              /* string */
-       DEVLINK_ATTR_HEALTH_REPORTER_STATE,             /* u8 */
-       DEVLINK_ATTR_HEALTH_REPORTER_ERR,               /* u64 */
-       DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,           /* u64 */
-       DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,        /* u8 */
-       DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,           /* u64 */
-       DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,   /* u64 */
-       DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,      /* u8 */
-
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
index 24f266468ca539a3797c7164dcaa010184865c8e..abb0da9d7b4b7c8a21a91341f56852db41f06c67 100644 (file)
@@ -3597,1015 +3597,6 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
        return 0;
 }
 
-#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN)
-#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2)
-#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP_ULL(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE)
-#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024
-
-struct devlink_health_buffer {
-       void *data;
-       u64 offset;
-       u64 bytes_left;
-       u64 bytes_left_metadata;
-       u64 max_nested_depth;
-       u64 curr_nest;
-};
-
-struct devlink_health_buffer_desc {
-       int attrtype;
-       u16 len;
-       u8 nla_type;
-       u8 nest_end;
-       int value[0];
-};
-
-static void
-devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list,
-                            u64 num_of_buffers)
-{
-       u64 i;
-
-       for (i = 0; i < num_of_buffers; i++) {
-               memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE);
-               buffers_list[i]->offset = 0;
-               buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE;
-               buffers_list[i]->bytes_left_metadata =
-                       DEVLINK_HEALTH_BUFFER_DATA_SIZE;
-               buffers_list[i]->max_nested_depth = 0;
-               buffers_list[i]->curr_nest = 0;
-       }
-}
-
-static void
-devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
-                              u64 size);
-
-static struct devlink_health_buffer **
-devlink_health_buffers_create(u64 size)
-{
-       struct devlink_health_buffer **buffers_list;
-       u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size);
-       u64 i;
-
-       buffers_list = kcalloc(num_of_buffers,
-                              sizeof(struct devlink_health_buffer *),
-                              GFP_KERNEL);
-       if (!buffers_list)
-               return NULL;
-
-       for (i = 0; i < num_of_buffers; i++) {
-               struct devlink_health_buffer *buffer;
-               void *data;
-
-               buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-               data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL);
-               if (!buffer || !data) {
-                       kfree(buffer);
-                       kfree(data);
-                       goto buffers_cleanup;
-               }
-               buffers_list[i] = buffer;
-               buffer->data = data;
-       }
-       devlink_health_buffers_reset(buffers_list, num_of_buffers);
-
-       return buffers_list;
-
-buffers_cleanup:
-       devlink_health_buffers_destroy(buffers_list, --i);
-       kfree(buffers_list);
-       return NULL;
-}
-
-static void
-devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
-                              u64 num_of_buffers)
-{
-       u64 i;
-
-       for (i = 0; i < num_of_buffers; i++) {
-               kfree(buffers_list[i]->data);
-               kfree(buffers_list[i]);
-       }
-}
-
-void
-devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer,
-                                int len)
-{
-       buffer->offset += len;
-}
-
-/* In order to store a nest, need two descriptors, for start and end */
-#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2)
-
-int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer,
-                                    int len, int metadata_len)
-{
-       if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE)
-               return -EINVAL;
-
-       if (buffer->bytes_left < len ||
-           buffer->bytes_left_metadata < metadata_len)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static struct devlink_health_buffer_desc *
-devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer)
-{
-       return buffer->data + buffer->offset;
-}
-
-int
-devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-                                int attrtype)
-{
-       struct devlink_health_buffer_desc *desc;
-       int err;
-
-       err = devlink_health_buffer_verify_len(buffer, 0,
-                                              DEVLINK_HEALTH_BUFFER_NEST_SIZE);
-       if (err)
-               return err;
-
-       if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT &&
-           attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR &&
-           attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE &&
-           attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY)
-               return -EINVAL;
-
-       desc = devlink_health_buffer_get_desc_from_offset(buffer);
-
-       desc->attrtype = attrtype;
-       buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE;
-       devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
-
-       buffer->curr_nest++;
-       buffer->max_nested_depth = max(buffer->max_nested_depth,
-                                      buffer->curr_nest);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start);
-
-enum devlink_health_buffer_nest_end_cancel {
-       DEVLINK_HEALTH_BUFFER_NEST_END = 1,
-       DEVLINK_HEALTH_BUFFER_NEST_CANCEL,
-};
-
-static void
-devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer,
-                                     enum devlink_health_buffer_nest_end_cancel nest)
-{
-       struct devlink_health_buffer_desc *desc;
-
-       WARN_ON(!buffer->curr_nest);
-       buffer->curr_nest--;
-
-       desc = devlink_health_buffer_get_desc_from_offset(buffer);
-       desc->nest_end = nest;
-       devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
-}
-
-void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
-{
-       devlink_health_buffer_nest_end_cancel(buffer,
-                                             DEVLINK_HEALTH_BUFFER_NEST_END);
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end);
-
-void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
-{
-       devlink_health_buffer_nest_end_cancel(buffer,
-                                             DEVLINK_HEALTH_BUFFER_NEST_CANCEL);
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel);
-
-int
-devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-                                     char *name)
-{
-       struct devlink_health_buffer_desc *desc;
-       int err;
-
-       err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1,
-                                              sizeof(*desc));
-       if (err)
-               return err;
-
-       desc = devlink_health_buffer_get_desc_from_offset(buffer);
-       desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME;
-       desc->nla_type = NLA_NUL_STRING;
-       desc->len = strlen(name) + 1;
-       memcpy(&desc->value, name, desc->len);
-       devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
-
-       buffer->bytes_left_metadata -= sizeof(*desc);
-       buffer->bytes_left -= (strlen(name) + 1);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name);
-
-static int
-devlink_health_buffer_put_value(struct devlink_health_buffer *buffer,
-                               u8 nla_type, void *value, int len)
-{
-       struct devlink_health_buffer_desc *desc;
-       int err;
-
-       err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc));
-       if (err)
-               return err;
-
-       desc = devlink_health_buffer_get_desc_from_offset(buffer);
-       desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA;
-       desc->nla_type = nla_type;
-       desc->len = len;
-       memcpy(&desc->value, value, len);
-       devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
-
-       buffer->bytes_left_metadata -= sizeof(*desc);
-       buffer->bytes_left -= len;
-
-       return 0;
-}
-
-int
-devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-                                  u8 value)
-{
-       int err;
-
-       err = devlink_health_buffer_put_value(buffer, NLA_U8, &value,
-                                             sizeof(value));
-       if (err)
-               return err;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8);
-
-int
-devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-                                   u32 value)
-{
-       int err;
-
-       err = devlink_health_buffer_put_value(buffer, NLA_U32, &value,
-                                             sizeof(value));
-       if (err)
-               return err;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32);
-
-int
-devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-                                   u64 value)
-{
-       int err;
-
-       err = devlink_health_buffer_put_value(buffer, NLA_U64, &value,
-                                             sizeof(value));
-       if (err)
-               return err;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64);
-
-int
-devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-                                      char *name)
-{
-       int err;
-
-       if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
-               return -EINVAL;
-
-       err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name,
-                                             strlen(name) + 1);
-       if (err)
-               return err;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string);
-
-int
-devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-                                    void *data, int len)
-{
-       int err;
-
-       if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
-               return -EINVAL;
-
-       err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len);
-       if (err)
-               return err;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data);
-
-static int
-devlink_health_buffer_fill_data(struct sk_buff *skb,
-                               struct devlink_health_buffer_desc *desc)
-{
-       int err = -EINVAL;
-
-       switch (desc->nla_type) {
-       case NLA_U8:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-                                *(u8 *)desc->value);
-               break;
-       case NLA_U32:
-               err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-                                 *(u32 *)desc->value);
-               break;
-       case NLA_U64:
-               err = nla_put_u64_64bit(skb,
-                                       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-                                       *(u64 *)desc->value, DEVLINK_ATTR_PAD);
-               break;
-       case NLA_NUL_STRING:
-               err = nla_put_string(skb,
-                                    DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-                                    (char *)&desc->value);
-               break;
-       case NLA_BINARY:
-               err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-                             desc->len, (void *)&desc->value);
-               break;
-       }
-
-       return err;
-}
-
-static int
-devlink_health_buffer_fill_type(struct sk_buff *skb,
-                               struct devlink_health_buffer_desc *desc)
-{
-       int err = -EINVAL;
-
-       switch (desc->nla_type) {
-       case NLA_U8:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-                                NLA_U8);
-               break;
-       case NLA_U32:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-                                NLA_U32);
-               break;
-       case NLA_U64:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-                                NLA_U64);
-               break;
-       case NLA_NUL_STRING:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-                                NLA_NUL_STRING);
-               break;
-       case NLA_BINARY:
-               err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-                                NLA_BINARY);
-               break;
-       }
-
-       return err;
-}
-
-static inline struct devlink_health_buffer_desc *
-devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc)
-{
-       return (void *)&desc->value + desc->len;
-}
-
-static int
-devlink_health_buffer_prepare_skb(struct sk_buff *skb,
-                                 struct devlink_health_buffer *buffer)
-{
-       struct devlink_health_buffer_desc *last_desc, *desc;
-       struct nlattr **buffer_nlattr;
-       int err;
-       int i = 0;
-
-       buffer_nlattr = kcalloc(buffer->max_nested_depth,
-                               sizeof(*buffer_nlattr), GFP_KERNEL);
-       if (!buffer_nlattr)
-               return -EINVAL;
-
-       last_desc = devlink_health_buffer_get_desc_from_offset(buffer);
-       desc = buffer->data;
-       while (desc != last_desc) {
-               switch (desc->attrtype) {
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT:
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR:
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE:
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY:
-                       buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype);
-                       if (!buffer_nlattr[i])
-                               goto nla_put_failure;
-                       i++;
-                       break;
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA:
-                       err = devlink_health_buffer_fill_data(skb, desc);
-                       if (err)
-                               goto nla_put_failure;
-                       err = devlink_health_buffer_fill_type(skb, desc);
-                       if (err)
-                               goto nla_put_failure;
-                       break;
-               case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME:
-                       err = nla_put_string(skb, desc->attrtype,
-                                            (char *)&desc->value);
-                       if (err)
-                               goto nla_put_failure;
-                       break;
-               default:
-                       WARN_ON(!desc->nest_end);
-                       WARN_ON(i <= 0);
-                       if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END)
-                               nla_nest_end(skb, buffer_nlattr[--i]);
-                       else
-                               nla_nest_cancel(skb, buffer_nlattr[--i]);
-                       break;
-               }
-               desc = devlink_health_buffer_get_next_desc(desc);
-       }
-
-       return 0;
-
-nla_put_failure:
-       kfree(buffer_nlattr);
-       return err;
-}
-
-static int
-devlink_health_buffer_snd(struct genl_info *info,
-                         enum devlink_command cmd, int flags,
-                         struct devlink_health_buffer **buffers_array,
-                         u64 num_of_buffers)
-{
-       struct sk_buff *skb;
-       struct nlmsghdr *nlh;
-       void *hdr;
-       int err;
-       u64 i;
-
-       for (i = 0; i < num_of_buffers; i++) {
-               /* Skip buffer if driver did not fill it up with any data */
-               if (!buffers_array[i]->offset)
-                       continue;
-
-               skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
-               if (!skb)
-                       return -ENOMEM;
-
-               hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
-                                 &devlink_nl_family, NLM_F_MULTI, cmd);
-               if (!hdr)
-                       goto nla_put_failure;
-
-               err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]);
-               if (err)
-                       goto nla_put_failure;
-
-               genlmsg_end(skb, hdr);
-               err = genlmsg_reply(skb, info);
-               if (err)
-                       return err;
-       }
-
-       skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!skb)
-               return -ENOMEM;
-       nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
-                       NLMSG_DONE, 0, flags | NLM_F_MULTI);
-       if (!nlh)
-               goto nla_put_failure;
-
-       err = genlmsg_reply(skb, info);
-       if (err)
-               return err;
-
-       return 0;
-
-nla_put_failure:
-       err = -EIO;
-       nlmsg_free(skb);
-       return err;
-}
-
-struct devlink_health_reporter {
-       struct list_head list;
-       struct devlink_health_buffer **dump_buffers_array;
-       struct mutex dump_lock; /* lock parallel read/write from dump buffers */
-       struct devlink_health_buffer **diagnose_buffers_array;
-       struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
-       void *priv;
-       const struct devlink_health_reporter_ops *ops;
-       struct devlink *devlink;
-       u64 graceful_period;
-       bool auto_recover;
-       u8 health_state;
-       u8 dump_avail;
-       u64 dump_ts;
-       u64 error_count;
-       u64 recovery_count;
-       u64 last_recovery_ts;
-};
-
-enum devlink_health_reporter_state {
-       DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
-       DEVLINK_HEALTH_REPORTER_STATE_ERROR,
-};
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
-       return reporter->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
-
-static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
-                                    const char *reporter_name)
-{
-       struct devlink_health_reporter *reporter;
-
-       list_for_each_entry(reporter, &devlink->reporter_list, list)
-               if (!strcmp(reporter->ops->name, reporter_name))
-                       return reporter;
-       return NULL;
-}
-
-/**
- *     devlink_health_reporter_create - create devlink health reporter
- *
- *     @devlink: devlink
- *     @ops: ops
- *     @graceful_period: to avoid recovery loops, in msecs
- *     @auto_recover: auto recover when error occurs
- *     @priv: priv
- */
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-                              const struct devlink_health_reporter_ops *ops,
-                              u64 graceful_period, bool auto_recover,
-                              void *priv)
-{
-       struct devlink_health_reporter *reporter;
-
-       mutex_lock(&devlink->lock);
-       if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
-               reporter = ERR_PTR(-EEXIST);
-               goto unlock;
-       }
-
-       if (WARN_ON(ops->dump && !ops->dump_size) ||
-           WARN_ON(ops->diagnose && !ops->diagnose_size) ||
-           WARN_ON(auto_recover && !ops->recover) ||
-           WARN_ON(graceful_period && !ops->recover)) {
-               reporter = ERR_PTR(-EINVAL);
-               goto unlock;
-       }
-
-       reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
-       if (!reporter) {
-               reporter = ERR_PTR(-ENOMEM);
-               goto unlock;
-       }
-
-       if (ops->dump) {
-               reporter->dump_buffers_array =
-                       devlink_health_buffers_create(ops->dump_size);
-               if (!reporter->dump_buffers_array) {
-                       kfree(reporter);
-                       reporter = ERR_PTR(-ENOMEM);
-                       goto unlock;
-               }
-       }
-
-       if (ops->diagnose) {
-               reporter->diagnose_buffers_array =
-                       devlink_health_buffers_create(ops->diagnose_size);
-               if (!reporter->diagnose_buffers_array) {
-                       devlink_health_buffers_destroy(reporter->dump_buffers_array,
-                                                      DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
-                       kfree(reporter);
-                       reporter = ERR_PTR(-ENOMEM);
-                       goto unlock;
-               }
-       }
-
-       list_add_tail(&reporter->list, &devlink->reporter_list);
-       mutex_init(&reporter->dump_lock);
-       mutex_init(&reporter->diagnose_lock);
-
-       reporter->priv = priv;
-       reporter->ops = ops;
-       reporter->devlink = devlink;
-       reporter->graceful_period = graceful_period;
-       reporter->auto_recover = auto_recover;
-unlock:
-       mutex_unlock(&devlink->lock);
-       return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
-
-/**
- *     devlink_health_reporter_destroy - destroy devlink health reporter
- *
- *     @reporter: devlink health reporter to destroy
- */
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
-       mutex_lock(&reporter->devlink->lock);
-       list_del(&reporter->list);
-       devlink_health_buffers_destroy(reporter->dump_buffers_array,
-                                      DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-       devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
-                                      DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
-       kfree(reporter);
-       mutex_unlock(&reporter->devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-                               void *priv_ctx)
-{
-       int err;
-
-       if (!reporter->ops->recover)
-               return -EOPNOTSUPP;
-
-       err = reporter->ops->recover(reporter, priv_ctx);
-       if (err)
-               return err;
-
-       reporter->recovery_count++;
-       reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
-       reporter->last_recovery_ts = jiffies;
-
-       return 0;
-}
-
-static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
-                                 void *priv_ctx)
-{
-       int err;
-
-       if (!reporter->ops->dump)
-               return 0;
-
-       if (reporter->dump_avail)
-               return 0;
-
-       devlink_health_buffers_reset(reporter->dump_buffers_array,
-                                    DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-       err = reporter->ops->dump(reporter, reporter->dump_buffers_array,
-                                    DEVLINK_HEALTH_BUFFER_SIZE,
-                                    DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size),
-                                    priv_ctx);
-       if (!err) {
-               reporter->dump_avail = true;
-               reporter->dump_ts = jiffies;
-       }
-
-       return err;
-}
-
-int devlink_health_report(struct devlink_health_reporter *reporter,
-                         const char *msg, void *priv_ctx)
-{
-       struct devlink *devlink = reporter->devlink;
-       int err = 0;
-
-       /* write a log message of the current error */
-       WARN_ON(!msg);
-       trace_devlink_health_report(devlink, reporter->ops->name, msg);
-       reporter->error_count++;
-
-       /* abort if the previous error wasn't recovered */
-       if (reporter->auto_recover &&
-           (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
-            jiffies - reporter->last_recovery_ts <
-            msecs_to_jiffies(reporter->graceful_period))) {
-               trace_devlink_health_recover_aborted(devlink,
-                                                    reporter->ops->name,
-                                                    reporter->health_state,
-                                                    jiffies -
-                                                    reporter->last_recovery_ts);
-               return -ECANCELED;
-       }
-
-       reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
-
-       mutex_lock(&reporter->dump_lock);
-       /* store current dump of current error, for later analysis */
-       devlink_health_do_dump(reporter, priv_ctx);
-       mutex_unlock(&reporter->dump_lock);
-
-       if (reporter->auto_recover)
-               err = devlink_health_reporter_recover(reporter, priv_ctx);
-
-       return err;
-}
-EXPORT_SYMBOL_GPL(devlink_health_report);
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_info(struct devlink *devlink,
-                                     struct genl_info *info)
-{
-       char *reporter_name;
-
-       if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
-               return NULL;
-
-       reporter_name =
-               nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
-       return devlink_health_reporter_find_by_name(devlink, reporter_name);
-}
-
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
-                               struct devlink *devlink,
-                               struct devlink_health_reporter *reporter,
-                               enum devlink_command cmd, u32 portid,
-                               u32 seq, int flags)
-{
-       struct nlattr *reporter_attr;
-       void *hdr;
-
-       hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-       if (!hdr)
-               return -EMSGSIZE;
-
-       if (devlink_nl_put_handle(msg, devlink))
-               goto genlmsg_cancel;
-
-       reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
-       if (!reporter_attr)
-               goto genlmsg_cancel;
-       if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
-                          reporter->ops->name))
-               goto reporter_nest_cancel;
-       if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
-                      reporter->health_state))
-               goto reporter_nest_cancel;
-       if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
-                             reporter->error_count, DEVLINK_ATTR_PAD))
-               goto reporter_nest_cancel;
-       if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
-                             reporter->recovery_count, DEVLINK_ATTR_PAD))
-               goto reporter_nest_cancel;
-       if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
-                             reporter->graceful_period,
-                             DEVLINK_ATTR_PAD))
-               goto reporter_nest_cancel;
-       if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
-                      reporter->auto_recover))
-               goto reporter_nest_cancel;
-       if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,
-                      reporter->dump_avail))
-               goto reporter_nest_cancel;
-       if (reporter->dump_avail &&
-           nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
-                             jiffies_to_msecs(reporter->dump_ts),
-                             DEVLINK_ATTR_PAD))
-               goto reporter_nest_cancel;
-
-       nla_nest_end(msg, reporter_attr);
-       genlmsg_end(msg, hdr);
-       return 0;
-
-reporter_nest_cancel:
-       nla_nest_end(msg, reporter_attr);
-genlmsg_cancel:
-       genlmsg_cancel(msg, hdr);
-       return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
-                                                  struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-       struct sk_buff *msg;
-       int err;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
-
-       err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
-                                             DEVLINK_CMD_HEALTH_REPORTER_GET,
-                                             info->snd_portid, info->snd_seq,
-                                             0);
-       if (err) {
-               nlmsg_free(msg);
-               return err;
-       }
-
-       return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
-                                         struct netlink_callback *cb)
-{
-       struct devlink_health_reporter *reporter;
-       struct devlink *devlink;
-       int start = cb->args[0];
-       int idx = 0;
-       int err;
-
-       mutex_lock(&devlink_mutex);
-       list_for_each_entry(devlink, &devlink_list, list) {
-               if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
-                       continue;
-               mutex_lock(&devlink->lock);
-               list_for_each_entry(reporter, &devlink->reporter_list,
-                                   list) {
-                       if (idx < start) {
-                               idx++;
-                               continue;
-                       }
-                       err = devlink_nl_health_reporter_fill(msg, devlink,
-                                                             reporter,
-                                                             DEVLINK_CMD_HEALTH_REPORTER_GET,
-                                                             NETLINK_CB(cb->skb).portid,
-                                                             cb->nlh->nlmsg_seq,
-                                                             NLM_F_MULTI);
-                       if (err) {
-                               mutex_unlock(&devlink->lock);
-                               goto out;
-                       }
-                       idx++;
-               }
-               mutex_unlock(&devlink->lock);
-       }
-out:
-       mutex_unlock(&devlink_mutex);
-
-       cb->args[0] = idx;
-       return msg->len;
-}
-
-static int
-devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
-                                       struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       if (!reporter->ops->recover &&
-           (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
-            info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
-               return -EINVAL;
-
-       if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
-               reporter->graceful_period =
-                       nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
-
-       if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
-               reporter->auto_recover =
-                       nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
-
-       return 0;
-}
-
-static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
-                                                      struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       return devlink_health_reporter_recover(reporter, NULL);
-}
-
-static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
-                                                       struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-       u64 num_of_buffers;
-       int err;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       if (!reporter->ops->diagnose)
-               return -EOPNOTSUPP;
-
-       num_of_buffers =
-               DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size);
-
-       mutex_lock(&reporter->diagnose_lock);
-       devlink_health_buffers_reset(reporter->diagnose_buffers_array,
-                                    num_of_buffers);
-
-       err = reporter->ops->diagnose(reporter,
-                                     reporter->diagnose_buffers_array,
-                                     DEVLINK_HEALTH_BUFFER_SIZE,
-                                     num_of_buffers);
-       if (err)
-               goto out;
-
-       err = devlink_health_buffer_snd(info,
-                                       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-                                       0, reporter->diagnose_buffers_array,
-                                       num_of_buffers);
-       if (err)
-               goto out;
-
-       mutex_unlock(&reporter->diagnose_lock);
-       return 0;
-
-out:
-       mutex_unlock(&reporter->diagnose_lock);
-       return err;
-}
-
-static void
-devlink_health_dump_clear(struct devlink_health_reporter *reporter)
-{
-       reporter->dump_avail = false;
-       reporter->dump_ts = 0;
-       devlink_health_buffers_reset(reporter->dump_buffers_array,
-                                    DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-}
-
-static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
-                                                       struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-       u64 num_of_buffers;
-       int err;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       if (!reporter->ops->dump)
-               return -EOPNOTSUPP;
-
-       num_of_buffers =
-               DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size);
-
-       mutex_lock(&reporter->dump_lock);
-       err = devlink_health_do_dump(reporter, NULL);
-       if (err)
-               goto out;
-
-       err = devlink_health_buffer_snd(info,
-                                       DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-                                       0, reporter->dump_buffers_array,
-                                       num_of_buffers);
-
-out:
-       mutex_unlock(&reporter->dump_lock);
-       return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
-                                              struct genl_info *info)
-{
-       struct devlink *devlink = info->user_ptr[0];
-       struct devlink_health_reporter *reporter;
-
-       reporter = devlink_health_reporter_get_from_info(devlink, info);
-       if (!reporter)
-               return -EINVAL;
-
-       mutex_lock(&reporter->dump_lock);
-       devlink_health_dump_clear(reporter);
-       mutex_unlock(&reporter->dump_lock);
-       return 0;
-}
-
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4631,9 +3622,6 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
        [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
        [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
-       [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
-       [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
-       [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4854,51 +3842,6 @@ static const struct genl_ops devlink_nl_ops[] = {
                .flags = GENL_ADMIN_PERM,
                .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
        },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
-               .doit = devlink_nl_cmd_health_reporter_get_doit,
-               .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
-               .policy = devlink_nl_policy,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-               /* can be retrieved by unprivileged users */
-       },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
-               .doit = devlink_nl_cmd_health_reporter_set_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-       },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
-               .doit = devlink_nl_cmd_health_reporter_recover_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-       },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-               .doit = devlink_nl_cmd_health_reporter_diagnose_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-       },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-               .doit = devlink_nl_cmd_health_reporter_dump_get_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
-       },
-       {
-               .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
-               .doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
-               .policy = devlink_nl_policy,
-               .flags = GENL_ADMIN_PERM,
-               .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-                                 DEVLINK_NL_FLAG_NO_LOCK,
-       },
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -4939,7 +3882,6 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
        INIT_LIST_HEAD(&devlink->resource_list);
        INIT_LIST_HEAD(&devlink->param_list);
        INIT_LIST_HEAD(&devlink->region_list);
-       INIT_LIST_HEAD(&devlink->reporter_list);
        mutex_init(&devlink->lock);
        return devlink;
 }