Skip to content

Commit 29429f3

Browse files
Daniel Jurgens authored and davem330 committed
net/mlx5e: Timeout if SQ doesn't flush during close
Avoid an infinite loop by timing out while waiting for the SQ to flush. Also clean up the TX descriptors if that happens.

Fixes: f62b8bb ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality')
Signed-off-by: Daniel Jurgens <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 65ee670 commit 29429f3

3 files changed

Lines changed: 56 additions & 3 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ struct mlx5e_sq_dma {
305305
/*
 * Bit numbers used with set_bit()/clear_bit()/test_bit() on sq->state.
 *
 * MLX5E_SQ_STATE_WAKE_TXQ_ENABLE is cleared by mlx5e_close_sq() to prevent
 * netif_tx_wake_queue.
 * MLX5E_SQ_STATE_TX_TIMEOUT is set by mlx5e_close_sq() when moving the SQ to
 * the error state fails or when the SQ does not drain within the flush
 * timeout; while it is set, mlx5e_poll_tx_cq() returns false immediately.
 */
enum {
306306
MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
307307
MLX5E_SQ_STATE_BF_ENABLE,
308+
MLX5E_SQ_STATE_TX_TIMEOUT,
308309
};
309310

310311
struct mlx5e_ico_wqe_info {
@@ -589,6 +590,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
589590
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
590591
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
591592
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
593+
void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
592594

593595
void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
594596
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@
3939
#include "eswitch.h"
4040
#include "vxlan.h"
4141

42+
/*
 * Bounds for the "wait until the SQ is empty" loop in mlx5e_close_sq().
 *
 * The loop sleeps MLX5_EN_QP_FLUSH_MSLEEP_QUANT milliseconds per iteration
 * and gives up once its iteration counter exceeds MLX5_EN_QP_FLUSH_MAX_ITER,
 * which is the total budget (MLX5_EN_QP_FLUSH_TIMEOUT_MS) divided by the
 * per-iteration sleep: 5000 / 20 = 250.
 */
enum {
43+
MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000,
44+
MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20,
45+
MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS /
46+
MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
47+
};
48+
4249
struct mlx5e_rq_param {
4350
u32 rqc[MLX5_ST_SZ_DW(rqc)];
4451
struct mlx5_wq_param wq;
@@ -782,6 +789,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
782789

783790
static void mlx5e_close_sq(struct mlx5e_sq *sq)
784791
{
792+
int tout = 0;
793+
int err;
794+
785795
if (sq->txq) {
786796
clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
787797
/* prevent netif_tx_wake_queue */
@@ -792,15 +802,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq)
792802
if (mlx5e_sq_has_room_for(sq, 1))
793803
mlx5e_send_nop(sq, true);
794804

795-
mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
805+
err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
806+
MLX5_SQC_STATE_ERR);
807+
if (err)
808+
set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
796809
}
797810

798-
while (sq->cc != sq->pc) /* wait till sq is empty */
799-
msleep(20);
811+
/* wait till sq is empty, unless a TX timeout occurred on this SQ */
812+
while (sq->cc != sq->pc &&
813+
!test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
814+
msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
815+
if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
816+
set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
817+
}
800818

801819
/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
802820
napi_synchronize(&sq->channel->napi);
803821

822+
mlx5e_free_tx_descs(sq);
804823
mlx5e_disable_sq(sq);
805824
mlx5e_destroy_sq(sq);
806825
}

drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
341341
return mlx5e_sq_xmit(sq, skb);
342342
}
343343

344+
/*
 * mlx5e_free_tx_descs - release every TX descriptor still pending on @sq.
 * @sq: send queue to clean up.
 *
 * Walks the queue from sq->cc until it reaches sq->pc. For each entry that
 * carries an skb, its DMA mappings are unmapped and the skb is freed; entries
 * without an skb are skipped. When the queue is already empty (cc == pc) the
 * function does nothing.
 *
 * Called from mlx5e_close_sq() after napi_synchronize(), so that descriptors
 * left behind by an SQ that failed to flush are still reclaimed.
 */
void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
345+
{
346+
struct mlx5e_tx_wqe_info *wi;
347+
struct sk_buff *skb;
348+
u16 ci;
349+
int i;
350+
351+
while (sq->cc != sq->pc) {
352+
/* Mask the running counter to get the slot index into the per-SQ arrays. */
ci = sq->cc & sq->wq.sz_m1;
353+
skb = sq->skb[ci];
354+
wi = &sq->wqe_info[ci];
355+
356+
/* No skb attached: nothing to unmap or free, advance by a single slot. */
if (!skb) { /* nop */
357+
sq->cc++;
358+
continue;
359+
}
360+
361+
/* Unmap each DMA entry recorded for this WQE, consuming the DMA fifo. */
for (i = 0; i < wi->num_dma; i++) {
362+
struct mlx5e_sq_dma *dma =
363+
mlx5e_dma_get(sq, sq->dma_fifo_cc++);
364+
365+
mlx5e_tx_dma_unmap(sq->pdev, dma);
366+
}
367+
368+
dev_kfree_skb_any(skb);
369+
/* Skip past all the slots this WQE occupied. */
sq->cc += wi->num_wqebbs;
370+
}
371+
}
372+
344373
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
345374
{
346375
struct mlx5e_sq *sq;
@@ -352,6 +381,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
352381

353382
sq = container_of(cq, struct mlx5e_sq, cq);
354383

384+
if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
385+
return false;
386+
355387
npkts = 0;
356388
nbytes = 0;
357389

0 commit comments

Comments
 (0)