Skip to content

Commit a0e0cb8

Browse files
mjruhljgunthorpe
authored andcommitted
IB/hfi1: Eliminate races in the SDMA send error path
pq_update() can only be called in two places: from the completion function when the complete (npkts) sequence of packets has been submitted and processed, or from setup function if a subset of the packets were submitted (i.e. the error path). Currently both paths can call pq_update() if an error occurrs. This race will cause the n_req value to go negative, hanging file_close(), or cause a crash by freeing the txlist more than once. Several variables are used to determine SDMA send state. Most of these are unnecessary, and have code inspectible races between the setup function and the completion function, in both the send path and the error path. The request 'status' value can be set by the setup or by the completion function. This is code inspectibly racy. Since the status is not needed in the completion code or by the caller it has been removed. The request 'done' value races between usage by the setup and the completion function. The completion function does not need this. When the number of processed packets matches npkts, it is done. The 'has_error' value races between usage of the setup and the completion function. This can cause incorrect error handling and leave the n_req in an incorrect value (i.e. negative). Simplify the code by removing all of the unneeded state checks and variables. Clean up iovs node when it is freed. Eliminate race conditions in the error path: If all packets are submitted, the completion handler will set the completion status correctly (ok or aborted). If all packets are not submitted, the caller must wait until the submitted packets have completed, and then set the completion status. These two change eliminate the race condition in the error path. Reviewed-by: Mitko Haralanov <[email protected]> Reviewed-by: Mike Marciniszyn <[email protected]> Signed-off-by: Michael J. Ruhl <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent f1a3154 commit a0e0cb8

2 files changed

Lines changed: 39 additions & 51 deletions

File tree

drivers/infiniband/hw/hfi1/user_sdma.c

Lines changed: 39 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
328328
u8 opcode, sc, vl;
329329
u16 pkey;
330330
u32 slid;
331-
int req_queued = 0;
332331
u16 dlid;
333332
u32 selector;
334333

@@ -392,20 +391,21 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
392391
req->data_len = 0;
393392
req->pq = pq;
394393
req->cq = cq;
395-
req->status = -1;
396394
req->ahg_idx = -1;
397395
req->iov_idx = 0;
398396
req->sent = 0;
399397
req->seqnum = 0;
400398
req->seqcomp = 0;
401399
req->seqsubmitted = 0;
402400
req->tids = NULL;
403-
req->done = 0;
404401
req->has_error = 0;
405402
INIT_LIST_HEAD(&req->txps);
406403

407404
memcpy(&req->info, &info, sizeof(info));
408405

406+
/* The request is initialized, count it */
407+
atomic_inc(&pq->n_reqs);
408+
409409
if (req_opcode(info.ctrl) == EXPECTED) {
410410
/* expected must have a TID info and at least one data vector */
411411
if (req->data_iovs < 2) {
@@ -500,7 +500,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
500500
ret = pin_vector_pages(req, &req->iovs[i]);
501501
if (ret) {
502502
req->data_iovs = i;
503-
req->status = ret;
504503
goto free_req;
505504
}
506505
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,14 +560,10 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
561560
req->ahg_idx = sdma_ahg_alloc(req->sde);
562561

563562
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
564-
atomic_inc(&pq->n_reqs);
565-
req_queued = 1;
566563
/* Send the first N packets in the request to buy us some time */
567564
ret = user_sdma_send_pkts(req, pcount);
568-
if (unlikely(ret < 0 && ret != -EBUSY)) {
569-
req->status = ret;
565+
if (unlikely(ret < 0 && ret != -EBUSY))
570566
goto free_req;
571-
}
572567

573568
/*
574569
* It is possible that the SDMA engine would have processed all the
@@ -588,14 +583,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
588583
while (req->seqsubmitted != req->info.npkts) {
589584
ret = user_sdma_send_pkts(req, pcount);
590585
if (ret < 0) {
591-
if (ret != -EBUSY) {
592-
req->status = ret;
593-
WRITE_ONCE(req->has_error, 1);
594-
if (READ_ONCE(req->seqcomp) ==
595-
req->seqsubmitted - 1)
596-
goto free_req;
597-
return ret;
598-
}
586+
if (ret != -EBUSY)
587+
goto free_req;
599588
wait_event_interruptible_timeout(
600589
pq->busy.wait_dma,
601590
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +595,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
606595
*count += idx;
607596
return 0;
608597
free_req:
609-
user_sdma_free_request(req, true);
610-
if (req_queued)
598+
/*
599+
* If the submitted seqsubmitted == npkts, the completion routine
600+
* controls the final state. If sequbmitted < npkts, wait for any
601+
* outstanding packets to finish before cleaning up.
602+
*/
603+
if (req->seqsubmitted < req->info.npkts) {
604+
if (req->seqsubmitted)
605+
wait_event(pq->busy.wait_dma,
606+
(req->seqcomp == req->seqsubmitted - 1));
607+
user_sdma_free_request(req, true);
611608
pq_update(pq);
612-
set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
609+
set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
610+
}
613611
return ret;
614612
}
615613

@@ -917,7 +915,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
917915
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
918916
req->seqsubmitted += count;
919917
if (req->seqsubmitted == req->info.npkts) {
920-
WRITE_ONCE(req->done, 1);
921918
/*
922919
* The txreq has already been submitted to the HW queue
923920
* so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1362,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
13651362
return idx;
13661363
}
13671364

1368-
/*
1369-
* SDMA tx request completion callback. Called when the SDMA progress
1370-
* state machine gets notification that the SDMA descriptors for this
1371-
* tx request have been processed by the DMA engine. Called in
1372-
* interrupt context.
1365+
/**
1366+
* user_sdma_txreq_cb() - SDMA tx request completion callback.
1367+
* @txreq: valid sdma tx request
1368+
* @status: success/failure of request
1369+
*
1370+
* Called when the SDMA progress state machine gets notification that
1371+
* the SDMA descriptors for this tx request have been processed by the
1372+
* DMA engine. Called in interrupt context.
1373+
* Only do work on completed sequences.
13731374
*/
13741375
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
13751376
{
@@ -1378,7 +1379,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
13781379
struct user_sdma_request *req;
13791380
struct hfi1_user_sdma_pkt_q *pq;
13801381
struct hfi1_user_sdma_comp_q *cq;
1381-
u16 idx;
1382+
enum hfi1_sdma_comp_state state = COMPLETE;
13821383

13831384
if (!tx->req)
13841385
return;
@@ -1391,31 +1392,19 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
13911392
SDMA_DBG(req, "SDMA completion with error %d",
13921393
status);
13931394
WRITE_ONCE(req->has_error, 1);
1395+
state = ERROR;
13941396
}
13951397

13961398
req->seqcomp = tx->seqnum;
13971399
kmem_cache_free(pq->txreq_cache, tx);
1398-
tx = NULL;
1399-
1400-
idx = req->info.comp_idx;
1401-
if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
1402-
if (req->seqcomp == req->info.npkts - 1) {
1403-
req->status = 0;
1404-
user_sdma_free_request(req, false);
1405-
pq_update(pq);
1406-
set_comp_state(pq, cq, idx, COMPLETE, 0);
1407-
}
1408-
} else {
1409-
if (status != SDMA_TXREQ_S_OK)
1410-
req->status = status;
1411-
if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
1412-
(READ_ONCE(req->done) ||
1413-
READ_ONCE(req->has_error))) {
1414-
user_sdma_free_request(req, false);
1415-
pq_update(pq);
1416-
set_comp_state(pq, cq, idx, ERROR, req->status);
1417-
}
1418-
}
1400+
1401+
/* sequence isn't complete? We are done */
1402+
if (req->seqcomp != req->info.npkts - 1)
1403+
return;
1404+
1405+
user_sdma_free_request(req, false);
1406+
set_comp_state(pq, cq, req->info.comp_idx, state, status);
1407+
pq_update(pq);
14191408
}
14201409

14211410
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
14481437
if (!node)
14491438
continue;
14501439

1440+
req->iovs[i].node = NULL;
1441+
14511442
if (unpin)
14521443
hfi1_mmu_rb_remove(req->pq->handler,
14531444
&node->rb);

drivers/infiniband/hw/hfi1/user_sdma.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,6 @@ struct user_sdma_request {
205205
/* Writeable fields shared with interrupt */
206206
u64 seqcomp ____cacheline_aligned_in_smp;
207207
u64 seqsubmitted;
208-
/* status of the last txreq completed */
209-
int status;
210208

211209
/* Send side fields */
212210
struct list_head txps ____cacheline_aligned_in_smp;
@@ -228,7 +226,6 @@ struct user_sdma_request {
228226
u16 tididx;
229227
/* progress index moving along the iovs array */
230228
u8 iov_idx;
231-
u8 done;
232229
u8 has_error;
233230

234231
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];

0 commit comments

Comments
 (0)