Skip to content

Commit 4120553

Browse files
committed
nfsd: Fix NFSv4 READ on RDMA when using readv
svcrdma expects that the payload falls precisely into the xdr_buf page vector. This does not seem to be the case for nfsd4_encode_readv(). This code is called only when fops->splice_read is missing or when RQ_SPLICE_OK is clear, so it's not a noticeable problem in many common cases.

Add a new transport method, ->xpo_read_payload, so that when a READ payload does not fit exactly in rq_res's page vector, the XDR encoder can inform the RPC transport exactly where that payload is, without the payload's XDR pad.

That way, when a Write chunk is present, the transport knows what byte range in the Reply message is supposed to be matched with the chunk.

Note that the Linux NFS server implementation of NFS/RDMA can currently handle only one Write chunk per RPC-over-RDMA message. This simplifies the implementation of this fix.

Fixes: b042098 ("nfsd4: allow exotic read compounds")
Buglink: https://bugzilla.kernel.org/show_bug.cgi?id=198053
Signed-off-by: Chuck Lever <[email protected]>
1 parent 057a227 commit 4120553

10 files changed

Lines changed: 106 additions & 23 deletions

File tree

fs/nfsd/nfs4xdr.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3594,17 +3594,17 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
35943594
u32 zzz = 0;
35953595
int pad;
35963596

3597+
/*
3598+
* svcrdma requires every READ payload to start somewhere
3599+
* in xdr->pages.
3600+
*/
3601+
if (xdr->iov == xdr->buf->head) {
3602+
xdr->iov = NULL;
3603+
xdr->end = xdr->p;
3604+
}
3605+
35973606
len = maxcount;
35983607
v = 0;
3599-
3600-
thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
3601-
p = xdr_reserve_space(xdr, (thislen+3)&~3);
3602-
WARN_ON_ONCE(!p);
3603-
resp->rqstp->rq_vec[v].iov_base = p;
3604-
resp->rqstp->rq_vec[v].iov_len = thislen;
3605-
v++;
3606-
len -= thislen;
3607-
36083608
while (len) {
36093609
thislen = min_t(long, len, PAGE_SIZE);
36103610
p = xdr_reserve_space(xdr, (thislen+3)&~3);
@@ -3623,6 +3623,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
36233623
read->rd_length = maxcount;
36243624
if (nfserr)
36253625
return nfserr;
3626+
if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount))
3627+
return nfserr_io;
36263628
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
36273629

36283630
tmp = htonl(eof);

include/linux/sunrpc/svc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,9 @@ void svc_wake_up(struct svc_serv *);
517517
void svc_reserve(struct svc_rqst *rqstp, int space);
518518
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
519519
char * svc_print_addr(struct svc_rqst *, char *, size_t);
520+
int svc_encode_read_payload(struct svc_rqst *rqstp,
521+
unsigned int offset,
522+
unsigned int length);
520523
unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
521524
struct page **pages,
522525
struct kvec *first, size_t total);

include/linux/sunrpc/svc_rdma.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ struct svc_rdma_recv_ctxt {
137137
unsigned int rc_page_count;
138138
unsigned int rc_hdr_count;
139139
u32 rc_inv_rkey;
140+
unsigned int rc_read_payload_offset;
141+
unsigned int rc_read_payload_length;
140142
struct page *rc_pages[RPCSVC_MAXPAGES];
141143
};
142144

@@ -170,7 +172,9 @@ extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
170172
struct svc_rqst *rqstp,
171173
struct svc_rdma_recv_ctxt *head, __be32 *p);
172174
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
173-
__be32 *wr_ch, struct xdr_buf *xdr);
175+
__be32 *wr_ch, struct xdr_buf *xdr,
176+
unsigned int offset,
177+
unsigned long length);
174178
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
175179
__be32 *rp_ch, bool writelist,
176180
struct xdr_buf *xdr);
@@ -189,6 +193,8 @@ extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
189193
struct svc_rdma_send_ctxt *ctxt,
190194
struct xdr_buf *xdr, __be32 *wr_lst);
191195
extern int svc_rdma_sendto(struct svc_rqst *);
196+
extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
197+
unsigned int length);
192198

193199
/* svc_rdma_transport.c */
194200
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);

include/linux/sunrpc/svc_xprt.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ struct svc_xprt_ops {
2121
int (*xpo_has_wspace)(struct svc_xprt *);
2222
int (*xpo_recvfrom)(struct svc_rqst *);
2323
int (*xpo_sendto)(struct svc_rqst *);
24+
int (*xpo_read_payload)(struct svc_rqst *, unsigned int,
25+
unsigned int);
2426
void (*xpo_release_rqst)(struct svc_rqst *);
2527
void (*xpo_detach)(struct svc_xprt *);
2628
void (*xpo_free)(struct svc_xprt *);

net/sunrpc/svc.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,22 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
16361636
}
16371637
EXPORT_SYMBOL_GPL(svc_max_payload);
16381638

1639+
/**
1640+
* svc_encode_read_payload - mark a range of bytes as a READ payload
1641+
* @rqstp: svc_rqst to operate on
1642+
* @offset: payload's byte offset in rqstp->rq_res
1643+
* @length: size of payload, in bytes
1644+
*
1645+
* Returns zero on success, or a negative errno if a permanent
1646+
* error occurred.
1647+
*/
1648+
int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset,
1649+
unsigned int length)
1650+
{
1651+
return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length);
1652+
}
1653+
EXPORT_SYMBOL_GPL(svc_encode_read_payload);
1654+
16391655
/**
16401656
* svc_fill_write_vector - Construct data argument for VFS write call
16411657
* @rqstp: svc_rqst to operate on

net/sunrpc/svcsock.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,12 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
279279
return len;
280280
}
281281

282+
static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset,
283+
unsigned int length)
284+
{
285+
return 0;
286+
}
287+
282288
/*
283289
* Report socket names for nfsdfs
284290
*/
@@ -653,6 +659,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
653659
.xpo_create = svc_udp_create,
654660
.xpo_recvfrom = svc_udp_recvfrom,
655661
.xpo_sendto = svc_udp_sendto,
662+
.xpo_read_payload = svc_sock_read_payload,
656663
.xpo_release_rqst = svc_release_udp_skb,
657664
.xpo_detach = svc_sock_detach,
658665
.xpo_free = svc_sock_free,
@@ -1171,6 +1178,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
11711178
.xpo_create = svc_tcp_create,
11721179
.xpo_recvfrom = svc_tcp_recvfrom,
11731180
.xpo_sendto = svc_tcp_sendto,
1181+
.xpo_read_payload = svc_sock_read_payload,
11741182
.xpo_release_rqst = svc_release_skb,
11751183
.xpo_detach = svc_tcp_sock_detach,
11761184
.xpo_free = svc_sock_free,

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
193193

194194
out:
195195
ctxt->rc_page_count = 0;
196+
ctxt->rc_read_payload_length = 0;
196197
return ctxt;
197198

198199
out_empty:

net/sunrpc/xprtrdma/svc_rdma_rw.c

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -482,25 +482,28 @@ static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
482482
vec->iov_len);
483483
}
484484

485-
/* Send an xdr_buf's page list by itself. A Write chunk is
486-
* just the page list. a Reply chunk is the head, page list,
487-
* and tail. This function is shared between the two types
488-
* of chunk.
485+
/* Send an xdr_buf's page list by itself. A Write chunk is just
486+
* the page list. A Reply chunk is @xdr's head, page list, and
487+
* tail. This function is shared between the two types of chunk.
489488
*/
490489
static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
491-
struct xdr_buf *xdr)
490+
struct xdr_buf *xdr,
491+
unsigned int offset,
492+
unsigned long length)
492493
{
493494
info->wi_xdr = xdr;
494-
info->wi_next_off = 0;
495+
info->wi_next_off = offset - xdr->head[0].iov_len;
495496
return svc_rdma_build_writes(info, svc_rdma_pagelist_to_sg,
496-
xdr->page_len);
497+
length);
497498
}
498499

499500
/**
500501
* svc_rdma_send_write_chunk - Write all segments in a Write chunk
501502
* @rdma: controlling RDMA transport
502503
* @wr_ch: Write chunk provided by client
503504
* @xdr: xdr_buf containing the data payload
505+
* @offset: payload's byte offset in @xdr
506+
* @length: size of payload, in bytes
504507
*
505508
* Returns a non-negative number of bytes the chunk consumed, or
506509
* %-E2BIG if the payload was larger than the Write chunk,
@@ -510,19 +513,20 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
510513
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
511514
*/
512515
int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
513-
struct xdr_buf *xdr)
516+
struct xdr_buf *xdr,
517+
unsigned int offset, unsigned long length)
514518
{
515519
struct svc_rdma_write_info *info;
516520
int ret;
517521

518-
if (!xdr->page_len)
522+
if (!length)
519523
return 0;
520524

521525
info = svc_rdma_write_info_alloc(rdma, wr_ch);
522526
if (!info)
523527
return -ENOMEM;
524528

525-
ret = svc_rdma_send_xdr_pagelist(info, xdr);
529+
ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length);
526530
if (ret < 0)
527531
goto out_err;
528532

@@ -531,7 +535,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
531535
goto out_err;
532536

533537
trace_svcrdma_encode_write(xdr->page_len);
534-
return xdr->page_len;
538+
return length;
535539

536540
out_err:
537541
svc_rdma_write_info_free(info);
@@ -571,7 +575,9 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, __be32 *rp_ch,
571575
* client did not provide Write chunks.
572576
*/
573577
if (!writelist && xdr->page_len) {
574-
ret = svc_rdma_send_xdr_pagelist(info, xdr);
578+
ret = svc_rdma_send_xdr_pagelist(info, xdr,
579+
xdr->head[0].iov_len,
580+
xdr->page_len);
575581
if (ret < 0)
576582
goto out_err;
577583
consumed += xdr->page_len;

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,18 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
858858

859859
if (wr_lst) {
860860
/* XXX: Presume the client sent only one Write chunk */
861-
ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr);
861+
unsigned long offset;
862+
unsigned int length;
863+
864+
if (rctxt->rc_read_payload_length) {
865+
offset = rctxt->rc_read_payload_offset;
866+
length = rctxt->rc_read_payload_length;
867+
} else {
868+
offset = xdr->head[0].iov_len;
869+
length = xdr->page_len;
870+
}
871+
ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset,
872+
length);
862873
if (ret < 0)
863874
goto err2;
864875
svc_rdma_xdr_encode_write_list(rdma_resp, wr_lst, ret);
@@ -900,3 +911,30 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
900911
ret = -ENOTCONN;
901912
goto out;
902913
}
914+
915+
/**
916+
* svc_rdma_read_payload - special processing for a READ payload
917+
* @rqstp: svc_rqst to operate on
918+
* @offset: payload's byte offset in rqstp->rq_res
919+
* @length: size of payload, in bytes
920+
*
921+
* Returns zero on success.
922+
*
923+
* For the moment, just record the xdr_buf location of the READ
924+
* payload. svc_rdma_sendto will use that location later when
925+
* we actually send the payload.
926+
*/
927+
int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
928+
unsigned int length)
929+
{
930+
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
931+
932+
/* XXX: Just one READ payload slot for now, since our
933+
* transport implementation currently supports only one
934+
* Write chunk.
935+
*/
936+
rctxt->rc_read_payload_offset = offset;
937+
rctxt->rc_read_payload_length = length;
938+
939+
return 0;
940+
}

net/sunrpc/xprtrdma/svc_rdma_transport.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
8282
.xpo_create = svc_rdma_create,
8383
.xpo_recvfrom = svc_rdma_recvfrom,
8484
.xpo_sendto = svc_rdma_sendto,
85+
.xpo_read_payload = svc_rdma_read_payload,
8586
.xpo_release_rqst = svc_rdma_release_rqst,
8687
.xpo_detach = svc_rdma_detach,
8788
.xpo_free = svc_rdma_free,

0 commit comments

Comments
 (0)