Commit 793917d
Author: Matthew Wilcox (Oracle)

mm/readahead: Add large folio readahead

Allocate large folios in the readahead code when the filesystem supports
them and it seems worth doing. The heuristic for choosing which folio
sizes will surely need some tuning, but this aggressive ramp-up has been
good for testing.

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>

1 parent 18788cf  commit 793917d
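A note on the ramp-up mentioned above: page_cache_ra_order() (added below) bumps the incoming folio order by two, clamps it to MAX_PAGECACHE_ORDER, and never lets a single folio exceed the readahead window; since each async round is seeded with the order of the folio that triggered it, the order climbs quickly over successive rounds. A minimal user-space sketch of that arithmetic only (not kernel code; ramp_up() is a made-up name, and MAX_PAGECACHE_ORDER is assumed to be 9, i.e. HPAGE_PMD_ORDER on x86-64 with 4kB pages):

#include <stdio.h>

/* Assumed value: HPAGE_PMD_ORDER on x86-64 with THP enabled; the diff
 * below falls back to 8 when THP is disabled. */
#define MAX_PAGECACHE_ORDER 9

/* Same arithmetic as the ramp-up in page_cache_ra_order(): grow by two
 * orders per round, clamp to the maximum, never exceed the window. */
static unsigned int ramp_up(unsigned int prev_order, unsigned int ra_size)
{
        unsigned int order = prev_order;

        if (order < MAX_PAGECACHE_ORDER) {
                order += 2;
                if (order > MAX_PAGECACHE_ORDER)
                        order = MAX_PAGECACHE_ORDER;
                while ((1u << order) > ra_size)
                        order--;
        }
        return order;
}

int main(void)
{
        unsigned int order = 0, ra_size = 512;  /* 2MB window, 4kB pages */

        for (int round = 0; round < 6; round++) {
                order = ramp_up(order, ra_size);
                printf("round %d: order %u (%u pages)\n",
                       round, order, 1u << order);
        }
        return 0;
}

With a 512-page window this prints orders 2, 4, 6, 8, 9, 9: the requested folio size quickly reaches PMD size, which is the "aggressive" part the message refers to.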

1 file changed: +99 -7 lines

mm/readahead.c

@@ -148,7 +148,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 
         blk_finish_plug(&plug);
 
-        BUG_ON(!list_empty(pages));
+        BUG_ON(pages && !list_empty(pages));
         BUG_ON(readahead_count(rac));
 
 out:
@@ -431,11 +431,103 @@ static int try_context_readahead(struct address_space *mapping,
         return 1;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER     HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER     8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+                pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+        int err;
+        struct folio *folio = filemap_alloc_folio(gfp, order);
+
+        if (!folio)
+                return -ENOMEM;
+        if (mark - index < (1UL << order))
+                folio_set_readahead(folio);
+        err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+        if (err)
+                folio_put(folio);
+        else
+                ractl->_nr_pages += 1UL << order;
+        return err;
+}
+
+static void page_cache_ra_order(struct readahead_control *ractl,
+                struct file_ra_state *ra, unsigned int new_order)
+{
+        struct address_space *mapping = ractl->mapping;
+        pgoff_t index = readahead_index(ractl);
+        pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+        pgoff_t mark = index + ra->size - ra->async_size;
+        int err = 0;
+        gfp_t gfp = readahead_gfp_mask(mapping);
+
+        if (!mapping_large_folio_support(mapping) || ra->size < 4)
+                goto fallback;
+
+        limit = min(limit, index + ra->size - 1);
+
+        if (new_order < MAX_PAGECACHE_ORDER) {
+                new_order += 2;
+                if (new_order > MAX_PAGECACHE_ORDER)
+                        new_order = MAX_PAGECACHE_ORDER;
+                while ((1 << new_order) > ra->size)
+                        new_order--;
+        }
+
+        while (index <= limit) {
+                unsigned int order = new_order;
+
+                /* Align with smaller pages if needed */
+                if (index & ((1UL << order) - 1)) {
+                        order = __ffs(index);
+                        if (order == 1)
+                                order = 0;
+                }
+                /* Don't allocate pages past EOF */
+                while (index + (1UL << order) - 1 > limit) {
+                        if (--order == 1)
+                                order = 0;
+                }
+                err = ra_alloc_folio(ractl, index, mark, order, gfp);
+                if (err)
+                        break;
+                index += 1UL << order;
+        }
+
+        if (index > limit) {
+                ra->size += index - limit - 1;
+                ra->async_size += index - limit - 1;
+        }
+
+        read_pages(ractl, NULL, false);
+
+        /*
+         * If there were already pages in the page cache, then we may have
+         * left some gaps. Let the regular readahead code take care of this
+         * situation.
+         */
+        if (!err)
+                return;
+fallback:
+        do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static void ondemand_readahead(struct readahead_control *ractl,
-                bool hit_readahead_marker, unsigned long req_size)
+                struct folio *folio, unsigned long req_size)
 {
         struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
         struct file_ra_state *ra = ractl->ra;
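One detail in page_cache_ra_order() above that is easy to miss: inside the allocation loop the requested order is reduced so that every folio is naturally aligned to its own size and never extends past EOF, and order-1 is skipped in favour of order 0. A standalone sketch of just that order-selection step (illustration only; pick_order() and the example indices are invented here, and __builtin_ctzl() stands in for the kernel's __ffs()):

#include <stdio.h>

/* Standalone illustration of the order-selection step inside the
 * allocation loop of page_cache_ra_order() from the diff above.
 * "index" and "limit" are page-cache indices. */
static unsigned int pick_order(unsigned long index, unsigned int new_order,
                               unsigned long limit)
{
        unsigned int order = new_order;

        /* Align with smaller pages if needed: a folio of 2^order pages
         * must start at an index that is a multiple of 2^order. */
        if (index & ((1UL << order) - 1)) {
                order = __builtin_ctzl(index);  /* __ffs() equivalent */
                if (order == 1)
                        order = 0;
        }
        /* Don't allocate pages past EOF. */
        while (index + (1UL << order) - 1 > limit) {
                if (--order == 1)
                        order = 0;
        }
        return order;
}

int main(void)
{
        /* Hypothetical example: window starting at index 6 with an
         * order-6 (64-page) target and EOF at index 70. */
        unsigned long index = 6, limit = 70;
        unsigned int new_order = 6;

        while (index <= limit) {
                unsigned int order = pick_order(index, new_order, limit);

                printf("index %3lu: order %u (%lu pages)\n",
                       index, order, 1UL << order);
                index += 1UL << order;
        }
        return 0;
}

Running it shows small folios until the index becomes naturally aligned, then larger ones, then smaller ones again as EOF approaches.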
@@ -470,12 +562,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
         }
 
         /*
-         * Hit a marked page without valid readahead state.
+         * Hit a marked folio without valid readahead state.
          * E.g. interleaved reads.
          * Query the pagecache for async_size, which normally equals to
          * readahead size. Ramp it up and use it as the new readahead size.
          */
-        if (hit_readahead_marker) {
+        if (folio) {
                 pgoff_t start;
 
                 rcu_read_lock();
@@ -548,7 +640,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
         }
 
         ractl->_index = ra->start;
-        do_page_cache_ra(ractl, ra->size, ra->async_size);
+        page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
@@ -576,7 +668,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
         }
 
         /* do read-ahead */
-        ondemand_readahead(ractl, false, req_count);
+        ondemand_readahead(ractl, NULL, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
@@ -605,7 +697,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
                 return;
 
         /* do read-ahead */
-        ondemand_readahead(ractl, true, req_count);
+        ondemand_readahead(ractl, folio, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
 
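For completeness: none of the above is reachable unless the mapping advertises large folio support, since page_cache_ra_order() falls back to do_page_cache_ra() when mapping_large_folio_support() is false. As I understand the API at this point in the series, a filesystem opts in per inode, along these lines (myfs_setup_inode() is a hypothetical example, not part of this commit):

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Hypothetical filesystem hook: advertise that this mapping may hold
 * folios larger than order 0, so readahead does not immediately fall
 * back to single-page allocations. */
static void myfs_setup_inode(struct inode *inode)
{
        mapping_set_large_folios(inode->i_mapping);
}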