@@ -148,7 +148,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
 
         blk_finish_plug(&plug);
 
-        BUG_ON(!list_empty(pages));
+        BUG_ON(pages && !list_empty(pages));
         BUG_ON(readahead_count(rac));
 
 out:
@@ -431,11 +431,103 @@ static int try_context_readahead(struct address_space *mapping,
         return 1;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
+static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
+                pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+        int err;
+        struct folio *folio = filemap_alloc_folio(gfp, order);
+
+        if (!folio)
+                return -ENOMEM;
+        if (mark - index < (1UL << order))
+                folio_set_readahead(folio);
+        err = filemap_add_folio(ractl->mapping, folio, index, gfp);
+        if (err)
+                folio_put(folio);
+        else
+                ractl->_nr_pages += 1UL << order;
+        return err;
+}
+
+static void page_cache_ra_order(struct readahead_control *ractl,
+                struct file_ra_state *ra, unsigned int new_order)
+{
+        struct address_space *mapping = ractl->mapping;
+        pgoff_t index = readahead_index(ractl);
+        pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+        pgoff_t mark = index + ra->size - ra->async_size;
+        int err = 0;
+        gfp_t gfp = readahead_gfp_mask(mapping);
+
+        if (!mapping_large_folio_support(mapping) || ra->size < 4)
+                goto fallback;
+
+        limit = min(limit, index + ra->size - 1);
+
+        if (new_order < MAX_PAGECACHE_ORDER) {
+                new_order += 2;
+                if (new_order > MAX_PAGECACHE_ORDER)
+                        new_order = MAX_PAGECACHE_ORDER;
+                while ((1 << new_order) > ra->size)
+                        new_order--;
+        }
+
+        while (index <= limit) {
+                unsigned int order = new_order;
+
+                /* Align with smaller pages if needed */
+                if (index & ((1UL << order) - 1)) {
+                        order = __ffs(index);
+                        if (order == 1)
+                                order = 0;
+                }
+                /* Don't allocate pages past EOF */
+                while (index + (1UL << order) - 1 > limit) {
+                        if (--order == 1)
+                                order = 0;
+                }
+                err = ra_alloc_folio(ractl, index, mark, order, gfp);
+                if (err)
+                        break;
+                index += 1UL << order;
+        }
+
+        if (index > limit) {
+                ra->size += index - limit - 1;
+                ra->async_size += index - limit - 1;
+        }
+
+        read_pages(ractl, NULL, false);
+
+        /*
+         * If there were already pages in the page cache, then we may have
+         * left some gaps. Let the regular readahead code take care of this
+         * situation.
+         */
+        if (!err)
+                return;
+fallback:
+        do_page_cache_ra(ractl, ra->size, ra->async_size);
+}
+
 /*
  * A minimal readahead algorithm for trivial sequential/random reads.
  */
 static void ondemand_readahead(struct readahead_control *ractl,
-                bool hit_readahead_marker, unsigned long req_size)
+                struct folio *folio, unsigned long req_size)
 {
         struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
         struct file_ra_state *ra = ractl->ra;
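
A reviewer's sketch (hypothetical file and function names, not kernel code): the standalone program below mirrors the order selection in page_cache_ra_order() above so it can be compiled and run: the +2 ramp capped at MAX_PAGECACHE_ORDER, the alignment fallback via __ffs(index), the clamp at the end of the window, and the mark test from ra_alloc_folio() that decides which folio gets the readahead flag. __builtin_ctzl() stands in for the kernel's __ffs(), and EOF handling is ignored (the window end plays the role of limit).

/* folio_order_demo.c -- build with: cc -o folio_order_demo folio_order_demo.c */
#include <stdio.h>

#define MAX_PAGECACHE_ORDER 8   /* the non-THP cap from the patch */

/* Model of the per-window folio order selection in page_cache_ra_order(). */
static void ra_order_demo(unsigned long index, unsigned long ra_size,
                          unsigned long async_size, unsigned int new_order)
{
        unsigned long limit = index + ra_size - 1;      /* window end, EOF ignored */
        unsigned long mark = index + ra_size - async_size;

        /* Ramp the seed order, as page_cache_ra_order() does. */
        if (new_order < MAX_PAGECACHE_ORDER) {
                new_order += 2;
                if (new_order > MAX_PAGECACHE_ORDER)
                        new_order = MAX_PAGECACHE_ORDER;
                while ((1UL << new_order) > ra_size)
                        new_order--;
        }

        while (index <= limit) {
                unsigned int order = new_order;

                /* Align with smaller pages if needed */
                if (index & ((1UL << order) - 1)) {
                        order = __builtin_ctzl(index);  /* stand-in for __ffs() */
                        if (order == 1)
                                order = 0;
                }
                /* Don't allocate folios past the end of the window */
                while (index + (1UL << order) - 1 > limit) {
                        if (--order == 1)
                                order = 0;
                }
                printf("folio at index %lu, order %u (%lu pages)%s\n",
                       index, order, 1UL << order,
                       mark - index < (1UL << order) ? "  <- readahead mark" : "");
                index += 1UL << order;
        }
}

int main(void)
{
        /* As if an async hit on an order-2 folio grew the window to 128 pages. */
        ra_order_demo(52, 128, 64, 2);
        return 0;
}

Running it shows the orders climbing from 2 to the ramped order 4 as the start index becomes better aligned, the readahead mark landing in the folio that covers index + size - async_size, and the final folio shrinking so it does not overrun the window.
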
@@ -470,12 +562,12 @@ static void ondemand_readahead(struct readahead_control *ractl,
         }
 
         /*
-         * Hit a marked page without valid readahead state.
+         * Hit a marked folio without valid readahead state.
          * E.g. interleaved reads.
          * Query the pagecache for async_size, which normally equals to
          * readahead size. Ramp it up and use it as the new readahead size.
          */
-        if (hit_readahead_marker) {
+        if (folio) {
                 pgoff_t start;
 
                 rcu_read_lock();
@@ -548,7 +640,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
         }
 
         ractl->_index = ra->start;
-        do_page_cache_ra(ractl, ra->size, ra->async_size);
+        page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0);
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
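
With this hunk the order is seeded from the folio that triggered the async readahead (folio_order(folio)), or 0 on the sync path, and page_cache_ra_order() then bumps the seed by 2. Assuming ra->size never constrains the order and each async readahead hits a folio allocated by the previous round (both assumptions, not guaranteed by this patch alone), the folio size roughly quadruples per readahead until it reaches MAX_PAGECACHE_ORDER, i.e. 1MB with 4kB pages. A minimal model of that ramp:

/* order_ramp_demo.c -- build with: cc -o order_ramp_demo order_ramp_demo.c */
#include <stdio.h>

#define MAX_PAGECACHE_ORDER 8

int main(void)
{
        unsigned int order = 0; /* sync readahead passes no folio: seed 0 */
        int round;

        for (round = 1; round <= 6; round++) {
                /* the ramp at the top of page_cache_ra_order() */
                if (order < MAX_PAGECACHE_ORDER) {
                        order += 2;
                        if (order > MAX_PAGECACHE_ORDER)
                                order = MAX_PAGECACHE_ORDER;
                }
                printf("readahead %d: order-%u folios (%u pages)\n",
                       round, order, 1U << order);
                /*
                 * The next async readahead is assumed to hit one of these
                 * folios, so folio_order(folio) seeds the next round.
                 */
        }
        return 0;
}

The progression is 2, 4, 6, 8 and then stays pinned at 8, which is the intended behaviour: start small, grow quickly, and cap at the PMD order (or 8 without THP).
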
@@ -576,7 +668,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
         }
 
         /* do read-ahead */
-        ondemand_readahead(ractl, false, req_count);
+        ondemand_readahead(ractl, NULL, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
@@ -605,7 +697,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
                 return;
 
         /* do read-ahead */
-        ondemand_readahead(ractl, true, req_count);
+        ondemand_readahead(ractl, folio, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
 
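One more note for anyone testing this: none of the above kicks in unless the early check in page_cache_ra_order() passes; otherwise it falls back to do_page_cache_ra() and order-0 pages exactly as before. The mapping has to advertise large folio support (set per-mapping by the filesystem; as I understand the rest of the series, opted-in filesystems call mapping_set_large_folios() at inode setup) and the readahead window has to be at least 4 pages. A trivial model of that gate, with assumed names:

/* gate_demo.c -- build with: cc -o gate_demo gate_demo.c */
#include <stdbool.h>
#include <stdio.h>

/*
 * Model of the early bail-out in page_cache_ra_order(): large folios are
 * used only if the mapping supports them and the window is >= 4 pages.
 */
static bool uses_large_folios(bool large_folio_support, unsigned long ra_size)
{
        return large_folio_support && ra_size >= 4;
}

int main(void)
{
        printf("no fs support, 64-page window: %d\n", uses_large_folios(false, 64));
        printf("fs support,     2-page window: %d\n", uses_large_folios(true, 2));
        printf("fs support,    64-page window: %d\n", uses_large_folios(true, 64));
        return 0;
}

The fallback is also taken when ra_alloc_folio() fails partway through, typically because filemap_add_folio() finds a page already in the cache; the "left some gaps" comment in the big hunk above covers that case.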