@@ -329,6 +329,7 @@ EXPORT_SYMBOL(hmm_mirror_unregister);
329329
330330struct hmm_vma_walk {
331331 struct hmm_range * range ;
332+ struct dev_pagemap * pgmap ;
332333 unsigned long last ;
333334 bool fault ;
334335 bool block ;
@@ -503,12 +504,22 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
503504 range -> flags [HMM_PFN_VALID ];
504505}
505506
/*
 * pud_to_hmm_pfn_flags() - translate a PUD entry into HMM pfn flags.
 * @range: range whose flags[] table provides the HMM_PFN_VALID and
 *         HMM_PFN_WRITE flag values
 * @pud: PUD entry value to inspect
 *
 * Return: 0 when the PUD is not present. Otherwise
 * range->flags[HMM_PFN_VALID], OR-ed with range->flags[HMM_PFN_WRITE] when
 * pud_write() reports the entry as writable.
 */
507+ static inline uint64_t pud_to_hmm_pfn_flags (struct hmm_range * range , pud_t pud )
508+ {
509+ if (!pud_present (pud ))
510+ return 0 ;
511+ return pud_write (pud ) ? range -> flags [HMM_PFN_VALID ] |
512+ range -> flags [HMM_PFN_WRITE ] :
513+ range -> flags [HMM_PFN_VALID ];
514+ }
515+
506516static int hmm_vma_handle_pmd (struct mm_walk * walk ,
507517 unsigned long addr ,
508518 unsigned long end ,
509519 uint64_t * pfns ,
510520 pmd_t pmd )
511521{
522+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
512523 struct hmm_vma_walk * hmm_vma_walk = walk -> private ;
513524 struct hmm_range * range = hmm_vma_walk -> range ;
514525 unsigned long pfn , npages , i ;
@@ -524,10 +535,25 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk,
524535 return hmm_vma_walk_hole_ (addr , end , fault , write_fault , walk );
525536
526537 pfn = pmd_pfn (pmd ) + pte_index (addr );
527- for (i = 0 ; addr < end ; addr += PAGE_SIZE , i ++ , pfn ++ )
538+ for (i = 0 ; addr < end ; addr += PAGE_SIZE , i ++ , pfn ++ ) {
539+ if (pmd_devmap (pmd )) {
540+ hmm_vma_walk -> pgmap = get_dev_pagemap (pfn ,
541+ hmm_vma_walk -> pgmap );
542+ if (unlikely (!hmm_vma_walk -> pgmap ))
543+ return - EBUSY ;
544+ }
528545 pfns [i ] = hmm_pfn_from_pfn (range , pfn ) | cpu_flags ;
546+ }
547+ if (hmm_vma_walk -> pgmap ) {
548+ put_dev_pagemap (hmm_vma_walk -> pgmap );
549+ hmm_vma_walk -> pgmap = NULL ;
550+ }
529551 hmm_vma_walk -> last = end ;
530552 return 0 ;
553+ #else
554+ /* If THP is not enabled then we should never reach this code! */
555+ return - EINVAL ;
556+ #endif
531557}
532558
533559static inline uint64_t pte_to_hmm_pfn_flags (struct hmm_range * range , pte_t pte )
@@ -612,10 +638,24 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
612638 if (fault || write_fault )
613639 goto fault ;
614640
641+ if (pte_devmap (pte )) {
642+ hmm_vma_walk -> pgmap = get_dev_pagemap (pte_pfn (pte ),
643+ hmm_vma_walk -> pgmap );
644+ if (unlikely (!hmm_vma_walk -> pgmap ))
645+ return - EBUSY ;
646+ } else if (IS_ENABLED (CONFIG_ARCH_HAS_PTE_SPECIAL ) && pte_special (pte )) {
647+ * pfn = range -> values [HMM_PFN_SPECIAL ];
648+ return - EFAULT ;
649+ }
650+
615651 * pfn = hmm_pfn_from_pfn (range , pte_pfn (pte )) | cpu_flags ;
616652 return 0 ;
617653
618654fault :
655+ if (hmm_vma_walk -> pgmap ) {
656+ put_dev_pagemap (hmm_vma_walk -> pgmap );
657+ hmm_vma_walk -> pgmap = NULL ;
658+ }
619659 pte_unmap (ptep );
620660 /* Fault any virtual address we were asked to fault */
621661 return hmm_vma_walk_hole_ (addr , end , fault , write_fault , walk );
@@ -703,12 +743,93 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
703743 return r ;
704744 }
705745 }
746+ if (hmm_vma_walk -> pgmap ) {
747+ /*
748+ * We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
749+ * so that we can leverage get_dev_pagemap() optimization which
750+ * will not re-take a reference on a pgmap if we already have
751+ * one.
752+ */
753+ put_dev_pagemap (hmm_vma_walk -> pgmap );
754+ hmm_vma_walk -> pgmap = NULL ;
755+ }
706756 pte_unmap (ptep - 1 );
707757
708758 hmm_vma_walk -> last = addr ;
709759 return 0 ;
710760}
711761
/*
 * hmm_vma_walk_pud() - PUD-level page walk callback for HMM.
 * @pudp: pointer to the PUD entry covering @start
 * @start: first virtual address handled by this call
 * @end: end virtual address handled by this call
 * @walk: page walk state; walk->private is the struct hmm_vma_walk
 *
 * A PUD that is none is reported through hmm_vma_walk_hole(). A PUD that is
 * both huge and devmap is handled here: the matching slice of range->pfns is
 * filled one page at a time, or the range is passed to hmm_vma_walk_hole_()
 * when hmm_range_need_fault() asks for a fault. Any other PUD goes through
 * split_huge_pud() and then each PMD range below it is passed to
 * hmm_vma_walk_pmd().
 *
 * Return: 0 on success, -EBUSY when get_dev_pagemap() returns NULL, -EINVAL
 * for a huge devmap PUD when CONFIG_HUGETLB_PAGE is not defined, otherwise
 * the value returned by hmm_vma_walk_hole(), hmm_vma_walk_hole_() or
 * hmm_vma_walk_pmd().
 */
762+ static int hmm_vma_walk_pud (pud_t * pudp ,
763+ unsigned long start ,
764+ unsigned long end ,
765+ struct mm_walk * walk )
766+ {
767+ struct hmm_vma_walk * hmm_vma_walk = walk -> private ;
768+ struct hmm_range * range = hmm_vma_walk -> range ;
769+ unsigned long addr = start , next ;
770+ pmd_t * pmdp ;
771+ pud_t pud ;
772+ int ret ;
773+
774+ again :
/* Work on a single snapshot of the entry taken with READ_ONCE(). */
775+ pud = READ_ONCE (* pudp );
776+ if (pud_none (pud ))
777+ return hmm_vma_walk_hole (start , end , walk );
778+
779+ if (pud_huge (pud ) && pud_devmap (pud )) {
780+ unsigned long i , npages , pfn ;
781+ uint64_t * pfns , cpu_flags ;
782+ bool fault , write_fault ;
783+
784+ if (!pud_present (pud ))
785+ return hmm_vma_walk_hole (start , end , walk );
786+
/* Locate the slice of range->pfns that corresponds to [addr, end). */
787+ i = (addr - range -> start ) >> PAGE_SHIFT ;
788+ npages = (end - addr ) >> PAGE_SHIFT ;
789+ pfns = & range -> pfns [i ];
790+
791+ cpu_flags = pud_to_hmm_pfn_flags (range , pud );
792+ hmm_range_need_fault (hmm_vma_walk , pfns , npages ,
793+ cpu_flags , & fault , & write_fault );
794+ if (fault || write_fault )
795+ return hmm_vma_walk_hole_ (addr , end , fault ,
796+ write_fault , walk );
797+
/*
 * NOTE(review): pfn is only referenced inside the CONFIG_HUGETLB_PAGE
 * branch below, so a build without that option may emit an
 * unused-variable warning - confirm.
 */
798+ #ifdef CONFIG_HUGETLB_PAGE
/* First pfn: base pfn of the PUD plus the page offset of addr within it. */
799+ pfn = pud_pfn (pud ) + ((addr & ~PUD_MASK ) >> PAGE_SHIFT );
800+ for (i = 0 ; i < npages ; ++ i , ++ pfn ) {
/*
 * The previous pgmap is passed back in on every iteration; the walk
 * is abandoned with -EBUSY if no pgmap is returned for this pfn.
 */
801+ hmm_vma_walk -> pgmap = get_dev_pagemap (pfn ,
802+ hmm_vma_walk -> pgmap );
803+ if (unlikely (!hmm_vma_walk -> pgmap ))
804+ return - EBUSY ;
805+ pfns [i ] = hmm_pfn_from_pfn (range , pfn ) | cpu_flags ;
806+ }
/* Release the pgmap still held after the loop and clear the cached pointer. */
807+ if (hmm_vma_walk -> pgmap ) {
808+ put_dev_pagemap (hmm_vma_walk -> pgmap );
809+ hmm_vma_walk -> pgmap = NULL ;
810+ }
811+ hmm_vma_walk -> last = end ;
812+ return 0 ;
813+ #else
814+ return - EINVAL ;
815+ #endif
816+ }
817+
/*
 * Not a huge devmap PUD: split it, and start over if the entry reads as
 * none afterwards.
 */
818+ split_huge_pud (walk -> vma , pudp , addr );
819+ if (pud_none (* pudp ))
820+ goto again ;
821+
/* Hand each PMD range under this PUD to hmm_vma_walk_pmd(); stop on the first error. */
822+ pmdp = pmd_offset (pudp , addr );
823+ do {
824+ next = pmd_addr_end (addr , end );
825+ ret = hmm_vma_walk_pmd (pmdp , addr , next , walk );
826+ if (ret )
827+ return ret ;
828+ } while (pmdp ++ , addr = next , addr != end );
829+
830+ return 0 ;
831+ }
832+
712833static int hmm_vma_walk_hugetlb_entry (pte_t * pte , unsigned long hmask ,
713834 unsigned long start , unsigned long end ,
714835 struct mm_walk * walk )
@@ -781,14 +902,6 @@ static void hmm_pfns_clear(struct hmm_range *range,
781902 * pfns = range -> values [HMM_PFN_NONE ];
782903}
783904
784- static void hmm_pfns_special (struct hmm_range * range )
785- {
786- unsigned long addr = range -> start , i = 0 ;
787-
788- for (; addr < range -> end ; addr += PAGE_SIZE , i ++ )
789- range -> pfns [i ] = range -> values [HMM_PFN_SPECIAL ];
790- }
791-
792905/*
793906 * hmm_range_register() - start tracking change to CPU page table over a range
794907 * @range: range
@@ -906,12 +1019,6 @@ long hmm_range_snapshot(struct hmm_range *range)
9061019 if (vma == NULL || (vma -> vm_flags & device_vma ))
9071020 return - EFAULT ;
9081021
909- /* FIXME support dax */
910- if (vma_is_dax (vma )) {
911- hmm_pfns_special (range );
912- return - EINVAL ;
913- }
914-
9151022 if (is_vm_hugetlb_page (vma )) {
9161023 struct hstate * h = hstate_vma (vma );
9171024
@@ -935,6 +1042,7 @@ long hmm_range_snapshot(struct hmm_range *range)
9351042 }
9361043
9371044 range -> vma = vma ;
1045+ hmm_vma_walk .pgmap = NULL ;
9381046 hmm_vma_walk .last = start ;
9391047 hmm_vma_walk .fault = false;
9401048 hmm_vma_walk .range = range ;
@@ -946,6 +1054,7 @@ long hmm_range_snapshot(struct hmm_range *range)
9461054 mm_walk .pte_entry = NULL ;
9471055 mm_walk .test_walk = NULL ;
9481056 mm_walk .hugetlb_entry = NULL ;
1057+ mm_walk .pud_entry = hmm_vma_walk_pud ;
9491058 mm_walk .pmd_entry = hmm_vma_walk_pmd ;
9501059 mm_walk .pte_hole = hmm_vma_walk_hole ;
9511060 mm_walk .hugetlb_entry = hmm_vma_walk_hugetlb_entry ;
@@ -1011,12 +1120,6 @@ long hmm_range_fault(struct hmm_range *range, bool block)
10111120 if (vma == NULL || (vma -> vm_flags & device_vma ))
10121121 return - EFAULT ;
10131122
1014- /* FIXME support dax */
1015- if (vma_is_dax (vma )) {
1016- hmm_pfns_special (range );
1017- return - EINVAL ;
1018- }
1019-
10201123 if (is_vm_hugetlb_page (vma )) {
10211124 if (huge_page_shift (hstate_vma (vma )) !=
10221125 range -> page_shift &&
@@ -1039,6 +1142,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
10391142 }
10401143
10411144 range -> vma = vma ;
1145+ hmm_vma_walk .pgmap = NULL ;
10421146 hmm_vma_walk .last = start ;
10431147 hmm_vma_walk .fault = true;
10441148 hmm_vma_walk .block = block ;
@@ -1051,6 +1155,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
10511155 mm_walk .pte_entry = NULL ;
10521156 mm_walk .test_walk = NULL ;
10531157 mm_walk .hugetlb_entry = NULL ;
1158+ mm_walk .pud_entry = hmm_vma_walk_pud ;
10541159 mm_walk .pmd_entry = hmm_vma_walk_pmd ;
10551160 mm_walk .pte_hole = hmm_vma_walk_hole ;
10561161 mm_walk .hugetlb_entry = hmm_vma_walk_hugetlb_entry ;
0 commit comments