@@ -641,6 +641,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
641
641
}
642
642
StrSearcherImpl :: TwoWay ( ref mut searcher) => {
643
643
let is_long = searcher. memory == usize:: MAX ;
644
+ // write out `true` and `false` cases to encourage the compiler
645
+ // to specialize the two cases separately.
644
646
if is_long {
645
647
searcher. next :: < MatchOnly > ( self . haystack . as_bytes ( ) ,
646
648
self . needle . as_bytes ( ) ,
@@ -653,8 +655,8 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b> {
653
655
}
654
656
}
655
657
}
656
-
657
658
}
659
+
658
660
unsafe impl < ' a , ' b > ReverseSearcher < ' a > for StrSearcher < ' a , ' b > {
659
661
#[ inline]
660
662
fn next_back ( & mut self ) -> SearchStep {
@@ -709,6 +711,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
709
711
}
710
712
StrSearcherImpl :: TwoWay ( ref mut searcher) => {
711
713
let is_long = searcher. memory == usize:: MAX ;
714
+ // write out `true` and `false`, like `next_match`
712
715
if is_long {
713
716
searcher. next_back :: < MatchOnly > ( self . haystack . as_bytes ( ) ,
714
717
self . needle . as_bytes ( ) ,
@@ -723,8 +726,7 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b> {
723
726
}
724
727
}
725
728
726
- /// The internal state of an iterator that searches for matches of a substring
727
- /// within a larger string using two-way search
729
+ /// The internal state of the two-way substring search algorithm.
728
730
#[ derive( Clone , Debug ) ]
729
731
struct TwoWaySearcher {
730
732
// constants
@@ -741,7 +743,9 @@ struct TwoWaySearcher {
741
743
// variables
742
744
position : usize ,
743
745
end : usize ,
746
+ /// index into needle before which we have already matched
744
747
memory : usize ,
748
+ /// index into needle after which we have already matched
745
749
memory_back : usize ,
746
750
}
747
751
@@ -841,9 +845,6 @@ impl TwoWaySearcher {
841
845
// is large.
842
846
if & needle[ ..crit_pos] == & needle[ period.. period + crit_pos] {
843
847
// short period case -- the period is exact
844
- let byteset = needle[ ..period] . iter ( )
845
- . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a) ;
846
-
847
848
// compute a separate critical factorization for the reversed needle
848
849
// x = u' v' where |v'| < period(x).
849
850
//
@@ -860,26 +861,26 @@ impl TwoWaySearcher {
860
861
crit_pos : crit_pos,
861
862
crit_pos_back : crit_pos_back,
862
863
period : period,
863
- byteset : byteset ,
864
+ byteset : Self :: byteset_create ( & needle [ ..period ] ) ,
864
865
865
866
position : 0 ,
866
867
end : end,
867
868
memory : 0 ,
868
- // memory_back after which we have already matched
869
869
memory_back : needle. len ( ) ,
870
870
}
871
871
} else {
872
872
// long period case -- we have an approximation to the actual period,
873
873
// and don't use memorization.
874
-
875
- let byteset = needle. iter ( )
876
- . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a) ;
874
+ //
875
+ // Approximate the period by lower bound max(|u|, |v|) + 1.
876
+ // The critical factorization is efficient to use for both forward and
877
+ // reverse search.
877
878
878
879
TwoWaySearcher {
879
880
crit_pos : crit_pos,
880
881
crit_pos_back : crit_pos,
881
882
period : cmp:: max ( crit_pos, needle. len ( ) - crit_pos) + 1 ,
882
- byteset : byteset ,
883
+ byteset : Self :: byteset_create ( needle ) ,
883
884
884
885
position : 0 ,
885
886
end : end,
@@ -889,6 +890,11 @@ impl TwoWaySearcher {
889
890
}
890
891
}
891
892
893
+ #[ inline]
894
+ fn byteset_create ( bytes : & [ u8 ] ) -> u64 {
895
+ bytes. iter ( ) . fold ( 0 , |a, & b| ( 1 << ( b & 0x3f ) ) | a)
896
+ }
897
+
892
898
#[ inline( always) ]
893
899
fn byteset_contains ( & self , byte : u8 ) -> bool {
894
900
( self . byteset >> ( ( byte & 0x3f ) as usize ) ) & 1 != 0
@@ -976,9 +982,9 @@ impl TwoWaySearcher {
976
982
// and local_period(u, v) = local_period(reverse(v), reverse(u)), so if (u, v)
977
983
// is a critical factorization, so is (reverse(v), reverse(u)).
978
984
//
979
- // For the short period case, using memorization, we rely on |u| < period(x).
980
- // For this case we have computed a critical factorization x = u' v'
981
- // where |v'| < period(x) instead (field `crit_pos_back`) .
985
+ // For the reverse case we have computed a critical factorization x = u' v'
986
+ // (field `crit_pos_back`). We need |u| < period(x) for the forward case and
987
+ // thus |v'| < period(x) for the reverse .
982
988
//
983
989
// To search in reverse through the haystack, we search forward through
984
990
// a reversed haystack with a reversed needle, matching first u' and then v'.
@@ -1018,7 +1024,7 @@ impl TwoWaySearcher {
1018
1024
1019
1025
// See if the left part of the needle matches
1020
1026
let crit = if long_period { self . crit_pos_back }
1021
- else { cmp:: min ( self . crit_pos_back , self . memory_back ) } ;
1027
+ else { cmp:: min ( self . crit_pos_back , self . memory_back ) } ;
1022
1028
for i in ( 0 ..crit) . rev ( ) {
1023
1029
if needle[ i] != haystack[ self . end - needle. len ( ) + i] {
1024
1030
self . end -= self . crit_pos_back - i;
@@ -1031,7 +1037,7 @@ impl TwoWaySearcher {
1031
1037
1032
1038
// See if the right part of the needle matches
1033
1039
let needle_end = if long_period { needle. len ( ) }
1034
- else { self . memory_back } ;
1040
+ else { self . memory_back } ;
1035
1041
for i in self . crit_pos_back ..needle_end {
1036
1042
if needle[ i] != haystack[ self . end - needle. len ( ) + i] {
1037
1043
self . end -= self . period ;
@@ -1070,7 +1076,8 @@ impl TwoWaySearcher {
1070
1076
fn maximal_suffix ( arr : & [ u8 ] , order_greater : bool ) -> ( usize , usize ) {
1071
1077
let mut left = 0 ; // Corresponds to i in the paper
1072
1078
let mut right = 1 ; // Corresponds to j in the paper
1073
- let mut offset = 0 ; // Corresponds to k in the paper
1079
+ let mut offset = 0 ; // Corresponds to k in the paper, but starting at 0
1080
+ // to match 0-based indexing.
1074
1081
let mut period = 1 ; // Corresponds to p in the paper
1075
1082
1076
1083
while let Some ( & a) = arr. get ( right + offset) {
@@ -1117,7 +1124,8 @@ impl TwoWaySearcher {
1117
1124
{
1118
1125
let mut left = 0 ; // Corresponds to i in the paper
1119
1126
let mut right = 1 ; // Corresponds to j in the paper
1120
- let mut offset = 0 ; // Corresponds to k in the paper
1127
+ let mut offset = 0 ; // Corresponds to k in the paper, but starting at 0
1128
+ // to match 0-based indexing.
1121
1129
let mut period = 1 ; // Corresponds to p in the paper
1122
1130
let n = arr. len ( ) ;
1123
1131
0 commit comments