@@ -60,10 +60,11 @@ pub struct SplitStr<'a> {
6060 inp : & ' a str ,
6161
6262 /// Initially points to the first byte of the `inp`-buffer. In case `ok_s` is
63- /// very long and has `>=ok_s_len_max`, the iterator stops and sends out
64- /// `ok_s`. Then `inp_start_p` is moved to the first byte after `ok_s` so that
65- /// the next `next()` deals with the rest of the string. This way the second
66- /// half will be identified to be the continuation of the first part.
63+ /// very long and has `>=ok_char_nb_max` characters, the iterator stops and
64+ /// sends out `ok_s`. Then `inp_start_p` is moved to the first byte after
65+ /// `ok_s` so that the next `next()` deals with the rest of the string. This
66+ /// way the second half will be identified to be the continuation of the
67+ /// first part.
6768 inp_start_p : * const u8 ,
6869
6970 /// Points to the first byte after the end of `inp` buffer.
@@ -75,7 +76,7 @@ pub struct SplitStr<'a> {
7576
7677 /// Criteria that influences the search performed by `next()`. Normally only
7778 /// substrings larger than `>=chars_min_nb` will be returned by `next()`.
78- /// This rule concerning only substrings touching one o fthe `inp` buffer
79+ /// This rule concerning only substrings touching one of the `inp` buffer
7980 /// boundaries has 2 exceptions:
8081 ///
8182 /// 1. When `last_s_was_maybe_cut` is set and
@@ -109,12 +110,8 @@ pub struct SplitStr<'a> {
109110 utf8f : Utf8Filter ,
110111
111112 /// This imposes an additional constraint on the iterator and instructs it
112- /// to never return substrings longer than `s_len_max`. Usually this is equal
113- /// the `inp`-buffer's length, but there can be exceptions of longer
114- /// `inp`-buffers. For example when the previous run has left some
115- /// non-treated `left_over` bytes which are then prepended to the
116- /// `inp`-buffer. In the worst case, such an `inp` is then twice as large.
117- s_len_max : usize ,
113+ /// to never return substrings longer than `s_char_nb_max`.
114+ s_char_nb_max : usize ,
118115}
119116
120117/// This enum describes result variants of the `SplitStr::next()` output.
@@ -169,7 +166,7 @@ impl<'a> SplitStr<'a> {
169166 last_s_was_maybe_cut : bool ,
170167 invalid_bytes_after_inp : bool ,
171168 utf8f : Utf8Filter ,
172- s_len_max : usize ,
169+ s_char_nb_max : usize ,
173170 ) -> SplitStr {
174171 unsafe {
175172 SplitStr {
@@ -187,7 +184,7 @@ impl<'a> SplitStr<'a> {
187184 // We will set this to false later, if `utf8f.grep_char` requires some
188185 // additional checking.
189186 utf8f,
190- s_len_max ,
187+ s_char_nb_max ,
191188 }
192189 }
193190 }
@@ -209,24 +206,21 @@ impl<'a> Iterator for SplitStr<'a> {
209206 let mut ok_s_len = 0usize ;
210207 let mut ok_char_nb = 0usize ;
211208 // The longest `ok_s` we want to return in one `next()` iteration is
212- // of length `ok_s_len_max`, which the usual `inp`-buffer size
213- // when no extra bytes are prepended.
209+ // of length `ok_char_nb_max`.
214210 // When we return such a maximum length string, we
215- // keep the rest in `inp` for `next()`. Such a long string can only
216- // appear, when some bytes form the last run had been prepended to
217- // 'inp'.
218- let ok_s_len_max = self . s_len_max ;
211+ // keep the rest in `inp` for `next()`.
212+ let ok_char_nb_max = self . s_char_nb_max ;
219213
220214 // The following loop has 4 exits:
221215 // 1. We finished the whole buffer: `self.p >= self.inp_end_p`
222- // 2. A long string was found: `ok_s_len > ok_s_len_max `,
216+ // 2. A long string was found: `ok_char_nb >= ok_char_nb_max`,
223217 // `p` points to the first of the remaining bytes, left
224218 // for the next `next()` run.
225219 // 3. We found a substring at the beginning of the buffer;
226220 // 4. We found a substring somewhere in the middle of the buffer;
227221
228222 // Exit 1. and 2.
229- while self . p < self . inp_end_p && ok_s_len < ok_s_len_max {
223+ while self . p < self . inp_end_p && ok_char_nb < ok_char_nb_max {
230224 // We do not need an additional boundary check, because we
231225 // know from above that there is at least one character in
232226 // `inp` and there are only valid UTF-8 in here.
@@ -314,18 +308,14 @@ impl<'a> Iterator for SplitStr<'a> {
314308 // Exit 2 or 3:
315309 let s_touches_right_boundary = unsafe { ok_s_p. add ( ok_s_len) } >= self . inp_end_p ;
316310
317- let s_is_maybe_cut =
318- ok_s_len >= ok_s_len_max || ( s_touches_right_boundary && !self . invalid_bytes_after_inp ) ;
311+ let s_is_maybe_cut = ok_char_nb >= ok_char_nb_max
312+ || ( s_touches_right_boundary && !self . invalid_bytes_after_inp ) ;
319313 let s_completes_previous_s = s_touches_left_boundary && self . last_s_was_maybe_cut ;
320314
321315 // With this flag we tell the caller, that he should not immediately
322316 // print the returned string, but rather insert it at the beginning
323317 // of the next input buffer and decode and run `SplitStr` again.
324318 //
325- // Note, we require, that `ok_s_len` is at least 1 byte SMALLER then
326- // `self.s_len_max` (`ok_s_len < self.s_len_max`). This way
327- // we print strings that fill the whole output line directly.
328- //
329319 // Note, `&& !s_completes_previous_s` guarantees, that
330320 // `s_is_to_be_filtered_again` is only set out for the first part
331321 // of a longer cut string. We only want the first part of string to be
@@ -341,23 +331,23 @@ impl<'a> Iterator for SplitStr<'a> {
341331 // 2. When the first part (==`!not_completes_previous`) of a longer
342332 // string that touches the right buffer boundary
343333 // (`==s_touches_right_boundary`) did start somewhere in the middle of
344- // the buffer (==`ok_s_len < self.s_len_max `). We actually could
334+ // the buffer (==`ok_char_nb < self.s_char_nb_max `). We actually could
345335 // print it out now, because it has the minimum length, but we want to
346336 // print the beginning of every string as long as possible (approx
347- // `output_line_length `). Instead, we rather set
337+ // `output_line_char_nb_max `). Instead, we rather set
348338 // `s_is_to_be_filtered_again` instructing the caller to insert
349339 // this string at the beginning of the next buffer. Doing so, we
350340 // guarantee, that string beginnings are always assembled, even if they
351341 // crossed buffer boundaries. Thus, the user can pipe the output of
352342 // `stringsext` through additional filters, e.g. searching for
353343 // particular patterns.
354344 //
355- // As `ok_char_nb < chars_min_nb` is part of `ok_s_len < self.s_len_max `
345+ // As `ok_char_nb < chars_min_nb` is part of `ok_char_nb < self.s_char_nb_max`
356346 // we do not need to add this condition explicitly below.
357347 let s_is_to_be_filtered_again = !s_completes_previous_s
358348 && s_touches_right_boundary
359349 && !self . invalid_bytes_after_inp
360- && ( ok_s_len < self . s_len_max || !grep_char_ok) ;
350+ && ( ok_char_nb < self . s_char_nb_max || !grep_char_ok) ;
361351
362352 let s_satisfies_min_char_rule = ok_char_nb >= self . chars_min_nb as usize ;
363353 let s_satisfies_grep_char_rule = grep_char_ok;
@@ -383,7 +373,7 @@ impl<'a> Iterator for SplitStr<'a> {
383373 } ;
384374
385375 // Exit was 2: prepare the inner state for the next `next()` run.
386- if ok_s_len >= ok_s_len_max {
376+ if ok_char_nb >= ok_char_nb_max {
387377 self . inp_start_p = self . p ;
388378 } ;
389379 self . last_s_was_maybe_cut = s_is_maybe_cut;
0 commit comments