@@ -9,13 +9,27 @@ use fluent_uri::UriRef;
99use percent_encoding:: percent_decode_str;
1010use std:: fmt:: Write ;
1111
12- fn is_cat1 ( c : char ) -> bool {
12+ /// Returns true for Go net/url's "category 1" characters: the ASCII characters that are
13+ /// percent-encoded unconditionally, and whose presence in a path enables category-2 escaping.
14+ fn is_go_url_escape_cat1 ( c : char ) -> bool {
1315 matches ! (
1416 c,
1517 '\\' | '^' | '{' | '}' | '|' | '<' | '>' | '`' | ' ' | '"'
1618 )
1719}
1820
21+ /// Returns true for Go net/url's "category 2" characters for PATH contexts: characters that
22+ /// are percent-encoded in a path only when it also contains a category-1 or non-ASCII character.
23+ fn is_go_url_escape_cat2_path ( c : char ) -> bool {
24+ matches ! ( c, '!' | '\'' | '(' | ')' | '*' | '[' | ']' )
25+ }
26+
27+ /// Returns true for Go net/url's "category 2" characters for FRAGMENT contexts: characters that
28+ /// are additionally percent-encoded in a fragment when it contains a non-ASCII character.
29+ fn is_go_url_escape_cat2_fragment ( c : char ) -> bool {
30+ matches ! ( c, '\'' | '[' | ']' )
31+ }
32+
1933fn hex_val ( b : u8 ) -> u8 {
2034 match b {
2135 b'0' ..=b'9' => b - b'0' ,
@@ -50,14 +64,6 @@ fn normalize_pct_encoded_unreserved(path: &str) -> String {
5064 out
5165}
5266
53- fn is_path_cat2 ( c : char ) -> bool {
54- matches ! ( c, '!' | '\'' | '(' | ')' | '*' | '[' | ']' )
55- }
56-
57- fn is_frag_cat2 ( c : char ) -> bool {
58- matches ! ( c, '\'' | '[' | ']' )
59- }
60-
6167fn encode_char ( out : & mut String , c : char ) {
6268 let mut buf = [ 0u8 ; 4 ] ;
6369 for & b in c. encode_utf8 ( & mut buf) . as_bytes ( ) {
@@ -106,7 +112,7 @@ pub fn obfuscate_url_string(
106112
107113 // Determine Go's escape() trigger: Cat1 or non-ASCII in path causes Cat2 encoding too
108114 let path = & url[ ..path_end] ;
109- let needs_full_path = path. bytes ( ) . any ( |b| b > 127 ) || path. chars ( ) . any ( is_cat1 ) ;
115+ let needs_full_path = path. bytes ( ) . any ( |b| b > 127 ) || path. chars ( ) . any ( is_go_url_escape_cat1 ) ;
110116 let frag_has_non_ascii = frag_pos. is_some_and ( |i| url[ i + 1 ..] . bytes ( ) . any ( |b| b > 127 ) ) ;
111117
112118 // Pre-encode chars that UriRef (strict RFC 3986) rejects.
@@ -118,7 +124,7 @@ pub fn obfuscate_url_string(
118124 for c in url[ ..path_end] . chars ( ) {
119125 if !c. is_ascii ( ) {
120126 encode_char ( & mut pre, c) ;
121- } else if is_cat1 ( c) || ( needs_full_path && is_path_cat2 ( c) ) {
127+ } else if is_go_url_escape_cat1 ( c) || ( needs_full_path && is_go_url_escape_cat2_path ( c) ) {
122128 let _ = write ! ( pre, "%{:02X}" , c as u8 ) ;
123129 } else {
124130 pre. push ( c) ;
@@ -131,8 +137,8 @@ pub fn obfuscate_url_string(
131137 || ( c as u32 ) < 0x20
132138 || c as u32 == 0x7F
133139 || c == '#'
134- || is_cat1 ( c)
135- || ( frag_has_non_ascii && is_frag_cat2 ( c) )
140+ || is_go_url_escape_cat1 ( c)
141+ || ( frag_has_non_ascii && is_go_url_escape_cat2_fragment ( c) )
136142 {
137143 encode_char ( & mut pre, c) ;
138144 } else {
0 commit comments