Skip to content

Commit 8db3fd4

Browse files
committed
fix(http): rename/document confusing cat1/cat2 functions
1 parent e498168 commit 8db3fd4

File tree

1 file changed

+19
-13
lines changed

1 file changed

+19
-13
lines changed

libdd-trace-obfuscation/src/http.rs

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,27 @@ use fluent_uri::UriRef;
99
use percent_encoding::percent_decode_str;
1010
use std::fmt::Write;
1111

12-
fn is_cat1(c: char) -> bool {
12+
/// Returns true for Go net/url's "category 1" characters:
13+
/// ASCII characters that are always escaped, in both paths and fragments (includes space and the double quote).
14+
fn is_go_url_escape_cat1(c: char) -> bool {
1315
matches!(
1416
c,
1517
'\\' | '^' | '{' | '}' | '|' | '<' | '>' | '`' | ' ' | '"'
1618
)
1719
}
1820

21+
/// Returns true for Go net/url's "category 2" characters for PATH contexts:
22+
/// characters that are escaped in a path only when that path also contains a category 1 or non-ASCII character.
23+
fn is_go_url_escape_cat2_path(c: char) -> bool {
24+
matches!(c, '!' | '\'' | '(' | ')' | '*' | '[' | ']')
25+
}
26+
27+
/// Returns true for Go net/url's "category 2" characters for FRAGMENT contexts:
28+
/// characters that are escaped in a fragment only when that fragment also contains a non-ASCII character.
29+
fn is_go_url_escape_cat2_fragment(c: char) -> bool {
30+
matches!(c, '\'' | '[' | ']')
31+
}
32+
1933
fn hex_val(b: u8) -> u8 {
2034
match b {
2135
b'0'..=b'9' => b - b'0',
@@ -50,14 +64,6 @@ fn normalize_pct_encoded_unreserved(path: &str) -> String {
5064
out
5165
}
5266

53-
fn is_path_cat2(c: char) -> bool {
54-
matches!(c, '!' | '\'' | '(' | ')' | '*' | '[' | ']')
55-
}
56-
57-
fn is_frag_cat2(c: char) -> bool {
58-
matches!(c, '\'' | '[' | ']')
59-
}
60-
6167
fn encode_char(out: &mut String, c: char) {
6268
let mut buf = [0u8; 4];
6369
for &b in c.encode_utf8(&mut buf).as_bytes() {
@@ -106,7 +112,7 @@ pub fn obfuscate_url_string(
106112

107113
// Determine Go's escape() trigger: a Cat1 or non-ASCII character in the path causes Cat2 characters to be encoded too
108114
let path = &url[..path_end];
109-
let needs_full_path = path.bytes().any(|b| b > 127) || path.chars().any(is_cat1);
115+
let needs_full_path = path.bytes().any(|b| b > 127) || path.chars().any(is_go_url_escape_cat1);
110116
let frag_has_non_ascii = frag_pos.is_some_and(|i| url[i + 1..].bytes().any(|b| b > 127));
111117

112118
// Pre-encode chars that UriRef (strict RFC 3986) rejects.
@@ -118,7 +124,7 @@ pub fn obfuscate_url_string(
118124
for c in url[..path_end].chars() {
119125
if !c.is_ascii() {
120126
encode_char(&mut pre, c);
121-
} else if is_cat1(c) || (needs_full_path && is_path_cat2(c)) {
127+
} else if is_go_url_escape_cat1(c) || (needs_full_path && is_go_url_escape_cat2_path(c)) {
122128
let _ = write!(pre, "%{:02X}", c as u8);
123129
} else {
124130
pre.push(c);
@@ -131,8 +137,8 @@ pub fn obfuscate_url_string(
131137
|| (c as u32) < 0x20
132138
|| c as u32 == 0x7F
133139
|| c == '#'
134-
|| is_cat1(c)
135-
|| (frag_has_non_ascii && is_frag_cat2(c))
140+
|| is_go_url_escape_cat1(c)
141+
|| (frag_has_non_ascii && is_go_url_escape_cat2_fragment(c))
136142
{
137143
encode_char(&mut pre, c);
138144
} else {

0 commit comments

Comments
 (0)