Skip to content

Commit ddf1b5d

Browse files
schuayV8 LUCI CQ
authored andcommitted
[regexp] Fix a possible off-by-one in Trace::cp_offset
.. which was itself guaranteed to be within kMinCPOffset and kMaxCPOffset. But some callers then went on to subtract 1, therefore exceeding kMinCPOffset. Ideally, callers would check that their updated value is still in bounds, and take appropriate action if not. But unfortunately, some current callers (e.g. in RegExpMacroAssembler) *have* no appropriate action, since they cannot abort. So this CL instead takes the approach of adding a kCPOffsetSlack instead, making the range checks in Trace stricter in order to allow caller "misbehavior". We also change cp_offset range DCHECKs into CHECKs since I'm not fully convinced that we correctly observe bounds everywhere. Fixed: 451663011 Change-Id: Ib50685d00a490a2959880bdd2fbeae5228a55997 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7037653 Reviewed-by: Patrick Thier <[email protected]> Commit-Queue: Jakob Linke <[email protected]> Cr-Commit-Position: refs/heads/main@{#103102}
1 parent 891d8f1 commit ddf1b5d

5 files changed

Lines changed: 27 additions & 4 deletions

File tree

src/regexp/regexp-bytecode-generator.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,7 @@ void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
204204
check_bounds = false; // Load below doesn't need to check.
205205
}
206206

207-
DCHECK_LE(kMinCPOffset, cp_offset);
208-
DCHECK_GE(kMaxCPOffset, cp_offset);
207+
CHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset));
209208
int bytecode;
210209
if (check_bounds) {
211210
if (characters == 4) {

src/regexp/regexp-compiler.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2313,6 +2313,7 @@ void AssertionNode::BacktrackIfPrevious(
23132313
// If we've already checked that we are not at the start of input, it's okay
23142314
// to load the previous character without bounds checks.
23152315
const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
2316+
static_assert(Trace::kCPOffsetSlack == 1);
23162317
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
23172318
can_skip_bounds_check);
23182319
EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
@@ -2567,6 +2568,7 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
25672568
}
25682569

25692570
bool first_elt_done = false;
2571+
static_assert(Trace::kCPOffsetSlack == 1);
25702572
int bound_checked_to = trace->cp_offset() - 1;
25712573
bound_checked_to += trace->bound_checked_up_to();
25722574

@@ -2611,7 +2613,10 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
26112613
// characters by means of mask and compare.
26122614
quick_check_performed_.Advance(by, compiler->one_byte());
26132615
cp_offset_ += by;
2614-
if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2616+
static_assert(RegExpMacroAssembler::kMaxCPOffset ==
2617+
-RegExpMacroAssembler::kMinCPOffset);
2618+
if (std::abs(cp_offset_) + kCPOffsetSlack >
2619+
RegExpMacroAssembler::kMaxCPOffset) {
26152620
compiler->SetRegExpTooBig();
26162621
cp_offset_ = 0;
26172622
}

src/regexp/regexp-compiler.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,17 @@ class Trace {
278278
};
279279
void Flush(RegExpCompiler* compiler, RegExpNode* successor,
280280
FlushMode mode = kFlushFull);
281+
282+
// Some callers add/subtract 1 from cp_offset, assuming that the result is
283+
// still valid. That's obviously not the case when our `cp_offset` is only
284+
// checked against kMinCPOffset/kMaxCPOffset, so we need to apply the some
285+
// slack.
286+
// TODO(jgruber): It would be better if all callers checked against limits
287+
// themselves when doing so; but unfortunately not all callers have
288+
// abort-compilation mechanisms.
289+
static constexpr int kCPOffsetSlack = 1;
281290
int cp_offset() const { return cp_offset_; }
291+
282292
// Does any trace in the chain have an action?
283293
bool has_any_actions() const { return has_any_actions_; }
284294
// Does this particular trace object have an action?

src/regexp/regexp-macro-assembler.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ void NativeRegExpMacroAssembler::LoadCurrentCharacterImpl(
261261
// path requires a large number of characters, but not the reverse.
262262
DCHECK_GE(eats_at_least, characters);
263263

264-
DCHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset));
264+
CHECK(base::IsInRange(cp_offset, kMinCPOffset, kMaxCPOffset));
265265
if (check_bounds) {
266266
if (cp_offset >= 0) {
267267
CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Copyright 2025 the V8 project authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
const length = 32767;
6+
const pattern_body = "^" + "a".repeat(length);
7+
const pattern = new RegExp("(?<=" + pattern_body + ")", "m");
8+
const input = "a".repeat(length) + "b" + '\n';
9+
assertThrows(() => pattern.exec(input));

0 commit comments

Comments
 (0)