Skip to content

Commit 3518e49

Browse files
hashseed authored and
Commit bot committed
[regexp] do not assume short external strings have a minimum size.
Short external strings do not cache the resource data, and may be used for compressible strings. The assumptions about their lengths are invalid and may lead to out-of-bounds reads. [email protected] BUG=v8:4923,chromium:604897 LOG=N Review URL: https://codereview.chromium.org/1901573003 Cr-Commit-Position: refs/heads/master@{#35660}
1 parent 4e93ce4 commit 3518e49

9 files changed

Lines changed: 174 additions & 219 deletions

File tree

src/arm/code-stubs-arm.cc

Lines changed: 28 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,70 +1577,59 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
15771577
__ ldr(subject, MemOperand(sp, kSubjectOffset));
15781578
__ JumpIfSmi(subject, &runtime);
15791579
__ mov(r3, subject); // Make a copy of the original subject string.
1580-
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
1581-
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
15821580
// subject: subject string
15831581
// r3: subject string
1584-
// r0: subject string instance type
15851582
// regexp_data: RegExp data (FixedArray)
15861583
// Handle subject string according to its encoding and representation:
1587-
// (1) Sequential string? If yes, go to (5).
1588-
// (2) Anything but sequential or cons? If yes, go to (6).
1589-
// (3) Cons string. If the string is flat, replace subject with first string.
1590-
// Otherwise bailout.
1591-
// (4) Is subject external? If yes, go to (7).
1592-
// (5) Sequential string. Load regexp code according to encoding.
1584+
// (1) Sequential string? If yes, go to (4).
1585+
// (2) Sequential or cons? If not, go to (5).
1586+
// (3) Cons string. If the string is flat, replace subject with first string
1587+
// and go to (1). Otherwise bail out to runtime.
1588+
// (4) Sequential string. Load regexp code according to encoding.
15931589
// (E) Carry on.
15941590
/// [...]
15951591

15961592
// Deferred code at the end of the stub:
1597-
// (6) Not a long external string? If yes, go to (8).
1598-
// (7) External string. Make it, offset-wise, look like a sequential string.
1599-
// Go to (5).
1600-
// (8) Short external string or not a string? If yes, bail out to runtime.
1601-
// (9) Sliced string. Replace subject with parent. Go to (4).
1593+
// (5) Long external string? If not, go to (7).
1594+
// (6) External string. Make it, offset-wise, look like a sequential string.
1595+
// Go to (4).
1596+
// (7) Short external string or not a string? If yes, bail out to runtime.
1597+
// (8) Sliced string. Replace subject with parent. Go to (1).
1598+
1599+
Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */,
1600+
not_seq_nor_cons /* 5 */, not_long_external /* 7 */;
16021601

1603-
Label seq_string /* 5 */, external_string /* 7 */,
1604-
check_underlying /* 4 */, not_seq_nor_cons /* 6 */,
1605-
not_long_external /* 8 */;
1602+
__ bind(&check_underlying);
1603+
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
1604+
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
16061605

1607-
// (1) Sequential string? If yes, go to (5).
1606+
// (1) Sequential string? If yes, go to (4).
16081607
__ and_(r1,
16091608
r0,
16101609
Operand(kIsNotStringMask |
16111610
kStringRepresentationMask |
16121611
kShortExternalStringMask),
16131612
SetCC);
16141613
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
1615-
__ b(eq, &seq_string); // Go to (5).
1614+
__ b(eq, &seq_string); // Go to (4).
16161615

1617-
// (2) Anything but sequential or cons? If yes, go to (6).
1616+
// (2) Sequential or cons? If not, go to (5).
16181617
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
16191618
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
16201619
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
16211620
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
16221621
__ cmp(r1, Operand(kExternalStringTag));
1623-
__ b(ge, &not_seq_nor_cons); // Go to (6).
1622+
__ b(ge, &not_seq_nor_cons); // Go to (5).
16241623

16251624
// (3) Cons string. Check that it's flat.
16261625
// Replace subject with first string and reload instance type.
16271626
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset));
16281627
__ CompareRoot(r0, Heap::kempty_stringRootIndex);
16291628
__ b(ne, &runtime);
16301629
__ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
1630+
__ jmp(&check_underlying);
16311631

1632-
// (4) Is subject external? If yes, go to (7).
1633-
__ bind(&check_underlying);
1634-
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
1635-
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
1636-
STATIC_ASSERT(kSeqStringTag == 0);
1637-
__ tst(r0, Operand(kStringRepresentationMask));
1638-
// The underlying external string is never a short external string.
1639-
STATIC_ASSERT(ExternalString::kMaxShortLength < ConsString::kMinLength);
1640-
STATIC_ASSERT(ExternalString::kMaxShortLength < SlicedString::kMinLength);
1641-
__ b(ne, &external_string); // Go to (7).
1642-
1643-
// (5) Sequential string. Load regexp code according to encoding.
1632+
// (4) Sequential string. Load regexp code according to encoding.
16441633
__ bind(&seq_string);
16451634
// subject: sequential subject string (or look-alike, external string)
16461635
// r3: original subject string
@@ -1873,12 +1862,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
18731862
__ TailCallRuntime(Runtime::kRegExpExec);
18741863

18751864
// Deferred code for string handling.
1876-
// (6) Not a long external string? If yes, go to (8).
1865+
// (5) Long external string? If not, go to (7).
18771866
__ bind(&not_seq_nor_cons);
18781867
// Compare flags are still set.
1879-
__ b(gt, &not_long_external); // Go to (8).
1868+
__ b(gt, &not_long_external); // Go to (7).
18801869

1881-
// (7) External string. Make it, offset-wise, look like a sequential string.
1870+
// (6) External string. Make it, offset-wise, look like a sequential string.
18821871
__ bind(&external_string);
18831872
__ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset));
18841873
__ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset));
@@ -1895,15 +1884,15 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
18951884
__ sub(subject,
18961885
subject,
18971886
Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag));
1898-
__ jmp(&seq_string); // Go to (5).
1887+
__ jmp(&seq_string); // Go to (4).
18991888

1900-
// (8) Short external string or not a string? If yes, bail out to runtime.
1889+
// (7) Short external string or not a string? If yes, bail out to runtime.
19011890
__ bind(&not_long_external);
19021891
STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0);
19031892
__ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask));
19041893
__ b(ne, &runtime);
19051894

1906-
// (9) Sliced string. Replace subject with parent. Go to (4).
1895+
// (8) Sliced string. Replace subject with parent. Go to (1).
19071896
// Load offset into r9 and replace subject string with parent.
19081897
__ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset));
19091898
__ SmiUntag(r9);

src/arm64/code-stubs-arm64.cc

Lines changed: 34 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,35 +1743,35 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
17431743
__ Peek(subject, kSubjectOffset);
17441744
__ JumpIfSmi(subject, &runtime);
17451745

1746-
__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
1747-
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));
1748-
17491746
__ Ldr(jsstring_length, FieldMemOperand(subject, String::kLengthOffset));
17501747

17511748
// Handle subject string according to its encoding and representation:
1752-
// (1) Sequential string? If yes, go to (5).
1753-
// (2) Anything but sequential or cons? If yes, go to (6).
1754-
// (3) Cons string. If the string is flat, replace subject with first string.
1755-
// Otherwise bailout.
1756-
// (4) Is subject external? If yes, go to (7).
1757-
// (5) Sequential string. Load regexp code according to encoding.
1749+
// (1) Sequential string? If yes, go to (4).
1750+
// (2) Sequential or cons? If not, go to (5).
1751+
// (3) Cons string. If the string is flat, replace subject with first string
1752+
// and go to (1). Otherwise bail out to runtime.
1753+
// (4) Sequential string. Load regexp code according to encoding.
17581754
// (E) Carry on.
17591755
/// [...]
17601756

17611757
// Deferred code at the end of the stub:
1762-
// (6) Not a long external string? If yes, go to (8).
1763-
// (7) External string. Make it, offset-wise, look like a sequential string.
1764-
// Go to (5).
1765-
// (8) Short external string or not a string? If yes, bail out to runtime.
1766-
// (9) Sliced string. Replace subject with parent. Go to (4).
1767-
1768-
Label check_underlying; // (4)
1769-
Label seq_string; // (5)
1770-
Label not_seq_nor_cons; // (6)
1771-
Label external_string; // (7)
1772-
Label not_long_external; // (8)
1773-
1774-
// (1) Sequential string? If yes, go to (5).
1758+
// (5) Long external string? If not, go to (7).
1759+
// (6) External string. Make it, offset-wise, look like a sequential string.
1760+
// Go to (4).
1761+
// (7) Short external string or not a string? If yes, bail out to runtime.
1762+
// (8) Sliced string. Replace subject with parent. Go to (1).
1763+
1764+
Label check_underlying; // (1)
1765+
Label seq_string; // (4)
1766+
Label not_seq_nor_cons; // (5)
1767+
Label external_string; // (6)
1768+
Label not_long_external; // (7)
1769+
1770+
__ Bind(&check_underlying);
1771+
__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
1772+
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));
1773+
1774+
// (1) Sequential string? If yes, go to (4).
17751775
__ And(string_representation,
17761776
string_type,
17771777
kIsNotStringMask |
@@ -1788,36 +1788,24 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
17881788
// is a String
17891789
STATIC_ASSERT((kStringTag | kSeqStringTag) == 0);
17901790
STATIC_ASSERT(kShortExternalStringTag != 0);
1791-
__ Cbz(string_representation, &seq_string); // Go to (5).
1791+
__ Cbz(string_representation, &seq_string); // Go to (4).
17921792

1793-
// (2) Anything but sequential or cons? If yes, go to (6).
1793+
// (2) Sequential or cons? If not, go to (5).
17941794
STATIC_ASSERT(kConsStringTag < kExternalStringTag);
17951795
STATIC_ASSERT(kSlicedStringTag > kExternalStringTag);
17961796
STATIC_ASSERT(kIsNotStringMask > kExternalStringTag);
17971797
STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag);
17981798
__ Cmp(string_representation, kExternalStringTag);
1799-
__ B(ge, &not_seq_nor_cons); // Go to (6).
1799+
__ B(ge, &not_seq_nor_cons); // Go to (5).
18001800

18011801
// (3) Cons string. Check that it's flat.
18021802
__ Ldr(x10, FieldMemOperand(subject, ConsString::kSecondOffset));
18031803
__ JumpIfNotRoot(x10, Heap::kempty_stringRootIndex, &runtime);
18041804
// Replace subject with first string.
18051805
__ Ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
1806+
__ B(&check_underlying);
18061807

1807-
// (4) Is subject external? If yes, go to (7).
1808-
__ Bind(&check_underlying);
1809-
// Reload the string type.
1810-
__ Ldr(x10, FieldMemOperand(subject, HeapObject::kMapOffset));
1811-
__ Ldrb(string_type, FieldMemOperand(x10, Map::kInstanceTypeOffset));
1812-
STATIC_ASSERT(kSeqStringTag == 0);
1813-
// The underlying external string is never a short external string.
1814-
STATIC_ASSERT(ExternalString::kMaxShortLength < ConsString::kMinLength);
1815-
STATIC_ASSERT(ExternalString::kMaxShortLength < SlicedString::kMinLength);
1816-
__ TestAndBranchIfAnySet(string_type.X(),
1817-
kStringRepresentationMask,
1818-
&external_string); // Go to (7).
1819-
1820-
// (5) Sequential string. Load regexp code according to encoding.
1808+
// (4) Sequential string. Load regexp code according to encoding.
18211809
__ Bind(&seq_string);
18221810

18231811
// Check that the third argument is a positive smi less than the subject
@@ -2087,12 +2075,12 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
20872075
__ TailCallRuntime(Runtime::kRegExpExec);
20882076

20892077
// Deferred code for string handling.
2090-
// (6) Not a long external string? If yes, go to (8).
2078+
// (5) Long external string? If not, go to (7).
20912079
__ Bind(&not_seq_nor_cons);
20922080
// Compare flags are still set.
2093-
__ B(ne, &not_long_external); // Go to (8).
2081+
__ B(ne, &not_long_external); // Go to (7).
20942082

2095-
// (7) External string. Make it, offset-wise, look like a sequential string.
2083+
// (6) External string. Make it, offset-wise, look like a sequential string.
20962084
__ Bind(&external_string);
20972085
if (masm->emit_debug_code()) {
20982086
// Assert that we do not have a cons or slice (indirect strings) here.
@@ -2110,21 +2098,21 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
21102098
// Move the pointer so that offset-wise, it looks like a sequential string.
21112099
STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqOneByteString::kHeaderSize);
21122100
__ Sub(subject, subject, SeqTwoByteString::kHeaderSize - kHeapObjectTag);
2113-
__ B(&seq_string); // Go to (5).
2101+
__ B(&seq_string); // Go to (4).
21142102

2115-
// (8) If this is a short external string or not a string, bail out to
2103+
// (7) If this is a short external string or not a string, bail out to
21162104
// runtime.
21172105
__ Bind(&not_long_external);
21182106
STATIC_ASSERT(kShortExternalStringTag != 0);
21192107
__ TestAndBranchIfAnySet(string_representation,
21202108
kShortExternalStringMask | kIsNotStringMask,
21212109
&runtime);
21222110

2123-
// (9) Sliced string. Replace subject with parent.
2111+
// (8) Sliced string. Replace subject with parent.
21242112
__ Ldr(sliced_string_offset,
21252113
UntagSmiFieldMemOperand(subject, SlicedString::kOffsetOffset));
21262114
__ Ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
2127-
__ B(&check_underlying); // Go to (4).
2115+
__ B(&check_underlying); // Go to (1).
21282116
#endif
21292117
}
21302118

0 commit comments

Comments
 (0)