Skip to content

Commit 213504b

Browse files
pthier authored and Commit Bot committed
[regexp] Consolidate calls to jitted irregexp and regexp interpreter
The code fields in a JSRegExp object now contain either irregexp compiled code or a trampoline to the interpreter. This way the code can be executed without explicitly checking whether the regexp shall be interpreted or executed natively.

In the case of an interpreted regexp, the generated bytecode is now stored in its own fields instead of the code fields for Latin1 and UC16 respectively.

The signatures of the jitted irregexp match and the regexp interpreter have been equalized.

Bug: v8:9516
Change-Id: I30e3d86f4702a902d3387bccc1ee91dea501fe4e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1762513
Commit-Queue: Patrick Thier <[email protected]>
Reviewed-by: Peter Marshall <[email protected]>
Reviewed-by: Jakob Gruber <[email protected]>
Reviewed-by: Michael Starzinger <[email protected]>
Cr-Commit-Position: refs/heads/master@{#63457}
1 parent 37a4937 commit 213504b

31 files changed

Lines changed: 357 additions & 248 deletions

src/builtins/builtins-definitions.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -863,6 +863,7 @@ namespace internal {
863863
/* RegExp helpers */ \
864864
TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \
865865
TFS(RegExpExecInternal, kRegExp, kString, kLastIndex, kMatchInfo) \
866+
ASM(RegExpInterpreterTrampoline, CCall) \
866867
TFS(RegExpMatchFast, kReceiver, kPattern) \
867868
TFS(RegExpPrototypeExecSlow, kReceiver, kString) \
868869
TFS(RegExpSearchFast, kReceiver, kPattern) \

src/builtins/builtins-regexp-gen.cc

Lines changed: 103 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
#include "src/builtins/growable-fixed-array-gen.h"
1111
#include "src/codegen/code-factory.h"
1212
#include "src/codegen/code-stub-assembler.h"
13+
#include "src/codegen/macro-assembler.h"
1314
#include "src/execution/protectors.h"
1415
#include "src/heap/factory-inl.h"
1516
#include "src/logging/counters.h"
@@ -25,12 +26,60 @@ using compiler::Node;
2526
template <class T>
2627
using TNode = compiler::TNode<T>;
2728

29+
// Tail calls the regular expression interpreter.
30+
// static
31+
void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
32+
ExternalReference interpreter_code_entry =
33+
ExternalReference::re_match_for_call_from_js(masm->isolate());
34+
masm->Jump(interpreter_code_entry);
35+
}
36+
2837
TNode<Smi> RegExpBuiltinsAssembler::SmiZero() { return SmiConstant(0); }
2938

3039
TNode<IntPtrT> RegExpBuiltinsAssembler::IntPtrZero() {
3140
return IntPtrConstant(0);
3241
}
3342

43+
// If code is a builtin, return the address to the (possibly embedded) builtin
44+
// code entry, otherwise return the entry of the code object itself.
45+
TNode<RawPtrT> RegExpBuiltinsAssembler::LoadCodeObjectEntry(TNode<Code> code) {
46+
TVARIABLE(RawPtrT, var_result);
47+
48+
Label if_code_is_off_heap(this), out(this);
49+
{
50+
// TODO(pthier): A potential optimization for the future is to make this
51+
// decision based on the builtin index instead of flags, and avoid the
52+
// additional load below.
53+
TNode<Int32T> code_flags = UncheckedCast<Int32T>(
54+
LoadObjectField(code, Code::kFlagsOffset, MachineType::Int32()));
55+
GotoIf(IsSetWord32(code_flags, Code::IsOffHeapTrampoline::kMask),
56+
&if_code_is_off_heap);
57+
var_result = ReinterpretCast<RawPtrT>(
58+
IntPtrAdd(BitcastTaggedToWord(code),
59+
IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
60+
Goto(&out);
61+
}
62+
63+
BIND(&if_code_is_off_heap);
64+
{
65+
TNode<Int32T> builtin_index = UncheckedCast<Int32T>(
66+
LoadObjectField(code, Code::kBuiltinIndexOffset, MachineType::Int32()));
67+
TNode<IntPtrT> builtin_entry_offset_from_isolate_root =
68+
IntPtrAdd(IntPtrConstant(IsolateData::builtin_entry_table_offset()),
69+
ChangeInt32ToIntPtr(Word32Shl(
70+
builtin_index, Int32Constant(kSystemPointerSizeLog2))));
71+
72+
var_result = ReinterpretCast<RawPtrT>(
73+
Load(MachineType::Pointer(),
74+
ExternalConstant(ExternalReference::isolate_root(isolate())),
75+
builtin_entry_offset_from_isolate_root));
76+
Goto(&out);
77+
}
78+
79+
BIND(&out);
80+
return var_result.value();
81+
}
82+
3483
// -----------------------------------------------------------------------------
3584
// ES6 section 21.2 RegExp Objects
3685

@@ -336,8 +385,7 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
336385
ToDirectStringAssembler to_direct(state(), string);
337386

338387
TVARIABLE(HeapObject, var_result);
339-
Label out(this), interpreted(this), atom(this),
340-
runtime(this, Label::kDeferred);
388+
Label out(this), atom(this), runtime(this, Label::kDeferred);
341389

342390
// External constants.
343391
TNode<ExternalReference> isolate_address =
@@ -406,12 +454,13 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
406454

407455
to_direct.TryToDirect(&runtime);
408456

409-
// Load the irregexp code object and offsets into the subject string. Both
410-
// depend on whether the string is one- or two-byte.
457+
// Load the irregexp code or bytecode object and offsets into the subject
458+
// string. Both depend on whether the string is one- or two-byte.
411459

412460
TVARIABLE(RawPtrT, var_string_start);
413461
TVARIABLE(RawPtrT, var_string_end);
414462
TVARIABLE(Object, var_code);
463+
TVARIABLE(Object, var_bytecode);
415464

416465
{
417466
TNode<RawPtrT> direct_string_data = to_direct.PointerToData(&runtime);
@@ -427,6 +476,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
427476
&var_string_start, &var_string_end);
428477
var_code =
429478
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex);
479+
var_bytecode = UnsafeLoadFixedArrayElement(
480+
data, JSRegExp::kIrregexpLatin1BytecodeIndex);
430481
Goto(&next);
431482
}
432483

@@ -437,6 +488,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
437488
&var_string_start, &var_string_end);
438489
var_code =
439490
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex);
491+
var_bytecode = UnsafeLoadFixedArrayElement(
492+
data, JSRegExp::kIrregexpUC16BytecodeIndex);
440493
Goto(&next);
441494
}
442495

@@ -458,14 +511,28 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
458511
#endif
459512

460513
GotoIf(TaggedIsSmi(var_code.value()), &runtime);
461-
GotoIfNot(IsCode(CAST(var_code.value())), &interpreted);
462514
TNode<Code> code = CAST(var_code.value());
463515

464-
// Ensure that a RegExp stack is allocated when using compiled Irregexp.
516+
// Tier-up in runtime if ticks are non-zero and tier-up hasn't happened yet
517+
// and ensure that a RegExp stack is allocated when using compiled Irregexp.
465518
{
519+
Label next(this);
520+
GotoIfNot(TaggedIsSmi(var_bytecode.value()), &next);
521+
CSA_ASSERT(this, SmiEqual(CAST(var_bytecode.value()),
522+
SmiConstant(JSRegExp::kUninitializedValue)));
523+
524+
// Ensure RegExp stack is allocated.
466525
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
467526
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
468527
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
528+
529+
// Check if tier-up is requested.
530+
TNode<Smi> ticks = CAST(
531+
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpTierUpTicksIndex));
532+
GotoIf(SmiToInt32(ticks), &runtime);
533+
534+
Goto(&next);
535+
BIND(&next);
469536
}
470537

471538
Label if_success(this), if_exception(this, Label::kDeferred);
@@ -489,25 +556,39 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
489556
MachineType arg1_type = type_int32;
490557
TNode<Int32T> arg1 = TruncateIntPtrToInt32(int_last_index);
491558

492-
// Argument 2: Start of string data.
559+
// Argument 2: Start of string data. This argument is ignored in the
560+
// interpreter.
493561
MachineType arg2_type = type_ptr;
494562
TNode<RawPtrT> arg2 = var_string_start.value();
495563

496-
// Argument 3: End of string data.
564+
// Argument 3: End of string data. This argument is ignored in the
565+
// interpreter.
497566
MachineType arg3_type = type_ptr;
498567
TNode<RawPtrT> arg3 = var_string_end.value();
499568

500569
// Argument 4: static offsets vector buffer.
501570
MachineType arg4_type = type_ptr;
502571
TNode<ExternalReference> arg4 = static_offsets_vector_address;
503572

504-
// Argument 5: Set the number of capture registers to zero to force global
505-
// regexps to behave as non-global. This does not affect non-global
506-
// regexps.
573+
// Argument 5: Number of capture registers.
574+
// Setting this to the number of registers required to store all captures
575+
// forces global regexps to behave as non-global.
576+
TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
577+
data, JSRegExp::kIrregexpCaptureCountIndex));
578+
// capture_count is the number of captures without the match itself.
579+
// Required registers = (capture_count + 1) * 2.
580+
STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) << 1));
581+
TNode<Smi> register_count =
582+
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
583+
507584
MachineType arg5_type = type_int32;
508-
TNode<Int32T> arg5 = Int32Constant(0);
585+
TNode<Int32T> arg5 = SmiToInt32(register_count);
509586

510-
// Argument 6: Start (high end) of backtracking stack memory area.
587+
// Argument 6: Start (high end) of backtracking stack memory area. This
588+
// argument is ignored in the interpreter.
589+
// TODO(pthier): We should consider creating a dedicated external reference
590+
// for top of regexp stack instead of calculating it here for every
591+
// execution.
511592
TNode<RawPtrT> stack_start = UncheckedCast<RawPtrT>(
512593
Load(MachineType::Pointer(), regexp_stack_memory_address_address));
513594
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
@@ -520,22 +601,26 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
520601

521602
// Argument 7: Indicate that this is a direct call from JavaScript.
522603
MachineType arg7_type = type_int32;
523-
TNode<Int32T> arg7 = Int32Constant(1);
604+
TNode<Int32T> arg7 = Int32Constant(RegExp::CallOrigin::kFromJs);
524605

525606
// Argument 8: Pass current isolate address.
526607
MachineType arg8_type = type_ptr;
527608
TNode<ExternalReference> arg8 = isolate_address;
528609

529-
TNode<RawPtrT> code_entry = ReinterpretCast<RawPtrT>(
530-
IntPtrAdd(BitcastTaggedToWord(code),
531-
IntPtrConstant(Code::kHeaderSize - kHeapObjectTag)));
610+
// Argument 9: Regular expression object. This argument is ignored in native
611+
// irregexp code.
612+
MachineType arg9_type = type_tagged;
613+
TNode<JSRegExp> arg9 = regexp;
614+
615+
TNode<RawPtrT> code_entry = LoadCodeObjectEntry(code);
532616

533617
TNode<Int32T> result = UncheckedCast<Int32T>(CallCFunction(
534618
code_entry, retval_type, std::make_pair(arg0_type, arg0),
535619
std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
536620
std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
537621
std::make_pair(arg5_type, arg5), std::make_pair(arg6_type, arg6),
538-
std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8)));
622+
std::make_pair(arg7_type, arg7), std::make_pair(arg8_type, arg8),
623+
std::make_pair(arg9_type, arg9)));
539624

540625
// Check the result.
541626
// We expect exactly one result since we force the called regexp to behave
@@ -556,78 +641,6 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
556641
Goto(&runtime);
557642
}
558643

559-
BIND(&interpreted);
560-
{
561-
// Tier-up in runtime to compiler if ticks are non-zero.
562-
TNode<Smi> ticks = CAST(
563-
UnsafeLoadFixedArrayElement(data, JSRegExp::kIrregexpTierUpTicksIndex));
564-
GotoIf(SmiToInt32(ticks), &runtime);
565-
566-
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
567-
568-
// Set up args for the final call into IrregexpInterpreter.
569-
570-
MachineType type_int32 = MachineType::Int32();
571-
MachineType type_tagged = MachineType::AnyTagged();
572-
MachineType type_ptr = MachineType::Pointer();
573-
574-
// Result: A IrregexpInterpreter::Result return code.
575-
MachineType retval_type = type_int32;
576-
577-
// Argument 0: Pass current isolate address.
578-
MachineType arg0_type = type_ptr;
579-
TNode<ExternalReference> arg0 = isolate_address;
580-
581-
// Argument 1: Regular expression object.
582-
MachineType arg1_type = type_tagged;
583-
TNode<JSRegExp> arg1 = regexp;
584-
585-
// Argument 2: Original subject string.
586-
MachineType arg2_type = type_tagged;
587-
TNode<String> arg2 = string;
588-
589-
// Argument 3: Static offsets vector buffer.
590-
MachineType arg3_type = type_ptr;
591-
TNode<ExternalReference> arg3 = static_offsets_vector_address;
592-
593-
// Argument 4: Length of static offsets vector buffer.
594-
TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
595-
data, JSRegExp::kIrregexpCaptureCountIndex));
596-
TNode<Smi> register_count =
597-
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
598-
599-
MachineType arg4_type = type_int32;
600-
TNode<Int32T> arg4 = SmiToInt32(register_count);
601-
602-
// Argument 5: Previous index.
603-
MachineType arg5_type = type_int32;
604-
TNode<Int32T> arg5 = TruncateIntPtrToInt32(int_last_index);
605-
606-
TNode<ExternalReference> code_entry = ExternalConstant(
607-
ExternalReference::re_match_for_call_from_js(isolate()));
608-
609-
TNode<Int32T> result = UncheckedCast<Int32T>(CallCFunction(
610-
code_entry, retval_type, std::make_pair(arg0_type, arg0),
611-
std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
612-
std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
613-
std::make_pair(arg5_type, arg5)));
614-
615-
TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
616-
GotoIf(
617-
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
618-
&if_success);
619-
GotoIf(
620-
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
621-
&if_failure);
622-
GotoIf(IntPtrEqual(int_result,
623-
IntPtrConstant(RegExp::kInternalRegExpException)),
624-
&if_exception);
625-
626-
CSA_ASSERT(this, IntPtrEqual(int_result,
627-
IntPtrConstant(RegExp::kInternalRegExpRetry)));
628-
Goto(&runtime);
629-
}
630-
631644
BIND(&if_success);
632645
{
633646
// Check that the last match info has space for the capture registers and
@@ -1812,7 +1825,6 @@ void RegExpBuiltinsAssembler::RegExpPrototypeMatchBody(TNode<Context> context,
18121825
TNode<RegExpMatchInfo> match_indices =
18131826
RegExpPrototypeExecBodyWithoutResult(context, CAST(regexp), string,
18141827
&if_didnotmatch, true);
1815-
18161828
Label dosubstring(this), donotsubstring(this);
18171829
Branch(var_atom.value(), &donotsubstring, &dosubstring);
18181830

src/builtins/builtins-regexp-gen.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
3030
TNode<Smi> SmiZero();
3131
TNode<IntPtrT> IntPtrZero();
3232

33+
TNode<RawPtrT> LoadCodeObjectEntry(TNode<Code> code);
34+
3335
// Allocate a RegExpResult with the given length (the number of captures,
3436
// including the match itself), index (the index where the match starts),
3537
// and input string.

src/codegen/arm/macro-assembler-arm.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,13 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
217217
Jump(static_cast<intptr_t>(code.address()), rmode, cond);
218218
}
219219

220+
void TurboAssembler::Jump(const ExternalReference& reference) {
221+
UseScratchRegisterScope temps(this);
222+
Register scratch = temps.Acquire();
223+
Move(scratch, reference);
224+
Jump(scratch);
225+
}
226+
220227
void TurboAssembler::Call(Register target, Condition cond) {
221228
// Block constant pool for the call instruction sequence.
222229
BlockConstPoolScope block_const_pool(this);

src/codegen/arm/macro-assembler-arm.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -409,6 +409,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
409409
void Jump(Register target, Condition cond = al);
410410
void Jump(Address target, RelocInfo::Mode rmode, Condition cond = al);
411411
void Jump(Handle<Code> code, RelocInfo::Mode rmode, Condition cond = al);
412+
void Jump(const ExternalReference& reference) override;
412413

413414
// Perform a floating-point min or max operation with the
414415
// (IEEE-754-compatible) semantics of ARM64's fmin/fmax. Some cases, typically

src/codegen/arm64/macro-assembler-arm64.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1867,6 +1867,13 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
18671867
}
18681868
}
18691869

1870+
void TurboAssembler::Jump(const ExternalReference& reference) {
1871+
UseScratchRegisterScope temps(this);
1872+
Register scratch = temps.AcquireX();
1873+
Mov(scratch, reference);
1874+
Jump(scratch);
1875+
}
1876+
18701877
void TurboAssembler::Call(Register target) {
18711878
BlockPoolsScope scope(this);
18721879
Blr(target);

src/codegen/arm64/macro-assembler-arm64.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -889,6 +889,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
889889
void Jump(Register target, Condition cond = al);
890890
void Jump(Address target, RelocInfo::Mode rmode, Condition cond = al);
891891
void Jump(Handle<Code> code, RelocInfo::Mode rmode, Condition cond = al);
892+
void Jump(const ExternalReference& reference) override;
892893

893894
void Call(Register target);
894895
void Call(Address target, RelocInfo::Mode rmode);

src/codegen/ia32/macro-assembler-ia32.cc

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1957,6 +1957,12 @@ void TurboAssembler::JumpCodeObject(Register code_object) {
19571957
jmp(code_object);
19581958
}
19591959

1960+
void TurboAssembler::Jump(const ExternalReference& reference) {
1961+
DCHECK(root_array_available());
1962+
jmp(Operand(kRootRegister, RootRegisterOffsetForExternalReferenceTableEntry(
1963+
isolate(), reference)));
1964+
}
1965+
19601966
void TurboAssembler::Jump(Handle<Code> code_object, RelocInfo::Mode rmode) {
19611967
DCHECK_IMPLIES(options().isolate_independent_code,
19621968
Builtins::IsIsolateIndependentBuiltin(*code_object));

src/codegen/ia32/macro-assembler-ia32.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
9696
void LoadCodeObjectEntry(Register destination, Register code_object) override;
9797
void CallCodeObject(Register code_object) override;
9898
void JumpCodeObject(Register code_object) override;
99+
void Jump(const ExternalReference& reference) override;
99100

100101
void RetpolineCall(Register reg);
101102
void RetpolineCall(Address destination, RelocInfo::Mode rmode);

0 commit comments

Comments
 (0)