Skip to content

Commit 8112cb4

Browse files
Dominik Inführ authored and V8 LUCI CQ committed
[compiler] Reduce wb verification performance overhead
This CL adds a fast-path check, written in inline assembly, to the skipped write barrier verification to avoid the cost of saving/restoring registers and the C++ call. The new method PreCheckSkippedWriteBarrier jumps directly back to the regular code path once it determines that skipping the write barrier was safe. This method doesn't need to be exhaustive and is optional because we still use the C++ method as a fallback and for error reporting.

This CL reduces the overhead from -12% to -3.3% on Speedometer3 and from -26.7% to -4% on Jetstream2 (both on M1).

Bug: 437096305
Change-Id: I7c280f6025a8204b85cb7f7a474162902b074df8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6929591
Commit-Queue: Dominik Inführ <[email protected]>
Reviewed-by: Darius Mercadier <[email protected]>
Reviewed-by: Igor Sheludko <[email protected]>
Cr-Commit-Position: refs/heads/main@{#102512}
1 parent 99fc9b3 commit 8112cb4

16 files changed

Lines changed: 245 additions & 22 deletions

src/codegen/arm/macro-assembler-arm.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2984,6 +2984,52 @@ Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
29842984
UNREACHABLE();
29852985
}
29862986

2987+
// Emits the inline fast path of the skipped-write-barrier verification (arm).
// Branches to |ok| as soon as one of the cheap checks below shows that leaving
// out the write barrier for storing |value| into |object| was safe. If none of
// them applies, execution falls through to whatever is emitted after this call
// (the slow path). |scratch| is clobbered; it must alias neither |object| nor
// |value|.
void MacroAssembler::PreCheckSkippedWriteBarrier(Register object,
                                                 Register value,
                                                 Register scratch, Label* ok) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(object, scratch));
  DCHECK(!AreAliased(value, scratch));

  // Root-relative slots consulted by the checks below.
  const MemOperand last_young_allocation(
      kRootRegister, IsolateData::last_young_allocation_offset());
  const MemOperand lab_start(kRootRegister,
                             IsolateData::new_allocation_info_start_offset());
  const MemOperand lab_top(kRootRegister,
                           IsolateData::new_allocation_info_top_offset());
  Label slow_path;

  // Check 1 (the most common case): static write barrier elimination is
  // allowed on the last young allocation, so compare the untagged object
  // address against it.
  {
    UseScratchRegisterScope temps(this);
    const Register expected = temps.Acquire();
    sub(scratch, object, Operand(kHeapObjectTag));
    ldr(expected, last_young_allocation);
    cmp(scratch, expected);
    b(Condition::kEqual, ok);
  }

  // Check 2: the write barrier can also be removed if |value| is in read-only
  // space.
  CheckPageFlag(value, scratch, MemoryChunk::kIsInReadOnlyHeapMask, ne, ok);

  // Check 3 (allocation folding): allow write barrier removal if
  // LAB start <= object < LAB top.
  {
    UseScratchRegisterScope temps(this);
    const Register bound = temps.Acquire();
    // CheckPageFlag clobbered |scratch|, so the untagged object address has to
    // be computed again.
    sub(scratch, object, Operand(kHeapObjectTag));
    ldr(bound, lab_start);
    cmp(scratch, bound);
    b(Condition::kUnsignedLessThan, &slow_path);
    ldr(bound, lab_top);
    cmp(scratch, bound);
    b(Condition::kUnsignedLessThan, ok);
  }

  // Slow path: further cases are potentially checked in C++.
  bind(&slow_path);
}
3032+
29873033
void MacroAssembler::ComputeCodeStartAddress(Register dst) {
29883034
ASM_CODE_COMMENT(this);
29893035
// We can use the register pc - 8 for the address of the current instruction.

src/codegen/arm/macro-assembler-arm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,9 @@ class V8_EXPORT_PRIVATE MacroAssembler : public MacroAssemblerBase {
418418
CheckPageFlag(object, mask, cc, condition_met);
419419
}
420420

421+
// Emits a fast-path check used when verifying a skipped write barrier.
// Branches to |ok| if the untagged |object| address equals the last young
// allocation, if |value|'s page has the read-only-heap flag set, or if the
// untagged |object| address lies in [LAB start, LAB top). Otherwise falls
// through. |scratch| is clobbered and must not alias |object| or |value|.
void PreCheckSkippedWriteBarrier(Register object, Register value,
422+
Register scratch, Label* ok);
423+
421424
// Check whether d16-d31 are available on the CPU. The result is given by the
422425
// Z condition flag: Z==0 if d16-d31 available, Z==1 otherwise.
423426
void CheckFor32DRegs(Register scratch);

src/codegen/arm64/macro-assembler-arm64.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3895,6 +3895,52 @@ void MacroAssembler::JumpIfNotMarking(Label* not_marking,
38953895
Cbz(scratch, not_marking);
38963896
}
38973897

3898+
// Emits the inline fast path of the skipped-write-barrier verification
// (arm64). Branches to |ok| as soon as one of the cheap checks below shows
// that leaving out the write barrier for storing |value| into |object| was
// safe. If none of them applies, execution falls through to whatever is
// emitted after this call (the slow path). |scratch| is clobbered; it must
// alias neither |object| nor |value|.
void MacroAssembler::PreCheckSkippedWriteBarrier(Register object,
                                                 Register value,
                                                 Register scratch, Label* ok) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(object, scratch));
  DCHECK(!AreAliased(value, scratch));

  // Root-relative slots consulted by the checks below.
  const MemOperand last_young_allocation(
      kRootRegister, IsolateData::last_young_allocation_offset());
  const MemOperand lab_start(kRootRegister,
                             IsolateData::new_allocation_info_start_offset());
  const MemOperand lab_top(kRootRegister,
                           IsolateData::new_allocation_info_top_offset());
  Label slow_path;

  // Check 1 (the most common case): static write barrier elimination is
  // allowed on the last young allocation, so compare the untagged object
  // address against it.
  {
    UseScratchRegisterScope temps(this);
    const Register expected = temps.AcquireX();
    sub(scratch, object, kHeapObjectTag);
    Ldr(expected, last_young_allocation);
    cmp(scratch, expected);
    B(Condition::kEqual, ok);
  }

  // Check 2: the write barrier can also be removed if |value| is in read-only
  // space.
  CheckPageFlag(value, scratch, MemoryChunk::kIsInReadOnlyHeapMask, ne, ok);

  // Check 3 (allocation folding): allow write barrier removal if
  // LAB start <= object < LAB top.
  {
    UseScratchRegisterScope temps(this);
    const Register bound = temps.AcquireX();
    // CheckPageFlag clobbered |scratch|, so the untagged object address has to
    // be computed again.
    sub(scratch, object, kHeapObjectTag);
    Ldr(bound, lab_start);
    cmp(scratch, bound);
    B(Condition::kUnsignedLessThan, &slow_path);
    Ldr(bound, lab_top);
    cmp(scratch, bound);
    B(Condition::kUnsignedLessThan, ok);
  }

  // Slow path: further cases are potentially checked in C++.
  bind(&slow_path);
}
3943+
38983944
void MacroAssembler::RecordWriteField(
38993945
Register object, int offset, Register value, LinkRegisterStatus lr_status,
39003946
SaveFPRegsMode save_fp, SmiCheck smi_check, ReadOnlyCheck ro_check,

src/codegen/arm64/macro-assembler-arm64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,9 @@ class V8_EXPORT_PRIVATE MacroAssembler : public MacroAssemblerBase {
10841084
void LoadRootRelative(Register destination, int32_t offset) final;
10851085
void StoreRootRelative(int32_t offset, Register value) final;
10861086

1087+
// Emits a fast-path check used when verifying a skipped write barrier.
// Branches to |ok| if the untagged |object| address equals the last young
// allocation, if |value|'s page has the read-only-heap flag set, or if the
// untagged |object| address lies in [LAB start, LAB top). Otherwise falls
// through. |scratch| is clobbered and must not alias |object| or |value|.
void PreCheckSkippedWriteBarrier(Register object, Register value,
1088+
Register scratch, Label* ok);
1089+
10871090
// Operand pointing to an external reference.
10881091
// May emit code to set up the scratch register. The operand is
10891092
// only guaranteed to be correct as long as the scratch register

src/codegen/ia32/macro-assembler-ia32.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2312,6 +2312,42 @@ void MacroAssembler::CheckPageFlag(Register object, Register scratch, int mask,
23122312
j(cc, condition_met, condition_met_distance);
23132313
}
23142314

2315+
// Emits the inline fast path of the skipped-write-barrier verification (ia32).
// Jumps to |ok| as soon as one of the cheap checks below shows that leaving
// out the write barrier for storing |value| into |object| was safe. If none of
// them applies, execution falls through to whatever is emitted after this call
// (the slow path). |scratch| is clobbered; it must alias neither |object| nor
// |value|.
void MacroAssembler::PreCheckSkippedWriteBarrier(Register object,
                                                 Register value,
                                                 Register scratch, Label* ok) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(object, scratch));
  DCHECK(!AreAliased(value, scratch));

  // Operands used by the checks below.
  const Operand untagged_object(object, -kHeapObjectTag);
  const Operand last_young_allocation(
      kRootRegister, IsolateData::last_young_allocation_offset());
  const Operand lab_start(kRootRegister,
                          IsolateData::new_allocation_info_start_offset());
  const Operand lab_top(kRootRegister,
                        IsolateData::new_allocation_info_top_offset());
  Label slow_path;

  // Check 1 (the most common case): static write barrier elimination is
  // allowed on the last young allocation, so compare the untagged object
  // address against it.
  lea(scratch, untagged_object);
  cmp(scratch, last_young_allocation);
  j(Condition::equal, ok);

  // Check 2: the write barrier can also be removed if |value| is in read-only
  // space.
  CheckPageFlag(value, scratch, MemoryChunk::kIsInReadOnlyHeapMask, not_zero,
                ok);

  // Check 3 (allocation folding): allow write barrier removal if
  // LAB start <= object < LAB top. CheckPageFlag clobbered |scratch|, so the
  // untagged object address has to be computed again first.
  lea(scratch, untagged_object);
  cmp(scratch, lab_start);
  j(Condition::kUnsignedLessThan, &slow_path);
  cmp(scratch, lab_top);
  j(Condition::kUnsignedLessThan, ok);

  // Slow path: further cases are potentially checked in C++.
  bind(&slow_path);
}
2350+
23152351
void MacroAssembler::ComputeCodeStartAddress(Register dst) {
23162352
ASM_CODE_COMMENT(this);
23172353
// In order to get the address of the current instruction, we first need

src/codegen/ia32/macro-assembler-ia32.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ class V8_EXPORT_PRIVATE MacroAssembler
7979
Label* condition_met,
8080
Label::Distance condition_met_distance = Label::kFar);
8181

82+
// Emits a fast-path check used when verifying a skipped write barrier.
// Jumps to |ok| if the untagged |object| address equals the last young
// allocation, if |value|'s page has the read-only-heap flag set, or if the
// untagged |object| address lies in [LAB start, LAB top). Otherwise falls
// through. |scratch| is clobbered and must not alias |object| or |value|.
void PreCheckSkippedWriteBarrier(Register object, Register value,
83+
Register scratch, Label* ok);
84+
8285
// Activation support.
8386
void EnterFrame(StackFrame::Type type);
8487
void EnterFrame(StackFrame::Type type, bool load_constant_pool_pointer_reg) {

src/codegen/x64/macro-assembler-x64.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5052,6 +5052,42 @@ void MacroAssembler::JumpIfNotMarking(Label* not_marking,
50525052
j(zero, not_marking, condition_met_distance);
50535053
}
50545054

5055+
// Emits the inline fast path of the skipped-write-barrier verification (x64).
// Jumps to |ok| as soon as one of the cheap checks below shows that leaving
// out the write barrier for storing |value| into |object| was safe. If none of
// them applies, execution falls through to whatever is emitted after this call
// (the slow path). |scratch| is clobbered; it must alias neither |object| nor
// |value|.
void MacroAssembler::PreCheckSkippedWriteBarrier(Register object,
                                                 Register value,
                                                 Register scratch, Label* ok) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(object, scratch));
  DCHECK(!AreAliased(value, scratch));

  // Operands used by the checks below.
  const Operand untagged_object(object, -kHeapObjectTag);
  const Operand last_young_allocation(
      kRootRegister, IsolateData::last_young_allocation_offset());
  const Operand lab_start(kRootRegister,
                          IsolateData::new_allocation_info_start_offset());
  const Operand lab_top(kRootRegister,
                        IsolateData::new_allocation_info_top_offset());
  Label slow_path;

  // Check 1 (the most common case): static write barrier elimination is
  // allowed on the last young allocation, so compare the untagged object
  // address against it.
  leaq(scratch, untagged_object);
  cmpq(scratch, last_young_allocation);
  j(Condition::equal, ok);

  // Check 2: the write barrier can also be removed if |value| is in read-only
  // space.
  CheckPageFlag(value, scratch, MemoryChunk::kIsInReadOnlyHeapMask, not_zero,
                ok);

  // Check 3 (allocation folding): allow write barrier removal if
  // LAB start <= object < LAB top. CheckPageFlag clobbered |scratch|, so the
  // untagged object address has to be computed again first.
  leaq(scratch, untagged_object);
  cmpq(scratch, lab_start);
  j(Condition::kUnsignedLessThan, &slow_path);
  cmpq(scratch, lab_top);
  j(Condition::kUnsignedLessThan, ok);

  // Slow path: further cases are potentially checked in C++.
  bind(&slow_path);
}
5090+
50555091
void MacroAssembler::CheckMarkBit(Register object, Register scratch0,
50565092
Register scratch1, Condition cc,
50575093
Label* condition_met,

src/codegen/x64/macro-assembler-x64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ class V8_EXPORT_PRIVATE MacroAssembler
142142
void JumpIfNotMarking(Label* not_marking,
143143
Label::Distance condition_met_distance = Label::kFar);
144144

145+
// Emits a fast-path check used when verifying a skipped write barrier.
// Jumps to |ok| if the untagged |object| address equals the last young
// allocation, if |value|'s page has the read-only-heap flag set, or if the
// untagged |object| address lies in [LAB start, LAB top). Otherwise falls
// through. |scratch| is clobbered and must not alias |object| or |value|.
void PreCheckSkippedWriteBarrier(Register object, Register value,
146+
Register scratch, Label* ok);
147+
145148
// Define movq here instead of using AVX_OP. movq is defined using templates
146149
// and there is a function template `void movq(P1)`, while technically
147150
// impossible, will be selected when deducing the arguments for AvxHelper.

src/compiler/backend/arm/code-generator-arm.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,16 +233,19 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
233233
class OutOfLineVerifySkippedWriteBarrier final : public OutOfLineCode {
234234
public:
235235
OutOfLineVerifySkippedWriteBarrier(CodeGenerator* gen, Register object,
236-
Register value,
236+
Register value, Register scratch,
237237
UnwindingInfoWriter* unwinding_info_writer)
238238
: OutOfLineCode(gen),
239239
object_(object),
240240
value_(value),
241+
scratch_(scratch),
241242
must_save_lr_(!gen->frame_access_state()->has_frame()),
242243
unwinding_info_writer_(unwinding_info_writer),
243244
zone_(gen->zone()) {}
244245

245246
void Generate() final {
247+
__ PreCheckSkippedWriteBarrier(object_, value_, scratch_, exit());
248+
246249
SaveFPRegsMode const save_fp_mode = frame()->DidAllocateDoubleRegisters()
247250
? SaveFPRegsMode::kSave
248251
: SaveFPRegsMode::kIgnore;
@@ -263,7 +266,8 @@ class OutOfLineVerifySkippedWriteBarrier final : public OutOfLineCode {
263266
private:
264267
Register const object_;
265268
Register const value_;
266-
const bool must_save_lr_;
269+
Register const scratch_;
270+
bool const must_save_lr_;
267271
UnwindingInfoWriter* const unwinding_info_writer_;
268272
Zone* zone_;
269273
};
@@ -1071,8 +1075,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
10711075
}
10721076

10731077
DCHECK(v8_flags.verify_write_barriers);
1078+
Register scratch = i.TempRegister(0);
10741079
auto ool = zone()->New<OutOfLineVerifySkippedWriteBarrier>(
1075-
this, object, value, &unwinding_info_writer_);
1080+
this, object, value, scratch, &unwinding_info_writer_);
10761081
__ JumpIfNotSmi(value, ool->entry());
10771082
__ bind(ool->exit());
10781083

src/compiler/backend/arm/instruction-selector-arm.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,8 +815,14 @@ void VisitStoreCommon(InstructionSelector* selector, OpIndex node,
815815
}
816816
}
817817

818+
InstructionOperand temps[1];
819+
size_t temp_count = 0;
820+
if (write_barrier_kind == kSkippedWriteBarrier) {
821+
temps[temp_count++] = g.TempRegister();
822+
}
823+
818824
code |= AddressingModeField::encode(addressing_mode);
819-
selector->Emit(code, 0, nullptr, input_count, inputs);
825+
selector->Emit(code, 0, nullptr, input_count, inputs, temp_count, temps);
820826
} else {
821827
InstructionCode opcode = kArchNop;
822828
if (!atomic_order) {

0 commit comments

Comments
 (0)