Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 2717444

Browse files
committed
try to improve the pass manager order
This changes the order of the GVNs a bit. I removed @eddyb's GVN added in #35662 - I checked the original examples, and it does not seem to do anything. In its place, I added a GVN at the *start* of the optimization pipeline, to remove junk. I moved the second GVN to after memcpyopt, so that we'll have a GVN after the MemCpy optimizations - we already have a GVN "before" it. Also, I duplicated IndVarSimplify instead of adding a SimplifyCfg - looking at benchmarks, it should have the same performance impact.
1 parent 6d08185 commit 2717444

File tree

1 file changed

+57
-7
lines changed

1 file changed

+57
-7
lines changed

lib/Transforms/IPO/PassManagerBuilder.cpp

+57-7
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
291291
void PassManagerBuilder::addFunctionSimplificationPasses(
292292
legacy::PassManagerBase &MPM) {
293293
// Start of function pass.
294+
295+
// **** Initial canonicalization sequence, clean up the code after inlining.
296+
294297
// Break up aggregate allocas, using SSAUpdater.
295298
MPM.add(createSROAPass());
296299
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
@@ -299,21 +302,62 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
299302
MPM.add(createJumpThreadingPass()); // Thread jumps.
300303
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
301304
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
302-
// Combine silly seq's
303-
addInstructionCombiningPass(MPM);
305+
306+
// **** After we have taken out the trash, we can do more expensive and
307+
// aggressive optimizations.
308+
//
309+
// I have no idea what the best order of these passes is.
310+
//
311+
// ($) there might be some `br i1 false` here that InstCombine discovered
312+
// that we might want to kill somehow. However, every SimplifyCfg or
313+
// JumpThreading I add takes 1% of the compiler's performance even if
314+
// it does nothing.
315+
//
316+
// I believe we could have some sort of "br i1 false"-removal pass
317+
// in strategic places, that should not be too slow. Ideally, in
318+
// 90% of the inter-pass transitions the pass would have
319+
// nothing to do and therefore be fast (there's an O(N*M) problem,
320+
// where for a large function we might get hit with the full
321+
// cost). That needs to be further investigated.
322+
323+
addInstructionCombiningPass(MPM); // Combine silly seq's
304324
if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
305325
MPM.add(createLibCallsShrinkWrapPass());
306326
addExtensionsToPM(EP_Peephole, MPM);
307-
308327
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
328+
if (OptLevel > 1) {
329+
// Merge duplicate loads and do cross-BB load/store forwarding. This should
330+
// happen before the loop passes. This is done earlier than in C++ because
331+
// these optimizations are much more useful in Rust, because of noalias.
332+
MPM.add(NewGVN ? createNewGVNPass()
333+
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
334+
}
309335
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
336+
337+
// **** Loop optimizations. There are 2 loop optimization "sequences",
338+
// with an InstCombine+SimplifyCfg in the middle.
339+
340+
// Seq #1
341+
310342
MPM.add(createReassociatePass()); // Reassociate expressions
311343
// Rotate Loop - disable header duplication at -Oz
312344
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
313345
MPM.add(createLICMPass()); // Hoist loop invariants
346+
MPM.add(createIndVarSimplifyPass()); // Simplify Indvars
314347
MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
348+
349+
// Cleanup between seqs.
350+
315351
MPM.add(createCFGSimplificationPass());
316352
addInstructionCombiningPass(MPM);
353+
354+
// Seq #2
355+
356+
// I am intentionally duplicating IndVarSimplify. The SimplifyCfg pass after
357+
// the first IndVarSimplify gets rid of a bunch of junk that interferes
358+
// with loop idiom recognition, and the second IndVarSimplify was present
359+
// in C++, so I would rather not remove it.
360+
317361
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
318362
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
319363
MPM.add(createLoopDeletionPass()); // Delete dead loops
@@ -325,25 +369,31 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
325369
MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
326370
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
327371

372+
// End of loop optimization sequence.
373+
374+
// Optimization sequences I know we need:
375+
// UNROLL -> SIMPLIFY -> MEMCPYOPT -> INSTCOMBINE -> GVN - needed for
376+
377+
// Exit out of LCSSA, and do some cleanup after loop unrolling.
378+
MPM.add(createCFGSimplificationPass());
379+
380+
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
328381
if (OptLevel > 1) {
329382
if (EnableMLSM)
330383
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
331384
MPM.add(NewGVN ? createNewGVNPass()
332385
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
333386
}
334-
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
335387
MPM.add(createSCCPPass()); // Constant prop with SCCP
336388

337389
// Delete dead bit computations (instcombine runs after to fold away the dead
338390
// computations, and then ADCE will run later to exploit any new DCE
339391
// opportunities that creates).
340392
MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
341-
342393
// Run instcombine after redundancy elimination to exploit opportunities
343394
// opened up by them.
344395
addInstructionCombiningPass(MPM);
345-
if (OptLevel > 1)
346-
MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
396+
347397
addExtensionsToPM(EP_Peephole, MPM);
348398
MPM.add(createJumpThreadingPass()); // Thread jumps
349399
MPM.add(createCorrelatedValuePropagationPass());

0 commit comments

Comments
 (0)