@@ -291,6 +291,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
291
291
void PassManagerBuilder::addFunctionSimplificationPasses (
292
292
legacy::PassManagerBase &MPM) {
293
293
// Start of function pass.
294
+
295
+ // **** Initial canonicalization sequence, clean up the code after inlining.
296
+
294
297
// Break up aggregate allocas, using SSAUpdater.
295
298
MPM.add (createSROAPass ());
296
299
MPM.add (createEarlyCSEPass ()); // Catch trivial redundancies
@@ -299,21 +302,62 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
299
302
MPM.add (createJumpThreadingPass ()); // Thread jumps.
300
303
MPM.add (createCorrelatedValuePropagationPass ()); // Propagate conditionals
301
304
MPM.add (createCFGSimplificationPass ()); // Merge & remove BBs
302
- // Combine silly seq's
303
- addInstructionCombiningPass (MPM);
305
+
306
+ // **** After we have taken out the trash, we can do more expensive and
307
+ // aggressive optimizations.
308
+ //
309
+ // I have no idea what the best order of these passes is.
310
+ //
311
+ // ($) there might be some `br i1 false` here that InstCombine discovered
312
+ // that we might want to kill somehow. However, every SimplifyCfg or
313
+ // JumpThreading I add costs about 1% of compile time even if
314
+ // it does nothing.
315
+ //
316
+ // I believe we could have some sort of "br i1 false"-removal pass
317
+ // in strategic places, that should not be too slow. Ideally, in
318
+ // 90% of the inter-pass transitions the pass would have
319
+ // nothing to do and therefore be fast (there's an O(N*M) problem,
320
+ // where for a large function we might get hit with the full
321
+ // cost). That needs to be further investigated.
322
+
323
+ addInstructionCombiningPass (MPM); // Combine silly seq's
304
324
if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
305
325
MPM.add (createLibCallsShrinkWrapPass ());
306
326
addExtensionsToPM (EP_Peephole, MPM);
307
-
308
327
MPM.add (createTailCallEliminationPass ()); // Eliminate tail calls
328
+ if (OptLevel > 1 ) {
329
+ // Merge duplicate loads and do cross-BB load/store forwarding. This should
330
+ // happen before the loop passes. This is done earlier than in C++ because
331
+ // these optimizations are much more useful in Rust, because of noalias.
332
+ MPM.add (NewGVN ? createNewGVNPass ()
333
+ : createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
334
+ }
309
335
MPM.add (createCFGSimplificationPass ()); // Merge & remove BBs
336
+
337
+ // **** Loop optimizations. There are 2 loop optimization "sequences",
338
+ // with an InstCombine+SimplifyCfg in the middle.
339
+
340
+ // Seq #1
341
+
310
342
MPM.add (createReassociatePass ()); // Reassociate expressions
311
343
// Rotate Loop - disable header duplication at -Oz
312
344
MPM.add (createLoopRotatePass (SizeLevel == 2 ? 0 : -1 ));
313
345
MPM.add (createLICMPass ()); // Hoist loop invariants
346
+ MPM.add (createIndVarSimplifyPass ()); // Simplify Indvars
314
347
MPM.add (createLoopUnswitchPass (SizeLevel || OptLevel < 3 ));
348
+
349
+ // Cleanup between seqs.
350
+
315
351
MPM.add (createCFGSimplificationPass ());
316
352
addInstructionCombiningPass (MPM);
353
+
354
+ // Seq #2
355
+
356
+ // I am intentionally duplicating IndVarSimplify. The SimplifyCfg pass after
357
+ // the first IndVarSimplify gets rid of a bunch of junk that interferes
358
+ // with loop idiom recognition, and the second IndVarSimplify was present
359
+ // in C++, so I don't want to remove it just yet.
360
+
317
361
MPM.add (createIndVarSimplifyPass ()); // Canonicalize indvars
318
362
MPM.add (createLoopIdiomPass ()); // Recognize idioms like memset.
319
363
MPM.add (createLoopDeletionPass ()); // Delete dead loops
@@ -325,25 +369,31 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
325
369
MPM.add (createSimpleLoopUnrollPass ()); // Unroll small loops
326
370
addExtensionsToPM (EP_LoopOptimizerEnd, MPM);
327
371
372
+ // End of loop optimization sequence.
373
+
374
+ // Optimization sequences I know we need:
375
+ // UNROLL -> SIMPLIFY -> MEMCPYOPT -> INSTCOMBINE -> GVN - needed for
+ // (NOTE(review): this sentence appears truncated — confirm the intended
+ // rationale against the original commit message.)
376
+
377
+ // Exit out of LCSSA, and do some cleanup after loop unrolling.
378
+ MPM.add (createCFGSimplificationPass ());
379
+
380
+ MPM.add (createMemCpyOptPass ()); // Remove memcpy / form memset
328
381
if (OptLevel > 1 ) {
329
382
if (EnableMLSM)
330
383
MPM.add (createMergedLoadStoreMotionPass ()); // Merge ld/st in diamonds
331
384
MPM.add (NewGVN ? createNewGVNPass ()
332
385
: createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
333
386
}
334
- MPM.add (createMemCpyOptPass ()); // Remove memcpy / form memset
335
387
MPM.add (createSCCPPass ()); // Constant prop with SCCP
336
388
337
389
// Delete dead bit computations (instcombine runs after to fold away the dead
338
390
// computations, and then ADCE will run later to exploit any new DCE
339
391
// opportunities that creates).
340
392
MPM.add (createBitTrackingDCEPass ()); // Delete dead bit computations
341
-
342
393
// Run instcombine after redundancy elimination to exploit opportunities
343
394
// opened up by them.
344
395
addInstructionCombiningPass (MPM);
345
- if (OptLevel > 1 )
346
- MPM.add (createGVNPass (DisableGVNLoadPRE)); // Remove redundancies
396
+
347
397
addExtensionsToPM (EP_Peephole, MPM);
348
398
MPM.add (createJumpThreadingPass ()); // Thread jumps
349
399
MPM.add (createCorrelatedValuePropagationPass ());
0 commit comments