@@ -7,6 +7,7 @@ use super::Stats;
77use crate :: dominator_tree:: DominatorTree ;
88use crate :: fx:: { FxHashMap , FxHashSet } ;
99use crate :: hash_map:: Entry as HashEntry ;
10+ use crate :: inst_predicates:: is_pure_for_egraph;
1011use crate :: ir:: { Block , Function , Inst , Value , ValueDef } ;
1112use crate :: loop_analysis:: { Loop , LoopAnalysis } ;
1213use crate :: scoped_hash_map:: ScopedHashMap ;
@@ -216,46 +217,112 @@ impl<'a> Elaborator<'a> {
216217
217218 fn compute_best_values ( & mut self ) {
218219 let best = & mut self . value_to_best_value ;
219- for ( value, def) in self . func . dfg . values_and_defs ( ) {
220- trace ! ( "computing best for value {:?} def {:?}" , value, def) ;
221- match def {
222- ValueDef :: Union ( x, y) => {
223- // Pick the best of the two options based on
224- // min-cost. This works because each element of `best`
225- // is a `(cost, value)` tuple; `cost` comes first so
226- // the natural comparison works based on cost, and
227- // breaks ties based on value number.
228- trace ! ( " -> best of {:?} and {:?}" , best[ x] , best[ y] ) ;
229- best[ value] = std:: cmp:: min ( best[ x] , best[ y] ) ;
230- trace ! ( " -> {:?}" , best[ value] ) ;
231- }
232- ValueDef :: Param ( _, _) => {
233- best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
234- }
235- // If the Inst is inserted into the layout (which is,
236- // at this point, only the side-effecting skeleton),
237- // then it must be computed and thus we give it zero
238- // cost.
239- ValueDef :: Result ( inst, _) => {
240- if let Some ( _) = self . func . layout . inst_block ( inst) {
241- best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
242- } else {
243- trace ! ( " -> value {}: result, computing cost" , value) ;
244- let inst_data = & self . func . dfg . insts [ inst] ;
245- // N.B.: at this point we know that the opcode is
246- // pure, so `pure_op_cost`'s precondition is
247- // satisfied.
248- let cost = Cost :: of_pure_op (
249- inst_data. opcode ( ) ,
250- self . func . dfg . inst_values ( inst) . map ( |value| best[ value] . 0 ) ,
220+
221+ // Do a fixpoint loop to compute the best value for each eclass.
222+ //
223+ // The maximum number of iterations is the length of the longest chain
224+ // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
225+ // is *technically* quadratic, but `cranelift-frontend` won't construct
226+ // any such edges. NaN canonicalization will introduce some of these
227+ // edges, but they are chains of only two or three edges. So in
228+ // practice, we *never* do more than a handful of iterations here unless
229+ // (a) we parsed the CLIF from text and the text was funkily numbered,
230+ // which we don't really care about, or (b) the CLIF producer did
231+ // something weird, in which case it is their responsibility to stop
232+ // doing that.
233+ trace ! ( "Entering fixpoint loop to compute the best values for each eclass" ) ;
234+ let mut keep_going = true ;
235+ while keep_going {
236+ keep_going = false ;
237+ trace ! (
238+ "fixpoint iteration {}" ,
239+ self . stats. elaborate_best_cost_fixpoint_iters
240+ ) ;
241+ self . stats . elaborate_best_cost_fixpoint_iters += 1 ;
242+
243+ for ( value, def) in self . func . dfg . values_and_defs ( ) {
244+ trace ! ( "computing best for value {:?} def {:?}" , value, def) ;
245+ let orig_best_value = best[ value] ;
246+
247+ match def {
248+ ValueDef :: Union ( x, y) => {
249+ // Pick the best of the two options based on
250+ // min-cost. This works because each element of `best`
251+ // is a `(cost, value)` tuple; `cost` comes first so
252+ // the natural comparison works based on cost, and
253+ // breaks ties based on value number.
254+ best[ value] = std:: cmp:: min ( best[ x] , best[ y] ) ;
255+ trace ! (
256+ " -> best of union({:?}, {:?}) = {:?}" ,
257+ best[ x] ,
258+ best[ y] ,
259+ best[ value]
251260 ) ;
252- best[ value] = BestEntry ( cost, value) ;
253261 }
254- }
255- } ;
256- debug_assert_ne ! ( best[ value] . 0 , Cost :: infinity( ) ) ;
257- debug_assert_ne ! ( best[ value] . 1 , Value :: reserved_value( ) ) ;
258- trace ! ( "best for eclass {:?}: {:?}" , value, best[ value] ) ;
262+ ValueDef :: Param ( _, _) => {
263+ best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
264+ }
265+ // If the Inst is inserted into the layout (which is,
266+ // at this point, only the side-effecting skeleton),
267+ // then it must be computed and thus we give it zero
268+ // cost.
269+ ValueDef :: Result ( inst, _) => {
270+ if let Some ( _) = self . func . layout . inst_block ( inst) {
271+ best[ value] = BestEntry ( Cost :: zero ( ) , value) ;
272+ } else {
273+ let inst_data = & self . func . dfg . insts [ inst] ;
274+ // N.B.: at this point we know that the opcode is
275+ // pure, so `pure_op_cost`'s precondition is
276+ // satisfied.
277+ let cost = Cost :: of_pure_op (
278+ inst_data. opcode ( ) ,
279+ self . func . dfg . inst_values ( inst) . map ( |value| best[ value] . 0 ) ,
280+ ) ;
281+ best[ value] = BestEntry ( cost, value) ;
282+ trace ! ( " -> cost of value {} = {:?}" , value, cost) ;
283+ }
284+ }
285+ } ;
286+
287+ // Keep on iterating the fixpoint loop while we are finding new
288+ // best values.
289+ keep_going |= orig_best_value != best[ value] ;
290+ }
291+ }
292+
293+ if cfg ! ( any( feature = "trace-log" , debug_assertions) ) {
294+ trace ! ( "finished fixpoint loop to compute best value for each eclass" ) ;
295+ for value in self . func . dfg . values ( ) {
296+ trace ! ( "-> best for eclass {:?}: {:?}" , value, best[ value] ) ;
297+ debug_assert_ne ! ( best[ value] . 1 , Value :: reserved_value( ) ) ;
298+ // You might additionally be expecting an assert that the best
299+ // cost is not infinity, however infinite cost *can* happen in
300+ // practice. First, note that our cost function doesn't know
301+ // about any shared structure in the dataflow graph, it only
302+ // sums operand costs. (And trying to avoid that by deduping a
303+ // single operation's operands is a losing game because you can
304+ // always just add one indirection and go from `add(x, x)` to
305+ // `add(foo(x), bar(x))` to hide the shared structure.) Given
306+ // that blindness to sharing, we can make cost grow
307+ // exponentially with a linear sequence of operations:
308+ //
309+ // v0 = iconst.i32 1 ;; cost = 1
310+ // v1 = iadd v0, v0 ;; cost = 3 + 1 + 1
311+ // v2 = iadd v1, v1 ;; cost = 3 + 5 + 5
312+ // v3 = iadd v2, v2 ;; cost = 3 + 13 + 13
313+ // v4 = iadd v3, v3 ;; cost = 3 + 29 + 29
314+ // v5 = iadd v4, v4 ;; cost = 3 + 61 + 61
315+ // v6 = iadd v5, v5 ;; cost = 3 + 125 + 125
316+ // ;; etc...
317+ //
318+ // Such a chain can cause cost to saturate to infinity. How do
319+ // we choose which e-node is best when there are multiple that
320+ // have saturated to infinity? It doesn't matter. As long as
321+ // invariant (2) for optimization rules is upheld by our rule
322+ // set (see `cranelift/codegen/src/opts/README.md`) it is safe
323+ // to choose *any* e-node in the e-class. At worst we will
324+ // produce suboptimal code, but never an incorrectness.
325+ }
259326 }
260327 }
261328
@@ -606,7 +673,13 @@ impl<'a> Elaborator<'a> {
606673 }
607674 inst
608675 } ;
676+
609677 // Place the inst just before `before`.
678+ debug_assert ! (
679+ is_pure_for_egraph( self . func, inst) ,
680+ "something has gone very wrong if we are elaborating effectful \
681+ instructions, they should have remained in the skeleton"
682+ ) ;
610683 self . func . layout . insert_inst ( inst, before) ;
611684
612685 // Update the inst's arguments.
0 commit comments