@@ -342,6 +342,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
342342 return DL.getTypeAllocSizeInBits (Ty) != DL.getTypeSizeInBits (Ty);
343343}
344344
345+ // / A helper function that returns the reciprocal of the block probability of
346+ // / predicated blocks. If we return X, we are assuming the predicated block
347+ // / will execute once for every X iterations of the loop header.
348+ // /
349+ // / TODO: We should use actual block probability here, if available. Currently,
350+ // / we always assume predicated blocks have a 50% chance of executing.
351+ static unsigned getReciprocalPredBlockProb () { return 2 ; }
352+
345353// / InnerLoopVectorizer vectorizes loops which contain only one basic
346354// / block to a specified vectorization factor (VF).
347355// / This class performs the widening of scalars into vectors, or multiple
@@ -3554,12 +3562,11 @@ static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
35543562 }
35553563 }
35563564
3557- // We assume that if-converted blocks have a 50% chance of being executed.
3558- // Predicated scalarized instructions are avoided due to the CF that bypasses
3559- // turned off lanes. The extracts and inserts will be sinked/hoisted to the
3560- // predicated basic-block and are subjected to the same assumption.
3565+ // If we have a predicated instruction, it may not be executed for each
3566+ // vector lane. Scale the cost by the probability of executing the
3567+ // predicated block.
35613568 if (Predicated)
3562- Cost /= 2 ;
3569+ Cost /= getReciprocalPredBlockProb () ;
35633570
35643571 return Cost;
35653572}
@@ -6397,11 +6404,14 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
63976404 << VF << " For instruction: " << I << ' \n ' );
63986405 }
63996406
6400- // We assume that if-converted blocks have a 50% chance of being executed.
6401- // When the code is scalar then some of the blocks are avoided due to CF.
6402- // When the code is vectorized we execute all code paths.
6407+ // If we are vectorizing a predicated block, it will have been
6408+ // if-converted. This means that the block's instructions (aside from
6409+ // stores and instructions that may divide by zero) will now be
6410+ // unconditionally executed. For the scalar case, we may not always execute
6411+ // the predicated block. Thus, scale the block's cost by the probability of
6412+ // executing it.
64036413 if (VF == 1 && Legal->blockNeedsPredication (BB))
6404- BlockCost.first /= 2 ;
6414+ BlockCost.first /= getReciprocalPredBlockProb () ;
64056415
64066416 Cost.first += BlockCost.first ;
64076417 Cost.second |= BlockCost.second ;
@@ -6518,11 +6528,13 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
65186528 case Instruction::SDiv:
65196529 case Instruction::URem:
65206530 case Instruction::SRem:
6521- // We assume that if-converted blocks have a 50% chance of being executed.
6522- // Predicated scalarized instructions are avoided due to the CF that
6523- // bypasses turned off lanes. If we are not predicating, fallthrough.
6531+ // If we have a predicated instruction, it may not be executed for each
6532+ // vector lane. Get the scalarization cost and scale this amount by the
6533+ // probability of executing the predicated block. If the instruction is not
6534+ // predicated, we fall through to the next case.
65246535 if (VF > 1 && Legal->isScalarWithPredication (I))
6525- return VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy) / 2 +
6536+ return VF * TTI.getArithmeticInstrCost (I->getOpcode (), RetTy) /
6537+ getReciprocalPredBlockProb () +
65266538 getScalarizationOverhead (I, VF, true , TTI);
65276539 case Instruction::Add:
65286540 case Instruction::FAdd:
0 commit comments