xref: /aosp_15_r20/external/llvm/lib/CodeGen/MachineBlockPlacement.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
10*9880d681SAndroid Build Coastguard Worker // This file implements basic block placement transformations using the CFG
11*9880d681SAndroid Build Coastguard Worker // structure and branch probability estimates.
12*9880d681SAndroid Build Coastguard Worker //
13*9880d681SAndroid Build Coastguard Worker // The pass strives to preserve the structure of the CFG (that is, retain
14*9880d681SAndroid Build Coastguard Worker // a topological ordering of basic blocks) in the absence of a *strong* signal
15*9880d681SAndroid Build Coastguard Worker // to the contrary from probabilities. However, within the CFG structure, it
16*9880d681SAndroid Build Coastguard Worker // attempts to choose an ordering which favors placing more likely sequences of
17*9880d681SAndroid Build Coastguard Worker // blocks adjacent to each other.
18*9880d681SAndroid Build Coastguard Worker //
19*9880d681SAndroid Build Coastguard Worker // The algorithm works from the inner-most loop within a function outward, and
20*9880d681SAndroid Build Coastguard Worker // at each stage walks through the basic blocks, trying to coalesce them into
21*9880d681SAndroid Build Coastguard Worker // sequential chains where allowed by the CFG (or demanded by heavy
22*9880d681SAndroid Build Coastguard Worker // probabilities). Finally, it walks the blocks in topological order, and the
23*9880d681SAndroid Build Coastguard Worker // first time it reaches a chain of basic blocks, it schedules them in the
24*9880d681SAndroid Build Coastguard Worker // function in-order.
25*9880d681SAndroid Build Coastguard Worker //
26*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
27*9880d681SAndroid Build Coastguard Worker 
28*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/Passes.h"
29*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/TargetPassConfig.h"
30*9880d681SAndroid Build Coastguard Worker #include "BranchFolding.h"
31*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/DenseMap.h"
32*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallPtrSet.h"
33*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/SmallVector.h"
34*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
35*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineBasicBlock.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineDominators.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunction.h"
40*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
41*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineLoopInfo.h"
42*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineModuleInfo.h"
43*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Allocator.h"
44*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/CommandLine.h"
45*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
46*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
47*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetInstrInfo.h"
48*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetLowering.h"
49*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetSubtargetInfo.h"
50*9880d681SAndroid Build Coastguard Worker #include <algorithm>
51*9880d681SAndroid Build Coastguard Worker using namespace llvm;
52*9880d681SAndroid Build Coastguard Worker 
53*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "block-placement"
54*9880d681SAndroid Build Coastguard Worker 
55*9880d681SAndroid Build Coastguard Worker STATISTIC(NumCondBranches, "Number of conditional branches");
56*9880d681SAndroid Build Coastguard Worker STATISTIC(NumUncondBranches, "Number of unconditional branches");
57*9880d681SAndroid Build Coastguard Worker STATISTIC(CondBranchTakenFreq,
58*9880d681SAndroid Build Coastguard Worker           "Potential frequency of taking conditional branches");
59*9880d681SAndroid Build Coastguard Worker STATISTIC(UncondBranchTakenFreq,
60*9880d681SAndroid Build Coastguard Worker           "Potential frequency of taking unconditional branches");
61*9880d681SAndroid Build Coastguard Worker 
62*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
63*9880d681SAndroid Build Coastguard Worker                                        cl::desc("Force the alignment of all "
64*9880d681SAndroid Build Coastguard Worker                                                 "blocks in the function."),
65*9880d681SAndroid Build Coastguard Worker                                        cl::init(0), cl::Hidden);
66*9880d681SAndroid Build Coastguard Worker 
67*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> AlignAllNonFallThruBlocks(
68*9880d681SAndroid Build Coastguard Worker     "align-all-nofallthru-blocks",
69*9880d681SAndroid Build Coastguard Worker     cl::desc("Force the alignment of all "
70*9880d681SAndroid Build Coastguard Worker              "blocks that have no fall-through predecessors (i.e. don't add "
71*9880d681SAndroid Build Coastguard Worker              "nops that are executed)."),
72*9880d681SAndroid Build Coastguard Worker     cl::init(0), cl::Hidden);
73*9880d681SAndroid Build Coastguard Worker 
74*9880d681SAndroid Build Coastguard Worker // FIXME: Find a good default for this flag and remove the flag.
75*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> ExitBlockBias(
76*9880d681SAndroid Build Coastguard Worker     "block-placement-exit-block-bias",
77*9880d681SAndroid Build Coastguard Worker     cl::desc("Block frequency percentage a loop exit block needs "
78*9880d681SAndroid Build Coastguard Worker              "over the original exit to be considered the new exit."),
79*9880d681SAndroid Build Coastguard Worker     cl::init(0), cl::Hidden);
80*9880d681SAndroid Build Coastguard Worker 
81*9880d681SAndroid Build Coastguard Worker static cl::opt<bool> OutlineOptionalBranches(
82*9880d681SAndroid Build Coastguard Worker     "outline-optional-branches",
83*9880d681SAndroid Build Coastguard Worker     cl::desc("Put completely optional branches, i.e. branches with a common "
84*9880d681SAndroid Build Coastguard Worker              "post dominator, out of line."),
85*9880d681SAndroid Build Coastguard Worker     cl::init(false), cl::Hidden);
86*9880d681SAndroid Build Coastguard Worker 
87*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> OutlineOptionalThreshold(
88*9880d681SAndroid Build Coastguard Worker     "outline-optional-threshold",
89*9880d681SAndroid Build Coastguard Worker     cl::desc("Don't outline optional branches that are a single block with an "
90*9880d681SAndroid Build Coastguard Worker              "instruction count below this threshold"),
91*9880d681SAndroid Build Coastguard Worker     cl::init(4), cl::Hidden);
92*9880d681SAndroid Build Coastguard Worker 
93*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> LoopToColdBlockRatio(
94*9880d681SAndroid Build Coastguard Worker     "loop-to-cold-block-ratio",
95*9880d681SAndroid Build Coastguard Worker     cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
96*9880d681SAndroid Build Coastguard Worker              "(frequency of block) is greater than this ratio"),
97*9880d681SAndroid Build Coastguard Worker     cl::init(5), cl::Hidden);
98*9880d681SAndroid Build Coastguard Worker 
99*9880d681SAndroid Build Coastguard Worker static cl::opt<bool>
100*9880d681SAndroid Build Coastguard Worker     PreciseRotationCost("precise-rotation-cost",
101*9880d681SAndroid Build Coastguard Worker                         cl::desc("Model the cost of loop rotation more "
102*9880d681SAndroid Build Coastguard Worker                                  "precisely by using profile data."),
103*9880d681SAndroid Build Coastguard Worker                         cl::init(false), cl::Hidden);
104*9880d681SAndroid Build Coastguard Worker static cl::opt<bool>
105*9880d681SAndroid Build Coastguard Worker     ForcePreciseRotationCost("force-precise-rotation-cost",
106*9880d681SAndroid Build Coastguard Worker                              cl::desc("Force the use of precise cost "
107*9880d681SAndroid Build Coastguard Worker                                       "loop rotation strategy."),
108*9880d681SAndroid Build Coastguard Worker                              cl::init(false), cl::Hidden);
109*9880d681SAndroid Build Coastguard Worker 
110*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> MisfetchCost(
111*9880d681SAndroid Build Coastguard Worker     "misfetch-cost",
112*9880d681SAndroid Build Coastguard Worker     cl::desc("Cost that models the probablistic risk of an instruction "
113*9880d681SAndroid Build Coastguard Worker              "misfetch due to a jump comparing to falling through, whose cost "
114*9880d681SAndroid Build Coastguard Worker              "is zero."),
115*9880d681SAndroid Build Coastguard Worker     cl::init(1), cl::Hidden);
116*9880d681SAndroid Build Coastguard Worker 
117*9880d681SAndroid Build Coastguard Worker static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
118*9880d681SAndroid Build Coastguard Worker                                       cl::desc("Cost of jump instructions."),
119*9880d681SAndroid Build Coastguard Worker                                       cl::init(1), cl::Hidden);
120*9880d681SAndroid Build Coastguard Worker 
121*9880d681SAndroid Build Coastguard Worker static cl::opt<bool>
122*9880d681SAndroid Build Coastguard Worker BranchFoldPlacement("branch-fold-placement",
123*9880d681SAndroid Build Coastguard Worker               cl::desc("Perform branch folding during placement. "
124*9880d681SAndroid Build Coastguard Worker                        "Reduces code size."),
125*9880d681SAndroid Build Coastguard Worker               cl::init(true), cl::Hidden);
126*9880d681SAndroid Build Coastguard Worker 
127*9880d681SAndroid Build Coastguard Worker extern cl::opt<unsigned> StaticLikelyProb;
128*9880d681SAndroid Build Coastguard Worker extern cl::opt<unsigned> ProfileLikelyProb;
129*9880d681SAndroid Build Coastguard Worker 
130*9880d681SAndroid Build Coastguard Worker namespace {
131*9880d681SAndroid Build Coastguard Worker class BlockChain;
132*9880d681SAndroid Build Coastguard Worker /// \brief Type for our function-wide basic block -> block chain mapping.
133*9880d681SAndroid Build Coastguard Worker typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
134*9880d681SAndroid Build Coastguard Worker }
135*9880d681SAndroid Build Coastguard Worker 
136*9880d681SAndroid Build Coastguard Worker namespace {
137*9880d681SAndroid Build Coastguard Worker /// \brief A chain of blocks which will be laid out contiguously.
138*9880d681SAndroid Build Coastguard Worker ///
139*9880d681SAndroid Build Coastguard Worker /// This is the datastructure representing a chain of consecutive blocks that
140*9880d681SAndroid Build Coastguard Worker /// are profitable to layout together in order to maximize fallthrough
141*9880d681SAndroid Build Coastguard Worker /// probabilities and code locality. We also can use a block chain to represent
142*9880d681SAndroid Build Coastguard Worker /// a sequence of basic blocks which have some external (correctness)
143*9880d681SAndroid Build Coastguard Worker /// requirement for sequential layout.
144*9880d681SAndroid Build Coastguard Worker ///
145*9880d681SAndroid Build Coastguard Worker /// Chains can be built around a single basic block and can be merged to grow
146*9880d681SAndroid Build Coastguard Worker /// them. They participate in a block-to-chain mapping, which is updated
147*9880d681SAndroid Build Coastguard Worker /// automatically as chains are merged together.
148*9880d681SAndroid Build Coastguard Worker class BlockChain {
149*9880d681SAndroid Build Coastguard Worker   /// \brief The sequence of blocks belonging to this chain.
150*9880d681SAndroid Build Coastguard Worker   ///
151*9880d681SAndroid Build Coastguard Worker   /// This is the sequence of blocks for a particular chain. These will be laid
152*9880d681SAndroid Build Coastguard Worker   /// out in-order within the function.
153*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineBasicBlock *, 4> Blocks;
154*9880d681SAndroid Build Coastguard Worker 
155*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the function-wide basic block to block chain mapping.
156*9880d681SAndroid Build Coastguard Worker   ///
157*9880d681SAndroid Build Coastguard Worker   /// This is retained in each block chain to simplify the computation of child
158*9880d681SAndroid Build Coastguard Worker   /// block chains for SCC-formation and iteration. We store the edges to child
159*9880d681SAndroid Build Coastguard Worker   /// basic blocks, and map them back to their associated chains using this
160*9880d681SAndroid Build Coastguard Worker   /// structure.
161*9880d681SAndroid Build Coastguard Worker   BlockToChainMapType &BlockToChain;
162*9880d681SAndroid Build Coastguard Worker 
163*9880d681SAndroid Build Coastguard Worker public:
164*9880d681SAndroid Build Coastguard Worker   /// \brief Construct a new BlockChain.
165*9880d681SAndroid Build Coastguard Worker   ///
166*9880d681SAndroid Build Coastguard Worker   /// This builds a new block chain representing a single basic block in the
167*9880d681SAndroid Build Coastguard Worker   /// function. It also registers itself as the chain that block participates
168*9880d681SAndroid Build Coastguard Worker   /// in with the BlockToChain mapping.
BlockChain(BlockToChainMapType & BlockToChain,MachineBasicBlock * BB)169*9880d681SAndroid Build Coastguard Worker   BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
170*9880d681SAndroid Build Coastguard Worker       : Blocks(1, BB), BlockToChain(BlockToChain), UnscheduledPredecessors(0) {
171*9880d681SAndroid Build Coastguard Worker     assert(BB && "Cannot create a chain with a null basic block");
172*9880d681SAndroid Build Coastguard Worker     BlockToChain[BB] = this;
173*9880d681SAndroid Build Coastguard Worker   }
174*9880d681SAndroid Build Coastguard Worker 
175*9880d681SAndroid Build Coastguard Worker   /// \brief Iterator over blocks within the chain.
176*9880d681SAndroid Build Coastguard Worker   typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
177*9880d681SAndroid Build Coastguard Worker 
178*9880d681SAndroid Build Coastguard Worker   /// \brief Beginning of blocks within the chain.
begin()179*9880d681SAndroid Build Coastguard Worker   iterator begin() { return Blocks.begin(); }
180*9880d681SAndroid Build Coastguard Worker 
181*9880d681SAndroid Build Coastguard Worker   /// \brief End of blocks within the chain.
end()182*9880d681SAndroid Build Coastguard Worker   iterator end() { return Blocks.end(); }
183*9880d681SAndroid Build Coastguard Worker 
184*9880d681SAndroid Build Coastguard Worker   /// \brief Merge a block chain into this one.
185*9880d681SAndroid Build Coastguard Worker   ///
186*9880d681SAndroid Build Coastguard Worker   /// This routine merges a block chain into this one. It takes care of forming
187*9880d681SAndroid Build Coastguard Worker   /// a contiguous sequence of basic blocks, updating the edge list, and
188*9880d681SAndroid Build Coastguard Worker   /// updating the block -> chain mapping. It does not free or tear down the
189*9880d681SAndroid Build Coastguard Worker   /// old chain, but the old chain's block list is no longer valid.
merge(MachineBasicBlock * BB,BlockChain * Chain)190*9880d681SAndroid Build Coastguard Worker   void merge(MachineBasicBlock *BB, BlockChain *Chain) {
191*9880d681SAndroid Build Coastguard Worker     assert(BB);
192*9880d681SAndroid Build Coastguard Worker     assert(!Blocks.empty());
193*9880d681SAndroid Build Coastguard Worker 
194*9880d681SAndroid Build Coastguard Worker     // Fast path in case we don't have a chain already.
195*9880d681SAndroid Build Coastguard Worker     if (!Chain) {
196*9880d681SAndroid Build Coastguard Worker       assert(!BlockToChain[BB]);
197*9880d681SAndroid Build Coastguard Worker       Blocks.push_back(BB);
198*9880d681SAndroid Build Coastguard Worker       BlockToChain[BB] = this;
199*9880d681SAndroid Build Coastguard Worker       return;
200*9880d681SAndroid Build Coastguard Worker     }
201*9880d681SAndroid Build Coastguard Worker 
202*9880d681SAndroid Build Coastguard Worker     assert(BB == *Chain->begin());
203*9880d681SAndroid Build Coastguard Worker     assert(Chain->begin() != Chain->end());
204*9880d681SAndroid Build Coastguard Worker 
205*9880d681SAndroid Build Coastguard Worker     // Update the incoming blocks to point to this chain, and add them to the
206*9880d681SAndroid Build Coastguard Worker     // chain structure.
207*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *ChainBB : *Chain) {
208*9880d681SAndroid Build Coastguard Worker       Blocks.push_back(ChainBB);
209*9880d681SAndroid Build Coastguard Worker       assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain");
210*9880d681SAndroid Build Coastguard Worker       BlockToChain[ChainBB] = this;
211*9880d681SAndroid Build Coastguard Worker     }
212*9880d681SAndroid Build Coastguard Worker   }
213*9880d681SAndroid Build Coastguard Worker 
214*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
215*9880d681SAndroid Build Coastguard Worker   /// \brief Dump the blocks in this chain.
dump()216*9880d681SAndroid Build Coastguard Worker   LLVM_DUMP_METHOD void dump() {
217*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *MBB : *this)
218*9880d681SAndroid Build Coastguard Worker       MBB->dump();
219*9880d681SAndroid Build Coastguard Worker   }
220*9880d681SAndroid Build Coastguard Worker #endif // NDEBUG
221*9880d681SAndroid Build Coastguard Worker 
222*9880d681SAndroid Build Coastguard Worker   /// \brief Count of predecessors of any block within the chain which have not
223*9880d681SAndroid Build Coastguard Worker   /// yet been scheduled.  In general, we will delay scheduling this chain
224*9880d681SAndroid Build Coastguard Worker   /// until those predecessors are scheduled (or we find a sufficiently good
225*9880d681SAndroid Build Coastguard Worker   /// reason to override this heuristic.)  Note that when forming loop chains,
226*9880d681SAndroid Build Coastguard Worker   /// blocks outside the loop are ignored and treated as if they were already
227*9880d681SAndroid Build Coastguard Worker   /// scheduled.
228*9880d681SAndroid Build Coastguard Worker   ///
229*9880d681SAndroid Build Coastguard Worker   /// Note: This field is reinitialized multiple times - once for each loop,
230*9880d681SAndroid Build Coastguard Worker   /// and then once for the function as a whole.
231*9880d681SAndroid Build Coastguard Worker   unsigned UnscheduledPredecessors;
232*9880d681SAndroid Build Coastguard Worker };
233*9880d681SAndroid Build Coastguard Worker }
234*9880d681SAndroid Build Coastguard Worker 
235*9880d681SAndroid Build Coastguard Worker namespace {
236*9880d681SAndroid Build Coastguard Worker class MachineBlockPlacement : public MachineFunctionPass {
237*9880d681SAndroid Build Coastguard Worker   /// \brief A typedef for a block filter set.
238*9880d681SAndroid Build Coastguard Worker   typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
239*9880d681SAndroid Build Coastguard Worker 
240*9880d681SAndroid Build Coastguard Worker   /// \brief work lists of blocks that are ready to be laid out
241*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineBasicBlock *, 16> BlockWorkList;
242*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
243*9880d681SAndroid Build Coastguard Worker 
244*9880d681SAndroid Build Coastguard Worker   /// \brief Machine Function
245*9880d681SAndroid Build Coastguard Worker   MachineFunction *F;
246*9880d681SAndroid Build Coastguard Worker 
247*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the branch probability pass.
248*9880d681SAndroid Build Coastguard Worker   const MachineBranchProbabilityInfo *MBPI;
249*9880d681SAndroid Build Coastguard Worker 
250*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the function-wide block frequency pass.
251*9880d681SAndroid Build Coastguard Worker   std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
252*9880d681SAndroid Build Coastguard Worker 
253*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the loop info.
254*9880d681SAndroid Build Coastguard Worker   MachineLoopInfo *MLI;
255*9880d681SAndroid Build Coastguard Worker 
256*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the target's instruction info.
257*9880d681SAndroid Build Coastguard Worker   const TargetInstrInfo *TII;
258*9880d681SAndroid Build Coastguard Worker 
259*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the target's lowering info.
260*9880d681SAndroid Build Coastguard Worker   const TargetLoweringBase *TLI;
261*9880d681SAndroid Build Coastguard Worker 
262*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the post dominator tree.
263*9880d681SAndroid Build Coastguard Worker   MachineDominatorTree *MDT;
264*9880d681SAndroid Build Coastguard Worker 
265*9880d681SAndroid Build Coastguard Worker   /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
266*9880d681SAndroid Build Coastguard Worker   /// all terminators of the MachineFunction.
267*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
268*9880d681SAndroid Build Coastguard Worker 
269*9880d681SAndroid Build Coastguard Worker   /// \brief Allocator and owner of BlockChain structures.
270*9880d681SAndroid Build Coastguard Worker   ///
271*9880d681SAndroid Build Coastguard Worker   /// We build BlockChains lazily while processing the loop structure of
272*9880d681SAndroid Build Coastguard Worker   /// a function. To reduce malloc traffic, we allocate them using this
273*9880d681SAndroid Build Coastguard Worker   /// slab-like allocator, and destroy them after the pass completes. An
274*9880d681SAndroid Build Coastguard Worker   /// important guarantee is that this allocator produces stable pointers to
275*9880d681SAndroid Build Coastguard Worker   /// the chains.
276*9880d681SAndroid Build Coastguard Worker   SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
277*9880d681SAndroid Build Coastguard Worker 
278*9880d681SAndroid Build Coastguard Worker   /// \brief Function wide BasicBlock to BlockChain mapping.
279*9880d681SAndroid Build Coastguard Worker   ///
280*9880d681SAndroid Build Coastguard Worker   /// This mapping allows efficiently moving from any given basic block to the
281*9880d681SAndroid Build Coastguard Worker   /// BlockChain it participates in, if any. We use it to, among other things,
282*9880d681SAndroid Build Coastguard Worker   /// allow implicitly defining edges between chains as the existing edges
283*9880d681SAndroid Build Coastguard Worker   /// between basic blocks.
284*9880d681SAndroid Build Coastguard Worker   DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
285*9880d681SAndroid Build Coastguard Worker 
286*9880d681SAndroid Build Coastguard Worker   void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
287*9880d681SAndroid Build Coastguard Worker                            const BlockFilterSet *BlockFilter = nullptr);
288*9880d681SAndroid Build Coastguard Worker   BranchProbability
289*9880d681SAndroid Build Coastguard Worker   collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
290*9880d681SAndroid Build Coastguard Worker                           const BlockFilterSet *BlockFilter,
291*9880d681SAndroid Build Coastguard Worker                           SmallVector<MachineBasicBlock *, 4> &Successors);
292*9880d681SAndroid Build Coastguard Worker   bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
293*9880d681SAndroid Build Coastguard Worker                                  BlockChain &Chain,
294*9880d681SAndroid Build Coastguard Worker                                  const BlockFilterSet *BlockFilter,
295*9880d681SAndroid Build Coastguard Worker                                  BranchProbability SuccProb,
296*9880d681SAndroid Build Coastguard Worker                                  BranchProbability HotProb);
297*9880d681SAndroid Build Coastguard Worker   bool
298*9880d681SAndroid Build Coastguard Worker   hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
299*9880d681SAndroid Build Coastguard Worker                              BlockChain &SuccChain, BranchProbability SuccProb,
300*9880d681SAndroid Build Coastguard Worker                              BranchProbability RealSuccProb, BlockChain &Chain,
301*9880d681SAndroid Build Coastguard Worker                              const BlockFilterSet *BlockFilter);
302*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
303*9880d681SAndroid Build Coastguard Worker                                          BlockChain &Chain,
304*9880d681SAndroid Build Coastguard Worker                                          const BlockFilterSet *BlockFilter);
305*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *
306*9880d681SAndroid Build Coastguard Worker   selectBestCandidateBlock(BlockChain &Chain,
307*9880d681SAndroid Build Coastguard Worker                            SmallVectorImpl<MachineBasicBlock *> &WorkList);
308*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *
309*9880d681SAndroid Build Coastguard Worker   getFirstUnplacedBlock(const BlockChain &PlacedChain,
310*9880d681SAndroid Build Coastguard Worker                         MachineFunction::iterator &PrevUnplacedBlockIt,
311*9880d681SAndroid Build Coastguard Worker                         const BlockFilterSet *BlockFilter);
312*9880d681SAndroid Build Coastguard Worker 
313*9880d681SAndroid Build Coastguard Worker   /// \brief Add a basic block to the work list if it is apropriate.
314*9880d681SAndroid Build Coastguard Worker   ///
315*9880d681SAndroid Build Coastguard Worker   /// If the optional parameter BlockFilter is provided, only MBB
316*9880d681SAndroid Build Coastguard Worker   /// present in the set will be added to the worklist. If nullptr
317*9880d681SAndroid Build Coastguard Worker   /// is provided, no filtering occurs.
318*9880d681SAndroid Build Coastguard Worker   void fillWorkLists(MachineBasicBlock *MBB,
319*9880d681SAndroid Build Coastguard Worker                      SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
320*9880d681SAndroid Build Coastguard Worker                      const BlockFilterSet *BlockFilter);
321*9880d681SAndroid Build Coastguard Worker   void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
322*9880d681SAndroid Build Coastguard Worker                   const BlockFilterSet *BlockFilter = nullptr);
323*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *findBestLoopTop(MachineLoop &L,
324*9880d681SAndroid Build Coastguard Worker                                      const BlockFilterSet &LoopBlockSet);
325*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *findBestLoopExit(MachineLoop &L,
326*9880d681SAndroid Build Coastguard Worker                                       const BlockFilterSet &LoopBlockSet);
327*9880d681SAndroid Build Coastguard Worker   BlockFilterSet collectLoopBlockSet(MachineLoop &L);
328*9880d681SAndroid Build Coastguard Worker   void buildLoopChains(MachineLoop &L);
329*9880d681SAndroid Build Coastguard Worker   void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
330*9880d681SAndroid Build Coastguard Worker                   const BlockFilterSet &LoopBlockSet);
331*9880d681SAndroid Build Coastguard Worker   void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
332*9880d681SAndroid Build Coastguard Worker                              const BlockFilterSet &LoopBlockSet);
333*9880d681SAndroid Build Coastguard Worker   void collectMustExecuteBBs();
334*9880d681SAndroid Build Coastguard Worker   void buildCFGChains();
335*9880d681SAndroid Build Coastguard Worker   void optimizeBranches();
336*9880d681SAndroid Build Coastguard Worker   void alignBlocks();
337*9880d681SAndroid Build Coastguard Worker 
338*9880d681SAndroid Build Coastguard Worker public:
339*9880d681SAndroid Build Coastguard Worker   static char ID; // Pass identification, replacement for typeid
MachineBlockPlacement()340*9880d681SAndroid Build Coastguard Worker   MachineBlockPlacement() : MachineFunctionPass(ID) {
341*9880d681SAndroid Build Coastguard Worker     initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
342*9880d681SAndroid Build Coastguard Worker   }
343*9880d681SAndroid Build Coastguard Worker 
344*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &F) override;
345*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const346*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
347*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineBranchProbabilityInfo>();
348*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineBlockFrequencyInfo>();
349*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineDominatorTree>();
350*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineLoopInfo>();
351*9880d681SAndroid Build Coastguard Worker     AU.addRequired<TargetPassConfig>();
352*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass::getAnalysisUsage(AU);
353*9880d681SAndroid Build Coastguard Worker   }
354*9880d681SAndroid Build Coastguard Worker };
355*9880d681SAndroid Build Coastguard Worker }
356*9880d681SAndroid Build Coastguard Worker 
357*9880d681SAndroid Build Coastguard Worker char MachineBlockPlacement::ID = 0;
358*9880d681SAndroid Build Coastguard Worker char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
359*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
360*9880d681SAndroid Build Coastguard Worker                       "Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)361*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
362*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
363*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
364*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
365*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
366*9880d681SAndroid Build Coastguard Worker                     "Branch Probability Basic Block Placement", false, false)
367*9880d681SAndroid Build Coastguard Worker 
368*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
369*9880d681SAndroid Build Coastguard Worker /// \brief Helper to print the name of a MBB.
370*9880d681SAndroid Build Coastguard Worker ///
371*9880d681SAndroid Build Coastguard Worker /// Only used by debug logging.
372*9880d681SAndroid Build Coastguard Worker static std::string getBlockName(MachineBasicBlock *BB) {
373*9880d681SAndroid Build Coastguard Worker   std::string Result;
374*9880d681SAndroid Build Coastguard Worker   raw_string_ostream OS(Result);
375*9880d681SAndroid Build Coastguard Worker   OS << "BB#" << BB->getNumber();
376*9880d681SAndroid Build Coastguard Worker   OS << " ('" << BB->getName() << "')";
377*9880d681SAndroid Build Coastguard Worker   OS.flush();
378*9880d681SAndroid Build Coastguard Worker   return Result;
379*9880d681SAndroid Build Coastguard Worker }
380*9880d681SAndroid Build Coastguard Worker #endif
381*9880d681SAndroid Build Coastguard Worker 
382*9880d681SAndroid Build Coastguard Worker /// \brief Mark a chain's successors as having one fewer preds.
383*9880d681SAndroid Build Coastguard Worker ///
384*9880d681SAndroid Build Coastguard Worker /// When a chain is being merged into the "placed" chain, this routine will
385*9880d681SAndroid Build Coastguard Worker /// quickly walk the successors of each block in the chain and mark them as
386*9880d681SAndroid Build Coastguard Worker /// having one fewer active predecessor. It also adds any successors of this
387*9880d681SAndroid Build Coastguard Worker /// chain which reach the zero-predecessor state to the worklist passed in.
markChainSuccessors(BlockChain & Chain,MachineBasicBlock * LoopHeaderBB,const BlockFilterSet * BlockFilter)388*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::markChainSuccessors(
389*9880d681SAndroid Build Coastguard Worker     BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
390*9880d681SAndroid Build Coastguard Worker     const BlockFilterSet *BlockFilter) {
391*9880d681SAndroid Build Coastguard Worker   // Walk all the blocks in this chain, marking their successors as having
392*9880d681SAndroid Build Coastguard Worker   // a predecessor placed.
393*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *MBB : Chain) {
394*9880d681SAndroid Build Coastguard Worker     // Add any successors for which this is the only un-placed in-loop
395*9880d681SAndroid Build Coastguard Worker     // predecessor to the worklist as a viable candidate for CFG-neutral
396*9880d681SAndroid Build Coastguard Worker     // placement. No subsequent placement of this block will violate the CFG
397*9880d681SAndroid Build Coastguard Worker     // shape, so we get to use heuristics to choose a favorable placement.
398*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Succ : MBB->successors()) {
399*9880d681SAndroid Build Coastguard Worker       if (BlockFilter && !BlockFilter->count(Succ))
400*9880d681SAndroid Build Coastguard Worker         continue;
401*9880d681SAndroid Build Coastguard Worker       BlockChain &SuccChain = *BlockToChain[Succ];
402*9880d681SAndroid Build Coastguard Worker       // Disregard edges within a fixed chain, or edges to the loop header.
403*9880d681SAndroid Build Coastguard Worker       if (&Chain == &SuccChain || Succ == LoopHeaderBB)
404*9880d681SAndroid Build Coastguard Worker         continue;
405*9880d681SAndroid Build Coastguard Worker 
406*9880d681SAndroid Build Coastguard Worker       // This is a cross-chain edge that is within the loop, so decrement the
407*9880d681SAndroid Build Coastguard Worker       // loop predecessor count of the destination chain.
408*9880d681SAndroid Build Coastguard Worker       if (SuccChain.UnscheduledPredecessors == 0 ||
409*9880d681SAndroid Build Coastguard Worker           --SuccChain.UnscheduledPredecessors > 0)
410*9880d681SAndroid Build Coastguard Worker         continue;
411*9880d681SAndroid Build Coastguard Worker 
412*9880d681SAndroid Build Coastguard Worker       auto *MBB = *SuccChain.begin();
413*9880d681SAndroid Build Coastguard Worker       if (MBB->isEHPad())
414*9880d681SAndroid Build Coastguard Worker         EHPadWorkList.push_back(MBB);
415*9880d681SAndroid Build Coastguard Worker       else
416*9880d681SAndroid Build Coastguard Worker         BlockWorkList.push_back(MBB);
417*9880d681SAndroid Build Coastguard Worker     }
418*9880d681SAndroid Build Coastguard Worker   }
419*9880d681SAndroid Build Coastguard Worker }
420*9880d681SAndroid Build Coastguard Worker 
421*9880d681SAndroid Build Coastguard Worker /// This helper function collects the set of successors of block
422*9880d681SAndroid Build Coastguard Worker /// \p BB that are allowed to be its layout successors, and return
423*9880d681SAndroid Build Coastguard Worker /// the total branch probability of edges from \p BB to those
424*9880d681SAndroid Build Coastguard Worker /// blocks.
collectViableSuccessors(MachineBasicBlock * BB,BlockChain & Chain,const BlockFilterSet * BlockFilter,SmallVector<MachineBasicBlock *,4> & Successors)425*9880d681SAndroid Build Coastguard Worker BranchProbability MachineBlockPlacement::collectViableSuccessors(
426*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
427*9880d681SAndroid Build Coastguard Worker     SmallVector<MachineBasicBlock *, 4> &Successors) {
428*9880d681SAndroid Build Coastguard Worker   // Adjust edge probabilities by excluding edges pointing to blocks that is
429*9880d681SAndroid Build Coastguard Worker   // either not in BlockFilter or is already in the current chain. Consider the
430*9880d681SAndroid Build Coastguard Worker   // following CFG:
431*9880d681SAndroid Build Coastguard Worker   //
432*9880d681SAndroid Build Coastguard Worker   //     --->A
433*9880d681SAndroid Build Coastguard Worker   //     |  / \
434*9880d681SAndroid Build Coastguard Worker   //     | B   C
435*9880d681SAndroid Build Coastguard Worker   //     |  \ / \
436*9880d681SAndroid Build Coastguard Worker   //     ----D   E
437*9880d681SAndroid Build Coastguard Worker   //
438*9880d681SAndroid Build Coastguard Worker   // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
439*9880d681SAndroid Build Coastguard Worker   // A->C is chosen as a fall-through, D won't be selected as a successor of C
440*9880d681SAndroid Build Coastguard Worker   // due to CFG constraint (the probability of C->D is not greater than
441*9880d681SAndroid Build Coastguard Worker   // HotProb to break top-oorder). If we exclude E that is not in BlockFilter
442*9880d681SAndroid Build Coastguard Worker   // when calculating the  probability of C->D, D will be selected and we
443*9880d681SAndroid Build Coastguard Worker   // will get A C D B as the layout of this loop.
444*9880d681SAndroid Build Coastguard Worker   auto AdjustedSumProb = BranchProbability::getOne();
445*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Succ : BB->successors()) {
446*9880d681SAndroid Build Coastguard Worker     bool SkipSucc = false;
447*9880d681SAndroid Build Coastguard Worker     if (Succ->isEHPad() || (BlockFilter && !BlockFilter->count(Succ))) {
448*9880d681SAndroid Build Coastguard Worker       SkipSucc = true;
449*9880d681SAndroid Build Coastguard Worker     } else {
450*9880d681SAndroid Build Coastguard Worker       BlockChain *SuccChain = BlockToChain[Succ];
451*9880d681SAndroid Build Coastguard Worker       if (SuccChain == &Chain) {
452*9880d681SAndroid Build Coastguard Worker         SkipSucc = true;
453*9880d681SAndroid Build Coastguard Worker       } else if (Succ != *SuccChain->begin()) {
454*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
455*9880d681SAndroid Build Coastguard Worker         continue;
456*9880d681SAndroid Build Coastguard Worker       }
457*9880d681SAndroid Build Coastguard Worker     }
458*9880d681SAndroid Build Coastguard Worker     if (SkipSucc)
459*9880d681SAndroid Build Coastguard Worker       AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ);
460*9880d681SAndroid Build Coastguard Worker     else
461*9880d681SAndroid Build Coastguard Worker       Successors.push_back(Succ);
462*9880d681SAndroid Build Coastguard Worker   }
463*9880d681SAndroid Build Coastguard Worker 
464*9880d681SAndroid Build Coastguard Worker   return AdjustedSumProb;
465*9880d681SAndroid Build Coastguard Worker }
466*9880d681SAndroid Build Coastguard Worker 
467*9880d681SAndroid Build Coastguard Worker /// The helper function returns the branch probability that is adjusted
468*9880d681SAndroid Build Coastguard Worker /// or normalized over the new total \p AdjustedSumProb.
469*9880d681SAndroid Build Coastguard Worker 
470*9880d681SAndroid Build Coastguard Worker static BranchProbability
getAdjustedProbability(BranchProbability OrigProb,BranchProbability AdjustedSumProb)471*9880d681SAndroid Build Coastguard Worker getAdjustedProbability(BranchProbability OrigProb,
472*9880d681SAndroid Build Coastguard Worker                        BranchProbability AdjustedSumProb) {
473*9880d681SAndroid Build Coastguard Worker   BranchProbability SuccProb;
474*9880d681SAndroid Build Coastguard Worker   uint32_t SuccProbN = OrigProb.getNumerator();
475*9880d681SAndroid Build Coastguard Worker   uint32_t SuccProbD = AdjustedSumProb.getNumerator();
476*9880d681SAndroid Build Coastguard Worker   if (SuccProbN >= SuccProbD)
477*9880d681SAndroid Build Coastguard Worker     SuccProb = BranchProbability::getOne();
478*9880d681SAndroid Build Coastguard Worker   else
479*9880d681SAndroid Build Coastguard Worker     SuccProb = BranchProbability(SuccProbN, SuccProbD);
480*9880d681SAndroid Build Coastguard Worker 
481*9880d681SAndroid Build Coastguard Worker   return SuccProb;
482*9880d681SAndroid Build Coastguard Worker }
483*9880d681SAndroid Build Coastguard Worker 
484*9880d681SAndroid Build Coastguard Worker /// When the option OutlineOptionalBranches is on, this method
485*9880d681SAndroid Build Coastguard Worker /// checks if the fallthrough candidate block \p Succ (of block
486*9880d681SAndroid Build Coastguard Worker /// \p BB) also has other unscheduled predecessor blocks which
487*9880d681SAndroid Build Coastguard Worker /// are also successors of \p BB (forming triagular shape CFG).
488*9880d681SAndroid Build Coastguard Worker /// If none of such predecessors are small, it returns true.
489*9880d681SAndroid Build Coastguard Worker /// The caller can choose to select \p Succ as the layout successors
490*9880d681SAndroid Build Coastguard Worker /// so that \p Succ's predecessors (optional branches) can be
491*9880d681SAndroid Build Coastguard Worker /// outlined.
492*9880d681SAndroid Build Coastguard Worker /// FIXME: fold this with more general layout cost analysis.
shouldPredBlockBeOutlined(MachineBasicBlock * BB,MachineBasicBlock * Succ,BlockChain & Chain,const BlockFilterSet * BlockFilter,BranchProbability SuccProb,BranchProbability HotProb)493*9880d681SAndroid Build Coastguard Worker bool MachineBlockPlacement::shouldPredBlockBeOutlined(
494*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
495*9880d681SAndroid Build Coastguard Worker     const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
496*9880d681SAndroid Build Coastguard Worker     BranchProbability HotProb) {
497*9880d681SAndroid Build Coastguard Worker   if (!OutlineOptionalBranches)
498*9880d681SAndroid Build Coastguard Worker     return false;
499*9880d681SAndroid Build Coastguard Worker   // If we outline optional branches, look whether Succ is unavoidable, i.e.
500*9880d681SAndroid Build Coastguard Worker   // dominates all terminators of the MachineFunction. If it does, other
501*9880d681SAndroid Build Coastguard Worker   // successors must be optional. Don't do this for cold branches.
502*9880d681SAndroid Build Coastguard Worker   if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
503*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Pred : Succ->predecessors()) {
504*9880d681SAndroid Build Coastguard Worker       // Check whether there is an unplaced optional branch.
505*9880d681SAndroid Build Coastguard Worker       if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
506*9880d681SAndroid Build Coastguard Worker           BlockToChain[Pred] == &Chain)
507*9880d681SAndroid Build Coastguard Worker         continue;
508*9880d681SAndroid Build Coastguard Worker       // Check whether the optional branch has exactly one BB.
509*9880d681SAndroid Build Coastguard Worker       if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
510*9880d681SAndroid Build Coastguard Worker         continue;
511*9880d681SAndroid Build Coastguard Worker       // Check whether the optional branch is small.
512*9880d681SAndroid Build Coastguard Worker       if (Pred->size() < OutlineOptionalThreshold)
513*9880d681SAndroid Build Coastguard Worker         return false;
514*9880d681SAndroid Build Coastguard Worker     }
515*9880d681SAndroid Build Coastguard Worker     return true;
516*9880d681SAndroid Build Coastguard Worker   } else
517*9880d681SAndroid Build Coastguard Worker     return false;
518*9880d681SAndroid Build Coastguard Worker }
519*9880d681SAndroid Build Coastguard Worker 
520*9880d681SAndroid Build Coastguard Worker // When profile is not present, return the StaticLikelyProb.
521*9880d681SAndroid Build Coastguard Worker // When profile is available, we need to handle the triangle-shape CFG.
getLayoutSuccessorProbThreshold(MachineBasicBlock * BB)522*9880d681SAndroid Build Coastguard Worker static BranchProbability getLayoutSuccessorProbThreshold(
523*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock *BB) {
524*9880d681SAndroid Build Coastguard Worker   if (!BB->getParent()->getFunction()->getEntryCount())
525*9880d681SAndroid Build Coastguard Worker     return BranchProbability(StaticLikelyProb, 100);
526*9880d681SAndroid Build Coastguard Worker   if (BB->succ_size() == 2) {
527*9880d681SAndroid Build Coastguard Worker     const MachineBasicBlock *Succ1 = *BB->succ_begin();
528*9880d681SAndroid Build Coastguard Worker     const MachineBasicBlock *Succ2 = *(BB->succ_begin() + 1);
529*9880d681SAndroid Build Coastguard Worker     if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
530*9880d681SAndroid Build Coastguard Worker       /* See case 1 below for the cost analysis. For BB->Succ to
531*9880d681SAndroid Build Coastguard Worker        * be taken with smaller cost, the following needs to hold:
532*9880d681SAndroid Build Coastguard Worker        *   Prob(BB->Succ) > 2* Prob(BB->Pred)
533*9880d681SAndroid Build Coastguard Worker        *   So the threshold T
534*9880d681SAndroid Build Coastguard Worker        *   T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
535*9880d681SAndroid Build Coastguard Worker        * We have  T + T/2 = 1, i.e. T = 2/3. Also adding user specified
536*9880d681SAndroid Build Coastguard Worker        * branch bias, we have
537*9880d681SAndroid Build Coastguard Worker        *   T = (2/3)*(ProfileLikelyProb/50)
538*9880d681SAndroid Build Coastguard Worker        *     = (2*ProfileLikelyProb)/150)
539*9880d681SAndroid Build Coastguard Worker        */
540*9880d681SAndroid Build Coastguard Worker       return BranchProbability(2 * ProfileLikelyProb, 150);
541*9880d681SAndroid Build Coastguard Worker     }
542*9880d681SAndroid Build Coastguard Worker   }
543*9880d681SAndroid Build Coastguard Worker   return BranchProbability(ProfileLikelyProb, 100);
544*9880d681SAndroid Build Coastguard Worker }
545*9880d681SAndroid Build Coastguard Worker 
546*9880d681SAndroid Build Coastguard Worker /// Checks to see if the layout candidate block \p Succ has a better layout
547*9880d681SAndroid Build Coastguard Worker /// predecessor than \c BB. If yes, returns true.
hasBetterLayoutPredecessor(MachineBasicBlock * BB,MachineBasicBlock * Succ,BlockChain & SuccChain,BranchProbability SuccProb,BranchProbability RealSuccProb,BlockChain & Chain,const BlockFilterSet * BlockFilter)548*9880d681SAndroid Build Coastguard Worker bool MachineBlockPlacement::hasBetterLayoutPredecessor(
549*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
550*9880d681SAndroid Build Coastguard Worker     BranchProbability SuccProb, BranchProbability RealSuccProb,
551*9880d681SAndroid Build Coastguard Worker     BlockChain &Chain, const BlockFilterSet *BlockFilter) {
552*9880d681SAndroid Build Coastguard Worker 
553*9880d681SAndroid Build Coastguard Worker   // This is no global conflict, just return false.
554*9880d681SAndroid Build Coastguard Worker   if (SuccChain.UnscheduledPredecessors == 0)
555*9880d681SAndroid Build Coastguard Worker     return false;
556*9880d681SAndroid Build Coastguard Worker 
557*9880d681SAndroid Build Coastguard Worker   // There are two basic scenarios here:
558*9880d681SAndroid Build Coastguard Worker   // -------------------------------------
559*9880d681SAndroid Build Coastguard Worker   // Case 1: triagular shape CFG:
560*9880d681SAndroid Build Coastguard Worker   //     BB
561*9880d681SAndroid Build Coastguard Worker   //     | \
562*9880d681SAndroid Build Coastguard Worker   //     |  \
563*9880d681SAndroid Build Coastguard Worker   //     |   Pred
564*9880d681SAndroid Build Coastguard Worker   //     |   /
565*9880d681SAndroid Build Coastguard Worker   //     Succ
566*9880d681SAndroid Build Coastguard Worker   // In this case, we are evaluating whether to select edge -> Succ, e.g.
567*9880d681SAndroid Build Coastguard Worker   // set Succ as the layout successor of BB. Picking Succ as BB's
568*9880d681SAndroid Build Coastguard Worker   // successor breaks the  CFG constraints. With this layout, Pred BB
569*9880d681SAndroid Build Coastguard Worker   // is forced to be outlined, so the overall cost will be cost of the
570*9880d681SAndroid Build Coastguard Worker   // branch taken from BB to Pred, plus the cost of back taken branch
571*9880d681SAndroid Build Coastguard Worker   // from Pred to Succ, as well as the additional cost asssociated
572*9880d681SAndroid Build Coastguard Worker   // with the needed unconditional jump instruction from Pred To Succ.
573*9880d681SAndroid Build Coastguard Worker   // The cost of the topological order layout is the taken branch cost
574*9880d681SAndroid Build Coastguard Worker   // from BB to Succ, so to make BB->Succ a viable candidate, the following
575*9880d681SAndroid Build Coastguard Worker   // must hold:
576*9880d681SAndroid Build Coastguard Worker   //     2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
577*9880d681SAndroid Build Coastguard Worker   //      < freq(BB->Succ) *  taken_branch_cost.
578*9880d681SAndroid Build Coastguard Worker   // Ignoring unconditional jump cost, we get
579*9880d681SAndroid Build Coastguard Worker   //    freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
580*9880d681SAndroid Build Coastguard Worker   //    prob(BB->Succ) > 2 * prob(BB->Pred)
581*9880d681SAndroid Build Coastguard Worker   //
582*9880d681SAndroid Build Coastguard Worker   // When real profile data is available, we can precisely compute the the
583*9880d681SAndroid Build Coastguard Worker   // probabililty threshold that is needed for edge BB->Succ to be considered.
584*9880d681SAndroid Build Coastguard Worker   // With out profile data, the heuristic requires the branch bias to be
585*9880d681SAndroid Build Coastguard Worker   // a lot larger to make sure the signal is very strong (e.g. 80% default).
586*9880d681SAndroid Build Coastguard Worker   // -----------------------------------------------------------------
587*9880d681SAndroid Build Coastguard Worker   // Case 2: diamond like CFG:
588*9880d681SAndroid Build Coastguard Worker   //     S
589*9880d681SAndroid Build Coastguard Worker   //    / \
590*9880d681SAndroid Build Coastguard Worker   //   |   \
591*9880d681SAndroid Build Coastguard Worker   //  BB    Pred
592*9880d681SAndroid Build Coastguard Worker   //   \    /
593*9880d681SAndroid Build Coastguard Worker   //    Succ
594*9880d681SAndroid Build Coastguard Worker   //    ..
595*9880d681SAndroid Build Coastguard Worker   // In this case, edge S->BB has already been selected, and we are evaluating
596*9880d681SAndroid Build Coastguard Worker   // candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB)
597*9880d681SAndroid Build Coastguard Worker   // is no less than prob(S->Pred). When real profile data is *available*, if
598*9880d681SAndroid Build Coastguard Worker   // the condition is true, it will be always better to continue the trace with
599*9880d681SAndroid Build Coastguard Worker   // edge BB->Succ instead of laying out with topological order (i.e. laying
600*9880d681SAndroid Build Coastguard Worker   // Pred first).  The cost of S->BB->Succ is 2 * freq (S->Pred), while with
601*9880d681SAndroid Build Coastguard Worker   // the topo order, the cost is freq(S-> Pred) + Pred(S->BB) which is larger.
602*9880d681SAndroid Build Coastguard Worker   // When profile data is not available, however, we need to be more
603*9880d681SAndroid Build Coastguard Worker   // conservative. If the branch prediction is wrong, breaking the topo-order
604*9880d681SAndroid Build Coastguard Worker   // will actually yield a layout with large cost. For this reason, we need
605*9880d681SAndroid Build Coastguard Worker   // strong biaaed branch at block S with Prob(S->BB) in order to select
606*9880d681SAndroid Build Coastguard Worker   // BB->Succ. This is equialant to looking the CFG backward with backward
607*9880d681SAndroid Build Coastguard Worker   // edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
608*9880d681SAndroid Build Coastguard Worker   // profile data).
609*9880d681SAndroid Build Coastguard Worker 
610*9880d681SAndroid Build Coastguard Worker   BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
611*9880d681SAndroid Build Coastguard Worker 
612*9880d681SAndroid Build Coastguard Worker   // Forward checking. For case 2, SuccProb will be 1.
613*9880d681SAndroid Build Coastguard Worker   if (SuccProb < HotProb) {
614*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
615*9880d681SAndroid Build Coastguard Worker                  << " (prob) (CFG conflict)\n");
616*9880d681SAndroid Build Coastguard Worker     return true;
617*9880d681SAndroid Build Coastguard Worker   }
618*9880d681SAndroid Build Coastguard Worker 
619*9880d681SAndroid Build Coastguard Worker   // Make sure that a hot successor doesn't have a globally more
620*9880d681SAndroid Build Coastguard Worker   // important predecessor.
621*9880d681SAndroid Build Coastguard Worker   BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
622*9880d681SAndroid Build Coastguard Worker   bool BadCFGConflict = false;
623*9880d681SAndroid Build Coastguard Worker 
624*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Pred : Succ->predecessors()) {
625*9880d681SAndroid Build Coastguard Worker     if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
626*9880d681SAndroid Build Coastguard Worker         (BlockFilter && !BlockFilter->count(Pred)) ||
627*9880d681SAndroid Build Coastguard Worker         BlockToChain[Pred] == &Chain)
628*9880d681SAndroid Build Coastguard Worker       continue;
629*9880d681SAndroid Build Coastguard Worker     // Do backward checking. For case 1, it is actually redundant check. For
630*9880d681SAndroid Build Coastguard Worker     // case 2 above, we need a backward checking to filter out edges that are
631*9880d681SAndroid Build Coastguard Worker     // not 'strongly' biased. With profile data available, the check is mostly
632*9880d681SAndroid Build Coastguard Worker     // redundant too (when threshold prob is set at 50%) unless S has more than
633*9880d681SAndroid Build Coastguard Worker     // two successors.
634*9880d681SAndroid Build Coastguard Worker     // BB  Pred
635*9880d681SAndroid Build Coastguard Worker     //  \ /
636*9880d681SAndroid Build Coastguard Worker     //  Succ
637*9880d681SAndroid Build Coastguard Worker     // We select edgee BB->Succ if
638*9880d681SAndroid Build Coastguard Worker     //      freq(BB->Succ) > freq(Succ) * HotProb
639*9880d681SAndroid Build Coastguard Worker     //      i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
640*9880d681SAndroid Build Coastguard Worker     //      HotProb
641*9880d681SAndroid Build Coastguard Worker     //      i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
642*9880d681SAndroid Build Coastguard Worker     BlockFrequency PredEdgeFreq =
643*9880d681SAndroid Build Coastguard Worker         MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
644*9880d681SAndroid Build Coastguard Worker     if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
645*9880d681SAndroid Build Coastguard Worker       BadCFGConflict = true;
646*9880d681SAndroid Build Coastguard Worker       break;
647*9880d681SAndroid Build Coastguard Worker     }
648*9880d681SAndroid Build Coastguard Worker   }
649*9880d681SAndroid Build Coastguard Worker 
650*9880d681SAndroid Build Coastguard Worker   if (BadCFGConflict) {
651*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
652*9880d681SAndroid Build Coastguard Worker                  << " (prob) (non-cold CFG conflict)\n");
653*9880d681SAndroid Build Coastguard Worker     return true;
654*9880d681SAndroid Build Coastguard Worker   }
655*9880d681SAndroid Build Coastguard Worker 
656*9880d681SAndroid Build Coastguard Worker   return false;
657*9880d681SAndroid Build Coastguard Worker }
658*9880d681SAndroid Build Coastguard Worker 
659*9880d681SAndroid Build Coastguard Worker /// \brief Select the best successor for a block.
660*9880d681SAndroid Build Coastguard Worker ///
661*9880d681SAndroid Build Coastguard Worker /// This looks across all successors of a particular block and attempts to
662*9880d681SAndroid Build Coastguard Worker /// select the "best" one to be the layout successor. It only considers direct
663*9880d681SAndroid Build Coastguard Worker /// successors which also pass the block filter. It will attempt to avoid
664*9880d681SAndroid Build Coastguard Worker /// breaking CFG structure, but cave and break such structures in the case of
665*9880d681SAndroid Build Coastguard Worker /// very hot successor edges.
666*9880d681SAndroid Build Coastguard Worker ///
667*9880d681SAndroid Build Coastguard Worker /// \returns The best successor block found, or null if none are viable.
668*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *
selectBestSuccessor(MachineBasicBlock * BB,BlockChain & Chain,const BlockFilterSet * BlockFilter)669*9880d681SAndroid Build Coastguard Worker MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
670*9880d681SAndroid Build Coastguard Worker                                            BlockChain &Chain,
671*9880d681SAndroid Build Coastguard Worker                                            const BlockFilterSet *BlockFilter) {
672*9880d681SAndroid Build Coastguard Worker   const BranchProbability HotProb(StaticLikelyProb, 100);
673*9880d681SAndroid Build Coastguard Worker 
674*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *BestSucc = nullptr;
675*9880d681SAndroid Build Coastguard Worker   auto BestProb = BranchProbability::getZero();
676*9880d681SAndroid Build Coastguard Worker 
677*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineBasicBlock *, 4> Successors;
678*9880d681SAndroid Build Coastguard Worker   auto AdjustedSumProb =
679*9880d681SAndroid Build Coastguard Worker       collectViableSuccessors(BB, Chain, BlockFilter, Successors);
680*9880d681SAndroid Build Coastguard Worker 
681*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
682*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Succ : Successors) {
683*9880d681SAndroid Build Coastguard Worker     auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
684*9880d681SAndroid Build Coastguard Worker     BranchProbability SuccProb =
685*9880d681SAndroid Build Coastguard Worker         getAdjustedProbability(RealSuccProb, AdjustedSumProb);
686*9880d681SAndroid Build Coastguard Worker 
687*9880d681SAndroid Build Coastguard Worker     // This heuristic is off by default.
688*9880d681SAndroid Build Coastguard Worker     if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
689*9880d681SAndroid Build Coastguard Worker                                   HotProb))
690*9880d681SAndroid Build Coastguard Worker       return Succ;
691*9880d681SAndroid Build Coastguard Worker 
692*9880d681SAndroid Build Coastguard Worker     BlockChain &SuccChain = *BlockToChain[Succ];
693*9880d681SAndroid Build Coastguard Worker     // Skip the edge \c BB->Succ if block \c Succ has a better layout
694*9880d681SAndroid Build Coastguard Worker     // predecessor that yields lower global cost.
695*9880d681SAndroid Build Coastguard Worker     if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
696*9880d681SAndroid Build Coastguard Worker                                    Chain, BlockFilter))
697*9880d681SAndroid Build Coastguard Worker       continue;
698*9880d681SAndroid Build Coastguard Worker 
699*9880d681SAndroid Build Coastguard Worker     DEBUG(
700*9880d681SAndroid Build Coastguard Worker         dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
701*9880d681SAndroid Build Coastguard Worker                << " (prob)"
702*9880d681SAndroid Build Coastguard Worker                << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
703*9880d681SAndroid Build Coastguard Worker                << "\n");
704*9880d681SAndroid Build Coastguard Worker     if (BestSucc && BestProb >= SuccProb)
705*9880d681SAndroid Build Coastguard Worker       continue;
706*9880d681SAndroid Build Coastguard Worker     BestSucc = Succ;
707*9880d681SAndroid Build Coastguard Worker     BestProb = SuccProb;
708*9880d681SAndroid Build Coastguard Worker   }
709*9880d681SAndroid Build Coastguard Worker   return BestSucc;
710*9880d681SAndroid Build Coastguard Worker }
711*9880d681SAndroid Build Coastguard Worker 
712*9880d681SAndroid Build Coastguard Worker /// \brief Select the best block from a worklist.
713*9880d681SAndroid Build Coastguard Worker ///
714*9880d681SAndroid Build Coastguard Worker /// This looks through the provided worklist as a list of candidate basic
715*9880d681SAndroid Build Coastguard Worker /// blocks and select the most profitable one to place. The definition of
716*9880d681SAndroid Build Coastguard Worker /// profitable only really makes sense in the context of a loop. This returns
717*9880d681SAndroid Build Coastguard Worker /// the most frequently visited block in the worklist, which in the case of
718*9880d681SAndroid Build Coastguard Worker /// a loop, is the one most desirable to be physically close to the rest of the
719*9880d681SAndroid Build Coastguard Worker /// loop body in order to improve icache behavior.
720*9880d681SAndroid Build Coastguard Worker ///
721*9880d681SAndroid Build Coastguard Worker /// \returns The best block found, or null if none are viable.
selectBestCandidateBlock(BlockChain & Chain,SmallVectorImpl<MachineBasicBlock * > & WorkList)722*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
723*9880d681SAndroid Build Coastguard Worker     BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
724*9880d681SAndroid Build Coastguard Worker   // Once we need to walk the worklist looking for a candidate, cleanup the
725*9880d681SAndroid Build Coastguard Worker   // worklist of already placed entries.
726*9880d681SAndroid Build Coastguard Worker   // FIXME: If this shows up on profiles, it could be folded (at the cost of
727*9880d681SAndroid Build Coastguard Worker   // some code complexity) into the loop below.
728*9880d681SAndroid Build Coastguard Worker   WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
729*9880d681SAndroid Build Coastguard Worker                                 [&](MachineBasicBlock *BB) {
730*9880d681SAndroid Build Coastguard Worker                                   return BlockToChain.lookup(BB) == &Chain;
731*9880d681SAndroid Build Coastguard Worker                                 }),
732*9880d681SAndroid Build Coastguard Worker                  WorkList.end());
733*9880d681SAndroid Build Coastguard Worker 
734*9880d681SAndroid Build Coastguard Worker   if (WorkList.empty())
735*9880d681SAndroid Build Coastguard Worker     return nullptr;
736*9880d681SAndroid Build Coastguard Worker 
737*9880d681SAndroid Build Coastguard Worker   bool IsEHPad = WorkList[0]->isEHPad();
738*9880d681SAndroid Build Coastguard Worker 
739*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *BestBlock = nullptr;
740*9880d681SAndroid Build Coastguard Worker   BlockFrequency BestFreq;
741*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *MBB : WorkList) {
742*9880d681SAndroid Build Coastguard Worker     assert(MBB->isEHPad() == IsEHPad);
743*9880d681SAndroid Build Coastguard Worker 
744*9880d681SAndroid Build Coastguard Worker     BlockChain &SuccChain = *BlockToChain[MBB];
745*9880d681SAndroid Build Coastguard Worker     if (&SuccChain == &Chain)
746*9880d681SAndroid Build Coastguard Worker       continue;
747*9880d681SAndroid Build Coastguard Worker 
748*9880d681SAndroid Build Coastguard Worker     assert(SuccChain.UnscheduledPredecessors == 0 && "Found CFG-violating block");
749*9880d681SAndroid Build Coastguard Worker 
750*9880d681SAndroid Build Coastguard Worker     BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB);
751*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "    " << getBlockName(MBB) << " -> ";
752*9880d681SAndroid Build Coastguard Worker           MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n");
753*9880d681SAndroid Build Coastguard Worker 
754*9880d681SAndroid Build Coastguard Worker     // For ehpad, we layout the least probable first as to avoid jumping back
755*9880d681SAndroid Build Coastguard Worker     // from least probable landingpads to more probable ones.
756*9880d681SAndroid Build Coastguard Worker     //
757*9880d681SAndroid Build Coastguard Worker     // FIXME: Using probability is probably (!) not the best way to achieve
758*9880d681SAndroid Build Coastguard Worker     // this. We should probably have a more principled approach to layout
759*9880d681SAndroid Build Coastguard Worker     // cleanup code.
760*9880d681SAndroid Build Coastguard Worker     //
761*9880d681SAndroid Build Coastguard Worker     // The goal is to get:
762*9880d681SAndroid Build Coastguard Worker     //
763*9880d681SAndroid Build Coastguard Worker     //                 +--------------------------+
764*9880d681SAndroid Build Coastguard Worker     //                 |                          V
765*9880d681SAndroid Build Coastguard Worker     // InnerLp -> InnerCleanup    OuterLp -> OuterCleanup -> Resume
766*9880d681SAndroid Build Coastguard Worker     //
767*9880d681SAndroid Build Coastguard Worker     // Rather than:
768*9880d681SAndroid Build Coastguard Worker     //
769*9880d681SAndroid Build Coastguard Worker     //                 +-------------------------------------+
770*9880d681SAndroid Build Coastguard Worker     //                 V                                     |
771*9880d681SAndroid Build Coastguard Worker     // OuterLp -> OuterCleanup -> Resume     InnerLp -> InnerCleanup
772*9880d681SAndroid Build Coastguard Worker     if (BestBlock && (IsEHPad ^ (BestFreq >= CandidateFreq)))
773*9880d681SAndroid Build Coastguard Worker       continue;
774*9880d681SAndroid Build Coastguard Worker 
775*9880d681SAndroid Build Coastguard Worker     BestBlock = MBB;
776*9880d681SAndroid Build Coastguard Worker     BestFreq = CandidateFreq;
777*9880d681SAndroid Build Coastguard Worker   }
778*9880d681SAndroid Build Coastguard Worker 
779*9880d681SAndroid Build Coastguard Worker   return BestBlock;
780*9880d681SAndroid Build Coastguard Worker }
781*9880d681SAndroid Build Coastguard Worker 
782*9880d681SAndroid Build Coastguard Worker /// \brief Retrieve the first unplaced basic block.
783*9880d681SAndroid Build Coastguard Worker ///
784*9880d681SAndroid Build Coastguard Worker /// This routine is called when we are unable to use the CFG to walk through
785*9880d681SAndroid Build Coastguard Worker /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
786*9880d681SAndroid Build Coastguard Worker /// We walk through the function's blocks in order, starting from the
787*9880d681SAndroid Build Coastguard Worker /// LastUnplacedBlockIt. We update this iterator on each call to avoid
788*9880d681SAndroid Build Coastguard Worker /// re-scanning the entire sequence on repeated calls to this routine.
getFirstUnplacedBlock(const BlockChain & PlacedChain,MachineFunction::iterator & PrevUnplacedBlockIt,const BlockFilterSet * BlockFilter)789*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
790*9880d681SAndroid Build Coastguard Worker     const BlockChain &PlacedChain,
791*9880d681SAndroid Build Coastguard Worker     MachineFunction::iterator &PrevUnplacedBlockIt,
792*9880d681SAndroid Build Coastguard Worker     const BlockFilterSet *BlockFilter) {
793*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
794*9880d681SAndroid Build Coastguard Worker        ++I) {
795*9880d681SAndroid Build Coastguard Worker     if (BlockFilter && !BlockFilter->count(&*I))
796*9880d681SAndroid Build Coastguard Worker       continue;
797*9880d681SAndroid Build Coastguard Worker     if (BlockToChain[&*I] != &PlacedChain) {
798*9880d681SAndroid Build Coastguard Worker       PrevUnplacedBlockIt = I;
799*9880d681SAndroid Build Coastguard Worker       // Now select the head of the chain to which the unplaced block belongs
800*9880d681SAndroid Build Coastguard Worker       // as the block to place. This will force the entire chain to be placed,
801*9880d681SAndroid Build Coastguard Worker       // and satisfies the requirements of merging chains.
802*9880d681SAndroid Build Coastguard Worker       return *BlockToChain[&*I]->begin();
803*9880d681SAndroid Build Coastguard Worker     }
804*9880d681SAndroid Build Coastguard Worker   }
805*9880d681SAndroid Build Coastguard Worker   return nullptr;
806*9880d681SAndroid Build Coastguard Worker }
807*9880d681SAndroid Build Coastguard Worker 
fillWorkLists(MachineBasicBlock * MBB,SmallPtrSetImpl<BlockChain * > & UpdatedPreds,const BlockFilterSet * BlockFilter=nullptr)808*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::fillWorkLists(
809*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *MBB,
810*9880d681SAndroid Build Coastguard Worker     SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
811*9880d681SAndroid Build Coastguard Worker     const BlockFilterSet *BlockFilter = nullptr) {
812*9880d681SAndroid Build Coastguard Worker   BlockChain &Chain = *BlockToChain[MBB];
813*9880d681SAndroid Build Coastguard Worker   if (!UpdatedPreds.insert(&Chain).second)
814*9880d681SAndroid Build Coastguard Worker     return;
815*9880d681SAndroid Build Coastguard Worker 
816*9880d681SAndroid Build Coastguard Worker   assert(Chain.UnscheduledPredecessors == 0);
817*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *ChainBB : Chain) {
818*9880d681SAndroid Build Coastguard Worker     assert(BlockToChain[ChainBB] == &Chain);
819*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Pred : ChainBB->predecessors()) {
820*9880d681SAndroid Build Coastguard Worker       if (BlockFilter && !BlockFilter->count(Pred))
821*9880d681SAndroid Build Coastguard Worker         continue;
822*9880d681SAndroid Build Coastguard Worker       if (BlockToChain[Pred] == &Chain)
823*9880d681SAndroid Build Coastguard Worker         continue;
824*9880d681SAndroid Build Coastguard Worker       ++Chain.UnscheduledPredecessors;
825*9880d681SAndroid Build Coastguard Worker     }
826*9880d681SAndroid Build Coastguard Worker   }
827*9880d681SAndroid Build Coastguard Worker 
828*9880d681SAndroid Build Coastguard Worker   if (Chain.UnscheduledPredecessors != 0)
829*9880d681SAndroid Build Coastguard Worker     return;
830*9880d681SAndroid Build Coastguard Worker 
831*9880d681SAndroid Build Coastguard Worker   MBB = *Chain.begin();
832*9880d681SAndroid Build Coastguard Worker   if (MBB->isEHPad())
833*9880d681SAndroid Build Coastguard Worker     EHPadWorkList.push_back(MBB);
834*9880d681SAndroid Build Coastguard Worker   else
835*9880d681SAndroid Build Coastguard Worker     BlockWorkList.push_back(MBB);
836*9880d681SAndroid Build Coastguard Worker }
837*9880d681SAndroid Build Coastguard Worker 
buildChain(MachineBasicBlock * BB,BlockChain & Chain,const BlockFilterSet * BlockFilter)838*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::buildChain(
839*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB, BlockChain &Chain,
840*9880d681SAndroid Build Coastguard Worker     const BlockFilterSet *BlockFilter) {
841*9880d681SAndroid Build Coastguard Worker   assert(BB && "BB must not be null.\n");
842*9880d681SAndroid Build Coastguard Worker   assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
843*9880d681SAndroid Build Coastguard Worker   MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
844*9880d681SAndroid Build Coastguard Worker 
845*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *LoopHeaderBB = BB;
846*9880d681SAndroid Build Coastguard Worker   markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
847*9880d681SAndroid Build Coastguard Worker   BB = *std::prev(Chain.end());
848*9880d681SAndroid Build Coastguard Worker   for (;;) {
849*9880d681SAndroid Build Coastguard Worker     assert(BB && "null block found at end of chain in loop.");
850*9880d681SAndroid Build Coastguard Worker     assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
851*9880d681SAndroid Build Coastguard Worker     assert(*std::prev(Chain.end()) == BB && "BB Not found at end of chain.");
852*9880d681SAndroid Build Coastguard Worker 
853*9880d681SAndroid Build Coastguard Worker 
854*9880d681SAndroid Build Coastguard Worker     // Look for the best viable successor if there is one to place immediately
855*9880d681SAndroid Build Coastguard Worker     // after this block.
856*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
857*9880d681SAndroid Build Coastguard Worker 
858*9880d681SAndroid Build Coastguard Worker     // If an immediate successor isn't available, look for the best viable
859*9880d681SAndroid Build Coastguard Worker     // block among those we've identified as not violating the loop's CFG at
860*9880d681SAndroid Build Coastguard Worker     // this point. This won't be a fallthrough, but it will increase locality.
861*9880d681SAndroid Build Coastguard Worker     if (!BestSucc)
862*9880d681SAndroid Build Coastguard Worker       BestSucc = selectBestCandidateBlock(Chain, BlockWorkList);
863*9880d681SAndroid Build Coastguard Worker     if (!BestSucc)
864*9880d681SAndroid Build Coastguard Worker       BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
865*9880d681SAndroid Build Coastguard Worker 
866*9880d681SAndroid Build Coastguard Worker     if (!BestSucc) {
867*9880d681SAndroid Build Coastguard Worker       BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
868*9880d681SAndroid Build Coastguard Worker       if (!BestSucc)
869*9880d681SAndroid Build Coastguard Worker         break;
870*9880d681SAndroid Build Coastguard Worker 
871*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
872*9880d681SAndroid Build Coastguard Worker                       "layout successor until the CFG reduces\n");
873*9880d681SAndroid Build Coastguard Worker     }
874*9880d681SAndroid Build Coastguard Worker 
875*9880d681SAndroid Build Coastguard Worker     // Place this block, updating the datastructures to reflect its placement.
876*9880d681SAndroid Build Coastguard Worker     BlockChain &SuccChain = *BlockToChain[BestSucc];
877*9880d681SAndroid Build Coastguard Worker     // Zero out UnscheduledPredecessors for the successor we're about to merge in case
878*9880d681SAndroid Build Coastguard Worker     // we selected a successor that didn't fit naturally into the CFG.
879*9880d681SAndroid Build Coastguard Worker     SuccChain.UnscheduledPredecessors = 0;
880*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Merging from " << getBlockName(BB) << " to "
881*9880d681SAndroid Build Coastguard Worker                  << getBlockName(BestSucc) << "\n");
882*9880d681SAndroid Build Coastguard Worker     markChainSuccessors(SuccChain, LoopHeaderBB, BlockFilter);
883*9880d681SAndroid Build Coastguard Worker     Chain.merge(BestSucc, &SuccChain);
884*9880d681SAndroid Build Coastguard Worker     BB = *std::prev(Chain.end());
885*9880d681SAndroid Build Coastguard Worker   }
886*9880d681SAndroid Build Coastguard Worker 
887*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Finished forming chain for header block "
888*9880d681SAndroid Build Coastguard Worker                << getBlockName(*Chain.begin()) << "\n");
889*9880d681SAndroid Build Coastguard Worker }
890*9880d681SAndroid Build Coastguard Worker 
891*9880d681SAndroid Build Coastguard Worker /// \brief Find the best loop top block for layout.
892*9880d681SAndroid Build Coastguard Worker ///
893*9880d681SAndroid Build Coastguard Worker /// Look for a block which is strictly better than the loop header for laying
894*9880d681SAndroid Build Coastguard Worker /// out at the top of the loop. This looks for one and only one pattern:
895*9880d681SAndroid Build Coastguard Worker /// a latch block with no conditional exit. This block will cause a conditional
896*9880d681SAndroid Build Coastguard Worker /// jump around it or will be the bottom of the loop if we lay it out in place,
897*9880d681SAndroid Build Coastguard Worker /// but if it it doesn't end up at the bottom of the loop for any reason,
898*9880d681SAndroid Build Coastguard Worker /// rotation alone won't fix it. Because such a block will always result in an
899*9880d681SAndroid Build Coastguard Worker /// unconditional jump (for the backedge) rotating it in front of the loop
900*9880d681SAndroid Build Coastguard Worker /// header is always profitable.
901*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *
findBestLoopTop(MachineLoop & L,const BlockFilterSet & LoopBlockSet)902*9880d681SAndroid Build Coastguard Worker MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
903*9880d681SAndroid Build Coastguard Worker                                        const BlockFilterSet &LoopBlockSet) {
904*9880d681SAndroid Build Coastguard Worker   // Check that the header hasn't been fused with a preheader block due to
905*9880d681SAndroid Build Coastguard Worker   // crazy branches. If it has, we need to start with the header at the top to
906*9880d681SAndroid Build Coastguard Worker   // prevent pulling the preheader into the loop body.
907*9880d681SAndroid Build Coastguard Worker   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
908*9880d681SAndroid Build Coastguard Worker   if (!LoopBlockSet.count(*HeaderChain.begin()))
909*9880d681SAndroid Build Coastguard Worker     return L.getHeader();
910*9880d681SAndroid Build Coastguard Worker 
911*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader())
912*9880d681SAndroid Build Coastguard Worker                << "\n");
913*9880d681SAndroid Build Coastguard Worker 
914*9880d681SAndroid Build Coastguard Worker   BlockFrequency BestPredFreq;
915*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *BestPred = nullptr;
916*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
917*9880d681SAndroid Build Coastguard Worker     if (!LoopBlockSet.count(Pred))
918*9880d681SAndroid Build Coastguard Worker       continue;
919*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "    header pred: " << getBlockName(Pred) << ", "
920*9880d681SAndroid Build Coastguard Worker                  << Pred->succ_size() << " successors, ";
921*9880d681SAndroid Build Coastguard Worker           MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
922*9880d681SAndroid Build Coastguard Worker     if (Pred->succ_size() > 1)
923*9880d681SAndroid Build Coastguard Worker       continue;
924*9880d681SAndroid Build Coastguard Worker 
925*9880d681SAndroid Build Coastguard Worker     BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
926*9880d681SAndroid Build Coastguard Worker     if (!BestPred || PredFreq > BestPredFreq ||
927*9880d681SAndroid Build Coastguard Worker         (!(PredFreq < BestPredFreq) &&
928*9880d681SAndroid Build Coastguard Worker          Pred->isLayoutSuccessor(L.getHeader()))) {
929*9880d681SAndroid Build Coastguard Worker       BestPred = Pred;
930*9880d681SAndroid Build Coastguard Worker       BestPredFreq = PredFreq;
931*9880d681SAndroid Build Coastguard Worker     }
932*9880d681SAndroid Build Coastguard Worker   }
933*9880d681SAndroid Build Coastguard Worker 
934*9880d681SAndroid Build Coastguard Worker   // If no direct predecessor is fine, just use the loop header.
935*9880d681SAndroid Build Coastguard Worker   if (!BestPred) {
936*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "    final top unchanged\n");
937*9880d681SAndroid Build Coastguard Worker     return L.getHeader();
938*9880d681SAndroid Build Coastguard Worker   }
939*9880d681SAndroid Build Coastguard Worker 
940*9880d681SAndroid Build Coastguard Worker   // Walk backwards through any straight line of predecessors.
941*9880d681SAndroid Build Coastguard Worker   while (BestPred->pred_size() == 1 &&
942*9880d681SAndroid Build Coastguard Worker          (*BestPred->pred_begin())->succ_size() == 1 &&
943*9880d681SAndroid Build Coastguard Worker          *BestPred->pred_begin() != L.getHeader())
944*9880d681SAndroid Build Coastguard Worker     BestPred = *BestPred->pred_begin();
945*9880d681SAndroid Build Coastguard Worker 
946*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "    final top: " << getBlockName(BestPred) << "\n");
947*9880d681SAndroid Build Coastguard Worker   return BestPred;
948*9880d681SAndroid Build Coastguard Worker }
949*9880d681SAndroid Build Coastguard Worker 
950*9880d681SAndroid Build Coastguard Worker /// \brief Find the best loop exiting block for layout.
951*9880d681SAndroid Build Coastguard Worker ///
952*9880d681SAndroid Build Coastguard Worker /// This routine implements the logic to analyze the loop looking for the best
953*9880d681SAndroid Build Coastguard Worker /// block to layout at the top of the loop. Typically this is done to maximize
954*9880d681SAndroid Build Coastguard Worker /// fallthrough opportunities.
955*9880d681SAndroid Build Coastguard Worker MachineBasicBlock *
findBestLoopExit(MachineLoop & L,const BlockFilterSet & LoopBlockSet)956*9880d681SAndroid Build Coastguard Worker MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
957*9880d681SAndroid Build Coastguard Worker                                         const BlockFilterSet &LoopBlockSet) {
958*9880d681SAndroid Build Coastguard Worker   // We don't want to layout the loop linearly in all cases. If the loop header
959*9880d681SAndroid Build Coastguard Worker   // is just a normal basic block in the loop, we want to look for what block
960*9880d681SAndroid Build Coastguard Worker   // within the loop is the best one to layout at the top. However, if the loop
961*9880d681SAndroid Build Coastguard Worker   // header has be pre-merged into a chain due to predecessors not having
962*9880d681SAndroid Build Coastguard Worker   // analyzable branches, *and* the predecessor it is merged with is *not* part
963*9880d681SAndroid Build Coastguard Worker   // of the loop, rotating the header into the middle of the loop will create
964*9880d681SAndroid Build Coastguard Worker   // a non-contiguous range of blocks which is Very Bad. So start with the
965*9880d681SAndroid Build Coastguard Worker   // header and only rotate if safe.
966*9880d681SAndroid Build Coastguard Worker   BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
967*9880d681SAndroid Build Coastguard Worker   if (!LoopBlockSet.count(*HeaderChain.begin()))
968*9880d681SAndroid Build Coastguard Worker     return nullptr;
969*9880d681SAndroid Build Coastguard Worker 
970*9880d681SAndroid Build Coastguard Worker   BlockFrequency BestExitEdgeFreq;
971*9880d681SAndroid Build Coastguard Worker   unsigned BestExitLoopDepth = 0;
972*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *ExitingBB = nullptr;
973*9880d681SAndroid Build Coastguard Worker   // If there are exits to outer loops, loop rotation can severely limit
974*9880d681SAndroid Build Coastguard Worker   // fallthrough opportunites unless it selects such an exit. Keep a set of
975*9880d681SAndroid Build Coastguard Worker   // blocks where rotating to exit with that block will reach an outer loop.
976*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
977*9880d681SAndroid Build Coastguard Worker 
978*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader())
979*9880d681SAndroid Build Coastguard Worker                << "\n");
980*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *MBB : L.getBlocks()) {
981*9880d681SAndroid Build Coastguard Worker     BlockChain &Chain = *BlockToChain[MBB];
982*9880d681SAndroid Build Coastguard Worker     // Ensure that this block is at the end of a chain; otherwise it could be
983*9880d681SAndroid Build Coastguard Worker     // mid-way through an inner loop or a successor of an unanalyzable branch.
984*9880d681SAndroid Build Coastguard Worker     if (MBB != *std::prev(Chain.end()))
985*9880d681SAndroid Build Coastguard Worker       continue;
986*9880d681SAndroid Build Coastguard Worker 
987*9880d681SAndroid Build Coastguard Worker     // Now walk the successors. We need to establish whether this has a viable
988*9880d681SAndroid Build Coastguard Worker     // exiting successor and whether it has a viable non-exiting successor.
989*9880d681SAndroid Build Coastguard Worker     // We store the old exiting state and restore it if a viable looping
990*9880d681SAndroid Build Coastguard Worker     // successor isn't found.
991*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *OldExitingBB = ExitingBB;
992*9880d681SAndroid Build Coastguard Worker     BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
993*9880d681SAndroid Build Coastguard Worker     bool HasLoopingSucc = false;
994*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Succ : MBB->successors()) {
995*9880d681SAndroid Build Coastguard Worker       if (Succ->isEHPad())
996*9880d681SAndroid Build Coastguard Worker         continue;
997*9880d681SAndroid Build Coastguard Worker       if (Succ == MBB)
998*9880d681SAndroid Build Coastguard Worker         continue;
999*9880d681SAndroid Build Coastguard Worker       BlockChain &SuccChain = *BlockToChain[Succ];
1000*9880d681SAndroid Build Coastguard Worker       // Don't split chains, either this chain or the successor's chain.
1001*9880d681SAndroid Build Coastguard Worker       if (&Chain == &SuccChain) {
1002*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> "
1003*9880d681SAndroid Build Coastguard Worker                      << getBlockName(Succ) << " (chain conflict)\n");
1004*9880d681SAndroid Build Coastguard Worker         continue;
1005*9880d681SAndroid Build Coastguard Worker       }
1006*9880d681SAndroid Build Coastguard Worker 
1007*9880d681SAndroid Build Coastguard Worker       auto SuccProb = MBPI->getEdgeProbability(MBB, Succ);
1008*9880d681SAndroid Build Coastguard Worker       if (LoopBlockSet.count(Succ)) {
1009*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "    looping: " << getBlockName(MBB) << " -> "
1010*9880d681SAndroid Build Coastguard Worker                      << getBlockName(Succ) << " (" << SuccProb << ")\n");
1011*9880d681SAndroid Build Coastguard Worker         HasLoopingSucc = true;
1012*9880d681SAndroid Build Coastguard Worker         continue;
1013*9880d681SAndroid Build Coastguard Worker       }
1014*9880d681SAndroid Build Coastguard Worker 
1015*9880d681SAndroid Build Coastguard Worker       unsigned SuccLoopDepth = 0;
1016*9880d681SAndroid Build Coastguard Worker       if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) {
1017*9880d681SAndroid Build Coastguard Worker         SuccLoopDepth = ExitLoop->getLoopDepth();
1018*9880d681SAndroid Build Coastguard Worker         if (ExitLoop->contains(&L))
1019*9880d681SAndroid Build Coastguard Worker           BlocksExitingToOuterLoop.insert(MBB);
1020*9880d681SAndroid Build Coastguard Worker       }
1021*9880d681SAndroid Build Coastguard Worker 
1022*9880d681SAndroid Build Coastguard Worker       BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb;
1023*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "    exiting: " << getBlockName(MBB) << " -> "
1024*9880d681SAndroid Build Coastguard Worker                    << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] (";
1025*9880d681SAndroid Build Coastguard Worker             MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n");
1026*9880d681SAndroid Build Coastguard Worker       // Note that we bias this toward an existing layout successor to retain
1027*9880d681SAndroid Build Coastguard Worker       // incoming order in the absence of better information. The exit must have
1028*9880d681SAndroid Build Coastguard Worker       // a frequency higher than the current exit before we consider breaking
1029*9880d681SAndroid Build Coastguard Worker       // the layout.
1030*9880d681SAndroid Build Coastguard Worker       BranchProbability Bias(100 - ExitBlockBias, 100);
1031*9880d681SAndroid Build Coastguard Worker       if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth ||
1032*9880d681SAndroid Build Coastguard Worker           ExitEdgeFreq > BestExitEdgeFreq ||
1033*9880d681SAndroid Build Coastguard Worker           (MBB->isLayoutSuccessor(Succ) &&
1034*9880d681SAndroid Build Coastguard Worker            !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) {
1035*9880d681SAndroid Build Coastguard Worker         BestExitEdgeFreq = ExitEdgeFreq;
1036*9880d681SAndroid Build Coastguard Worker         ExitingBB = MBB;
1037*9880d681SAndroid Build Coastguard Worker       }
1038*9880d681SAndroid Build Coastguard Worker     }
1039*9880d681SAndroid Build Coastguard Worker 
1040*9880d681SAndroid Build Coastguard Worker     if (!HasLoopingSucc) {
1041*9880d681SAndroid Build Coastguard Worker       // Restore the old exiting state, no viable looping successor was found.
1042*9880d681SAndroid Build Coastguard Worker       ExitingBB = OldExitingBB;
1043*9880d681SAndroid Build Coastguard Worker       BestExitEdgeFreq = OldBestExitEdgeFreq;
1044*9880d681SAndroid Build Coastguard Worker     }
1045*9880d681SAndroid Build Coastguard Worker   }
1046*9880d681SAndroid Build Coastguard Worker   // Without a candidate exiting block or with only a single block in the
1047*9880d681SAndroid Build Coastguard Worker   // loop, just use the loop header to layout the loop.
1048*9880d681SAndroid Build Coastguard Worker   if (!ExitingBB || L.getNumBlocks() == 1)
1049*9880d681SAndroid Build Coastguard Worker     return nullptr;
1050*9880d681SAndroid Build Coastguard Worker 
1051*9880d681SAndroid Build Coastguard Worker   // Also, if we have exit blocks which lead to outer loops but didn't select
1052*9880d681SAndroid Build Coastguard Worker   // one of them as the exiting block we are rotating toward, disable loop
1053*9880d681SAndroid Build Coastguard Worker   // rotation altogether.
1054*9880d681SAndroid Build Coastguard Worker   if (!BlocksExitingToOuterLoop.empty() &&
1055*9880d681SAndroid Build Coastguard Worker       !BlocksExitingToOuterLoop.count(ExitingBB))
1056*9880d681SAndroid Build Coastguard Worker     return nullptr;
1057*9880d681SAndroid Build Coastguard Worker 
1058*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "  Best exiting block: " << getBlockName(ExitingBB) << "\n");
1059*9880d681SAndroid Build Coastguard Worker   return ExitingBB;
1060*9880d681SAndroid Build Coastguard Worker }
1061*9880d681SAndroid Build Coastguard Worker 
1062*9880d681SAndroid Build Coastguard Worker /// \brief Attempt to rotate an exiting block to the bottom of the loop.
1063*9880d681SAndroid Build Coastguard Worker ///
1064*9880d681SAndroid Build Coastguard Worker /// Once we have built a chain, try to rotate it to line up the hot exit block
1065*9880d681SAndroid Build Coastguard Worker /// with fallthrough out of the loop if doing so doesn't introduce unnecessary
1066*9880d681SAndroid Build Coastguard Worker /// branches. For example, if the loop has fallthrough into its header and out
1067*9880d681SAndroid Build Coastguard Worker /// of its bottom already, don't rotate it.
rotateLoop(BlockChain & LoopChain,MachineBasicBlock * ExitingBB,const BlockFilterSet & LoopBlockSet)1068*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
1069*9880d681SAndroid Build Coastguard Worker                                        MachineBasicBlock *ExitingBB,
1070*9880d681SAndroid Build Coastguard Worker                                        const BlockFilterSet &LoopBlockSet) {
1071*9880d681SAndroid Build Coastguard Worker   if (!ExitingBB)
1072*9880d681SAndroid Build Coastguard Worker     return;
1073*9880d681SAndroid Build Coastguard Worker 
1074*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *Top = *LoopChain.begin();
1075*9880d681SAndroid Build Coastguard Worker   bool ViableTopFallthrough = false;
1076*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *Pred : Top->predecessors()) {
1077*9880d681SAndroid Build Coastguard Worker     BlockChain *PredChain = BlockToChain[Pred];
1078*9880d681SAndroid Build Coastguard Worker     if (!LoopBlockSet.count(Pred) &&
1079*9880d681SAndroid Build Coastguard Worker         (!PredChain || Pred == *std::prev(PredChain->end()))) {
1080*9880d681SAndroid Build Coastguard Worker       ViableTopFallthrough = true;
1081*9880d681SAndroid Build Coastguard Worker       break;
1082*9880d681SAndroid Build Coastguard Worker     }
1083*9880d681SAndroid Build Coastguard Worker   }
1084*9880d681SAndroid Build Coastguard Worker 
1085*9880d681SAndroid Build Coastguard Worker   // If the header has viable fallthrough, check whether the current loop
1086*9880d681SAndroid Build Coastguard Worker   // bottom is a viable exiting block. If so, bail out as rotating will
1087*9880d681SAndroid Build Coastguard Worker   // introduce an unnecessary branch.
1088*9880d681SAndroid Build Coastguard Worker   if (ViableTopFallthrough) {
1089*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
1090*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Succ : Bottom->successors()) {
1091*9880d681SAndroid Build Coastguard Worker       BlockChain *SuccChain = BlockToChain[Succ];
1092*9880d681SAndroid Build Coastguard Worker       if (!LoopBlockSet.count(Succ) &&
1093*9880d681SAndroid Build Coastguard Worker           (!SuccChain || Succ == *SuccChain->begin()))
1094*9880d681SAndroid Build Coastguard Worker         return;
1095*9880d681SAndroid Build Coastguard Worker     }
1096*9880d681SAndroid Build Coastguard Worker   }
1097*9880d681SAndroid Build Coastguard Worker 
1098*9880d681SAndroid Build Coastguard Worker   BlockChain::iterator ExitIt =
1099*9880d681SAndroid Build Coastguard Worker       std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
1100*9880d681SAndroid Build Coastguard Worker   if (ExitIt == LoopChain.end())
1101*9880d681SAndroid Build Coastguard Worker     return;
1102*9880d681SAndroid Build Coastguard Worker 
1103*9880d681SAndroid Build Coastguard Worker   std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
1104*9880d681SAndroid Build Coastguard Worker }
1105*9880d681SAndroid Build Coastguard Worker 
1106*9880d681SAndroid Build Coastguard Worker /// \brief Attempt to rotate a loop based on profile data to reduce branch cost.
1107*9880d681SAndroid Build Coastguard Worker ///
1108*9880d681SAndroid Build Coastguard Worker /// With profile data, we can determine the cost in terms of missed fall through
1109*9880d681SAndroid Build Coastguard Worker /// opportunities when rotating a loop chain and select the best rotation.
1110*9880d681SAndroid Build Coastguard Worker /// Basically, there are three kinds of cost to consider for each rotation:
1111*9880d681SAndroid Build Coastguard Worker ///    1. The possibly missed fall through edge (if it exists) from BB out of
1112*9880d681SAndroid Build Coastguard Worker ///    the loop to the loop header.
1113*9880d681SAndroid Build Coastguard Worker ///    2. The possibly missed fall through edges (if they exist) from the loop
1114*9880d681SAndroid Build Coastguard Worker ///    exits to BB out of the loop.
1115*9880d681SAndroid Build Coastguard Worker ///    3. The missed fall through edge (if it exists) from the last BB to the
1116*9880d681SAndroid Build Coastguard Worker ///    first BB in the loop chain.
1117*9880d681SAndroid Build Coastguard Worker ///  Therefore, the cost for a given rotation is the sum of costs listed above.
1118*9880d681SAndroid Build Coastguard Worker ///  We select the best rotation with the smallest cost.
rotateLoopWithProfile(BlockChain & LoopChain,MachineLoop & L,const BlockFilterSet & LoopBlockSet)1119*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::rotateLoopWithProfile(
1120*9880d681SAndroid Build Coastguard Worker     BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
1121*9880d681SAndroid Build Coastguard Worker   auto HeaderBB = L.getHeader();
1122*9880d681SAndroid Build Coastguard Worker   auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
1123*9880d681SAndroid Build Coastguard Worker   auto RotationPos = LoopChain.end();
1124*9880d681SAndroid Build Coastguard Worker 
1125*9880d681SAndroid Build Coastguard Worker   BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
1126*9880d681SAndroid Build Coastguard Worker 
1127*9880d681SAndroid Build Coastguard Worker   // A utility lambda that scales up a block frequency by dividing it by a
1128*9880d681SAndroid Build Coastguard Worker   // branch probability which is the reciprocal of the scale.
1129*9880d681SAndroid Build Coastguard Worker   auto ScaleBlockFrequency = [](BlockFrequency Freq,
1130*9880d681SAndroid Build Coastguard Worker                                 unsigned Scale) -> BlockFrequency {
1131*9880d681SAndroid Build Coastguard Worker     if (Scale == 0)
1132*9880d681SAndroid Build Coastguard Worker       return 0;
1133*9880d681SAndroid Build Coastguard Worker     // Use operator / between BlockFrequency and BranchProbability to implement
1134*9880d681SAndroid Build Coastguard Worker     // saturating multiplication.
1135*9880d681SAndroid Build Coastguard Worker     return Freq / BranchProbability(1, Scale);
1136*9880d681SAndroid Build Coastguard Worker   };
1137*9880d681SAndroid Build Coastguard Worker 
1138*9880d681SAndroid Build Coastguard Worker   // Compute the cost of the missed fall-through edge to the loop header if the
1139*9880d681SAndroid Build Coastguard Worker   // chain head is not the loop header. As we only consider natural loops with
1140*9880d681SAndroid Build Coastguard Worker   // single header, this computation can be done only once.
1141*9880d681SAndroid Build Coastguard Worker   BlockFrequency HeaderFallThroughCost(0);
1142*9880d681SAndroid Build Coastguard Worker   for (auto *Pred : HeaderBB->predecessors()) {
1143*9880d681SAndroid Build Coastguard Worker     BlockChain *PredChain = BlockToChain[Pred];
1144*9880d681SAndroid Build Coastguard Worker     if (!LoopBlockSet.count(Pred) &&
1145*9880d681SAndroid Build Coastguard Worker         (!PredChain || Pred == *std::prev(PredChain->end()))) {
1146*9880d681SAndroid Build Coastguard Worker       auto EdgeFreq =
1147*9880d681SAndroid Build Coastguard Worker           MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
1148*9880d681SAndroid Build Coastguard Worker       auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
1149*9880d681SAndroid Build Coastguard Worker       // If the predecessor has only an unconditional jump to the header, we
1150*9880d681SAndroid Build Coastguard Worker       // need to consider the cost of this jump.
1151*9880d681SAndroid Build Coastguard Worker       if (Pred->succ_size() == 1)
1152*9880d681SAndroid Build Coastguard Worker         FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost);
1153*9880d681SAndroid Build Coastguard Worker       HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost);
1154*9880d681SAndroid Build Coastguard Worker     }
1155*9880d681SAndroid Build Coastguard Worker   }
1156*9880d681SAndroid Build Coastguard Worker 
1157*9880d681SAndroid Build Coastguard Worker   // Here we collect all exit blocks in the loop, and for each exit we find out
1158*9880d681SAndroid Build Coastguard Worker   // its hottest exit edge. For each loop rotation, we define the loop exit cost
1159*9880d681SAndroid Build Coastguard Worker   // as the sum of frequencies of exit edges we collect here, excluding the exit
1160*9880d681SAndroid Build Coastguard Worker   // edge from the tail of the loop chain.
1161*9880d681SAndroid Build Coastguard Worker   SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq;
1162*9880d681SAndroid Build Coastguard Worker   for (auto BB : LoopChain) {
1163*9880d681SAndroid Build Coastguard Worker     auto LargestExitEdgeProb = BranchProbability::getZero();
1164*9880d681SAndroid Build Coastguard Worker     for (auto *Succ : BB->successors()) {
1165*9880d681SAndroid Build Coastguard Worker       BlockChain *SuccChain = BlockToChain[Succ];
1166*9880d681SAndroid Build Coastguard Worker       if (!LoopBlockSet.count(Succ) &&
1167*9880d681SAndroid Build Coastguard Worker           (!SuccChain || Succ == *SuccChain->begin())) {
1168*9880d681SAndroid Build Coastguard Worker         auto SuccProb = MBPI->getEdgeProbability(BB, Succ);
1169*9880d681SAndroid Build Coastguard Worker         LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb);
1170*9880d681SAndroid Build Coastguard Worker       }
1171*9880d681SAndroid Build Coastguard Worker     }
1172*9880d681SAndroid Build Coastguard Worker     if (LargestExitEdgeProb > BranchProbability::getZero()) {
1173*9880d681SAndroid Build Coastguard Worker       auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb;
1174*9880d681SAndroid Build Coastguard Worker       ExitsWithFreq.emplace_back(BB, ExitFreq);
1175*9880d681SAndroid Build Coastguard Worker     }
1176*9880d681SAndroid Build Coastguard Worker   }
1177*9880d681SAndroid Build Coastguard Worker 
1178*9880d681SAndroid Build Coastguard Worker   // In this loop we iterate every block in the loop chain and calculate the
1179*9880d681SAndroid Build Coastguard Worker   // cost assuming the block is the head of the loop chain. When the loop ends,
1180*9880d681SAndroid Build Coastguard Worker   // we should have found the best candidate as the loop chain's head.
1181*9880d681SAndroid Build Coastguard Worker   for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()),
1182*9880d681SAndroid Build Coastguard Worker             EndIter = LoopChain.end();
1183*9880d681SAndroid Build Coastguard Worker        Iter != EndIter; Iter++, TailIter++) {
1184*9880d681SAndroid Build Coastguard Worker     // TailIter is used to track the tail of the loop chain if the block we are
1185*9880d681SAndroid Build Coastguard Worker     // checking (pointed by Iter) is the head of the chain.
1186*9880d681SAndroid Build Coastguard Worker     if (TailIter == LoopChain.end())
1187*9880d681SAndroid Build Coastguard Worker       TailIter = LoopChain.begin();
1188*9880d681SAndroid Build Coastguard Worker 
1189*9880d681SAndroid Build Coastguard Worker     auto TailBB = *TailIter;
1190*9880d681SAndroid Build Coastguard Worker 
1191*9880d681SAndroid Build Coastguard Worker     // Calculate the cost by putting this BB to the top.
1192*9880d681SAndroid Build Coastguard Worker     BlockFrequency Cost = 0;
1193*9880d681SAndroid Build Coastguard Worker 
1194*9880d681SAndroid Build Coastguard Worker     // If the current BB is the loop header, we need to take into account the
1195*9880d681SAndroid Build Coastguard Worker     // cost of the missed fall through edge from outside of the loop to the
1196*9880d681SAndroid Build Coastguard Worker     // header.
1197*9880d681SAndroid Build Coastguard Worker     if (Iter != HeaderIter)
1198*9880d681SAndroid Build Coastguard Worker       Cost += HeaderFallThroughCost;
1199*9880d681SAndroid Build Coastguard Worker 
1200*9880d681SAndroid Build Coastguard Worker     // Collect the loop exit cost by summing up frequencies of all exit edges
1201*9880d681SAndroid Build Coastguard Worker     // except the one from the chain tail.
1202*9880d681SAndroid Build Coastguard Worker     for (auto &ExitWithFreq : ExitsWithFreq)
1203*9880d681SAndroid Build Coastguard Worker       if (TailBB != ExitWithFreq.first)
1204*9880d681SAndroid Build Coastguard Worker         Cost += ExitWithFreq.second;
1205*9880d681SAndroid Build Coastguard Worker 
1206*9880d681SAndroid Build Coastguard Worker     // The cost of breaking the once fall-through edge from the tail to the top
1207*9880d681SAndroid Build Coastguard Worker     // of the loop chain. Here we need to consider three cases:
1208*9880d681SAndroid Build Coastguard Worker     // 1. If the tail node has only one successor, then we will get an
1209*9880d681SAndroid Build Coastguard Worker     //    additional jmp instruction. So the cost here is (MisfetchCost +
1210*9880d681SAndroid Build Coastguard Worker     //    JumpInstCost) * tail node frequency.
1211*9880d681SAndroid Build Coastguard Worker     // 2. If the tail node has two successors, then we may still get an
1212*9880d681SAndroid Build Coastguard Worker     //    additional jmp instruction if the layout successor after the loop
1213*9880d681SAndroid Build Coastguard Worker     //    chain is not its CFG successor. Note that the more frequently executed
1214*9880d681SAndroid Build Coastguard Worker     //    jmp instruction will be put ahead of the other one. Assume the
1215*9880d681SAndroid Build Coastguard Worker     //    frequency of those two branches are x and y, where x is the frequency
1216*9880d681SAndroid Build Coastguard Worker     //    of the edge to the chain head, then the cost will be
1217*9880d681SAndroid Build Coastguard Worker     //    (x * MisfetechCost + min(x, y) * JumpInstCost) * tail node frequency.
1218*9880d681SAndroid Build Coastguard Worker     // 3. If the tail node has more than two successors (this rarely happens),
1219*9880d681SAndroid Build Coastguard Worker     //    we won't consider any additional cost.
1220*9880d681SAndroid Build Coastguard Worker     if (TailBB->isSuccessor(*Iter)) {
1221*9880d681SAndroid Build Coastguard Worker       auto TailBBFreq = MBFI->getBlockFreq(TailBB);
1222*9880d681SAndroid Build Coastguard Worker       if (TailBB->succ_size() == 1)
1223*9880d681SAndroid Build Coastguard Worker         Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(),
1224*9880d681SAndroid Build Coastguard Worker                                     MisfetchCost + JumpInstCost);
1225*9880d681SAndroid Build Coastguard Worker       else if (TailBB->succ_size() == 2) {
1226*9880d681SAndroid Build Coastguard Worker         auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter);
1227*9880d681SAndroid Build Coastguard Worker         auto TailToHeadFreq = TailBBFreq * TailToHeadProb;
1228*9880d681SAndroid Build Coastguard Worker         auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2)
1229*9880d681SAndroid Build Coastguard Worker                                   ? TailBBFreq * TailToHeadProb.getCompl()
1230*9880d681SAndroid Build Coastguard Worker                                   : TailToHeadFreq;
1231*9880d681SAndroid Build Coastguard Worker         Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) +
1232*9880d681SAndroid Build Coastguard Worker                 ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost);
1233*9880d681SAndroid Build Coastguard Worker       }
1234*9880d681SAndroid Build Coastguard Worker     }
1235*9880d681SAndroid Build Coastguard Worker 
1236*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockName(*Iter)
1237*9880d681SAndroid Build Coastguard Worker                  << " to the top: " << Cost.getFrequency() << "\n");
1238*9880d681SAndroid Build Coastguard Worker 
1239*9880d681SAndroid Build Coastguard Worker     if (Cost < SmallestRotationCost) {
1240*9880d681SAndroid Build Coastguard Worker       SmallestRotationCost = Cost;
1241*9880d681SAndroid Build Coastguard Worker       RotationPos = Iter;
1242*9880d681SAndroid Build Coastguard Worker     }
1243*9880d681SAndroid Build Coastguard Worker   }
1244*9880d681SAndroid Build Coastguard Worker 
1245*9880d681SAndroid Build Coastguard Worker   if (RotationPos != LoopChain.end()) {
1246*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << "Rotate loop by making " << getBlockName(*RotationPos)
1247*9880d681SAndroid Build Coastguard Worker                  << " to the top\n");
1248*9880d681SAndroid Build Coastguard Worker     std::rotate(LoopChain.begin(), RotationPos, LoopChain.end());
1249*9880d681SAndroid Build Coastguard Worker   }
1250*9880d681SAndroid Build Coastguard Worker }
1251*9880d681SAndroid Build Coastguard Worker 
1252*9880d681SAndroid Build Coastguard Worker /// \brief Collect blocks in the given loop that are to be placed.
1253*9880d681SAndroid Build Coastguard Worker ///
1254*9880d681SAndroid Build Coastguard Worker /// When profile data is available, exclude cold blocks from the returned set;
1255*9880d681SAndroid Build Coastguard Worker /// otherwise, collect all blocks in the loop.
1256*9880d681SAndroid Build Coastguard Worker MachineBlockPlacement::BlockFilterSet
collectLoopBlockSet(MachineLoop & L)1257*9880d681SAndroid Build Coastguard Worker MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
1258*9880d681SAndroid Build Coastguard Worker   BlockFilterSet LoopBlockSet;
1259*9880d681SAndroid Build Coastguard Worker 
1260*9880d681SAndroid Build Coastguard Worker   // Filter cold blocks off from LoopBlockSet when profile data is available.
1261*9880d681SAndroid Build Coastguard Worker   // Collect the sum of frequencies of incoming edges to the loop header from
1262*9880d681SAndroid Build Coastguard Worker   // outside. If we treat the loop as a super block, this is the frequency of
1263*9880d681SAndroid Build Coastguard Worker   // the loop. Then for each block in the loop, we calculate the ratio between
1264*9880d681SAndroid Build Coastguard Worker   // its frequency and the frequency of the loop block. When it is too small,
1265*9880d681SAndroid Build Coastguard Worker   // don't add it to the loop chain. If there are outer loops, then this block
1266*9880d681SAndroid Build Coastguard Worker   // will be merged into the first outer loop chain for which this block is not
1267*9880d681SAndroid Build Coastguard Worker   // cold anymore. This needs precise profile data and we only do this when
1268*9880d681SAndroid Build Coastguard Worker   // profile data is available.
1269*9880d681SAndroid Build Coastguard Worker   if (F->getFunction()->getEntryCount()) {
1270*9880d681SAndroid Build Coastguard Worker     BlockFrequency LoopFreq(0);
1271*9880d681SAndroid Build Coastguard Worker     for (auto LoopPred : L.getHeader()->predecessors())
1272*9880d681SAndroid Build Coastguard Worker       if (!L.contains(LoopPred))
1273*9880d681SAndroid Build Coastguard Worker         LoopFreq += MBFI->getBlockFreq(LoopPred) *
1274*9880d681SAndroid Build Coastguard Worker                     MBPI->getEdgeProbability(LoopPred, L.getHeader());
1275*9880d681SAndroid Build Coastguard Worker 
1276*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *LoopBB : L.getBlocks()) {
1277*9880d681SAndroid Build Coastguard Worker       auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency();
1278*9880d681SAndroid Build Coastguard Worker       if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio)
1279*9880d681SAndroid Build Coastguard Worker         continue;
1280*9880d681SAndroid Build Coastguard Worker       LoopBlockSet.insert(LoopBB);
1281*9880d681SAndroid Build Coastguard Worker     }
1282*9880d681SAndroid Build Coastguard Worker   } else
1283*9880d681SAndroid Build Coastguard Worker     LoopBlockSet.insert(L.block_begin(), L.block_end());
1284*9880d681SAndroid Build Coastguard Worker 
1285*9880d681SAndroid Build Coastguard Worker   return LoopBlockSet;
1286*9880d681SAndroid Build Coastguard Worker }
1287*9880d681SAndroid Build Coastguard Worker 
1288*9880d681SAndroid Build Coastguard Worker /// \brief Forms basic block chains from the natural loop structures.
1289*9880d681SAndroid Build Coastguard Worker ///
1290*9880d681SAndroid Build Coastguard Worker /// These chains are designed to preserve the existing *structure* of the code
1291*9880d681SAndroid Build Coastguard Worker /// as much as possible. We can then stitch the chains together in a way which
1292*9880d681SAndroid Build Coastguard Worker /// both preserves the topological structure and minimizes taken conditional
1293*9880d681SAndroid Build Coastguard Worker /// branches.
buildLoopChains(MachineLoop & L)1294*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
1295*9880d681SAndroid Build Coastguard Worker   // First recurse through any nested loops, building chains for those inner
1296*9880d681SAndroid Build Coastguard Worker   // loops.
1297*9880d681SAndroid Build Coastguard Worker   for (MachineLoop *InnerLoop : L)
1298*9880d681SAndroid Build Coastguard Worker     buildLoopChains(*InnerLoop);
1299*9880d681SAndroid Build Coastguard Worker 
1300*9880d681SAndroid Build Coastguard Worker   assert(BlockWorkList.empty());
1301*9880d681SAndroid Build Coastguard Worker   assert(EHPadWorkList.empty());
1302*9880d681SAndroid Build Coastguard Worker   BlockFilterSet LoopBlockSet = collectLoopBlockSet(L);
1303*9880d681SAndroid Build Coastguard Worker 
1304*9880d681SAndroid Build Coastguard Worker   // Check if we have profile data for this function. If yes, we will rotate
1305*9880d681SAndroid Build Coastguard Worker   // this loop by modeling costs more precisely which requires the profile data
1306*9880d681SAndroid Build Coastguard Worker   // for better layout.
1307*9880d681SAndroid Build Coastguard Worker   bool RotateLoopWithProfile =
1308*9880d681SAndroid Build Coastguard Worker       ForcePreciseRotationCost ||
1309*9880d681SAndroid Build Coastguard Worker       (PreciseRotationCost && F->getFunction()->getEntryCount());
1310*9880d681SAndroid Build Coastguard Worker 
1311*9880d681SAndroid Build Coastguard Worker   // First check to see if there is an obviously preferable top block for the
1312*9880d681SAndroid Build Coastguard Worker   // loop. This will default to the header, but may end up as one of the
1313*9880d681SAndroid Build Coastguard Worker   // predecessors to the header if there is one which will result in strictly
1314*9880d681SAndroid Build Coastguard Worker   // fewer branches in the loop body.
1315*9880d681SAndroid Build Coastguard Worker   // When we use profile data to rotate the loop, this is unnecessary.
1316*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *LoopTop =
1317*9880d681SAndroid Build Coastguard Worker       RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
1318*9880d681SAndroid Build Coastguard Worker 
1319*9880d681SAndroid Build Coastguard Worker   // If we selected just the header for the loop top, look for a potentially
1320*9880d681SAndroid Build Coastguard Worker   // profitable exit block in the event that rotating the loop can eliminate
1321*9880d681SAndroid Build Coastguard Worker   // branches by placing an exit edge at the bottom.
1322*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *ExitingBB = nullptr;
1323*9880d681SAndroid Build Coastguard Worker   if (!RotateLoopWithProfile && LoopTop == L.getHeader())
1324*9880d681SAndroid Build Coastguard Worker     ExitingBB = findBestLoopExit(L, LoopBlockSet);
1325*9880d681SAndroid Build Coastguard Worker 
1326*9880d681SAndroid Build Coastguard Worker   BlockChain &LoopChain = *BlockToChain[LoopTop];
1327*9880d681SAndroid Build Coastguard Worker 
1328*9880d681SAndroid Build Coastguard Worker   // FIXME: This is a really lame way of walking the chains in the loop: we
1329*9880d681SAndroid Build Coastguard Worker   // walk the blocks, and use a set to prevent visiting a particular chain
1330*9880d681SAndroid Build Coastguard Worker   // twice.
1331*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
1332*9880d681SAndroid Build Coastguard Worker   assert(LoopChain.UnscheduledPredecessors == 0);
1333*9880d681SAndroid Build Coastguard Worker   UpdatedPreds.insert(&LoopChain);
1334*9880d681SAndroid Build Coastguard Worker 
1335*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *LoopBB : LoopBlockSet)
1336*9880d681SAndroid Build Coastguard Worker     fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
1337*9880d681SAndroid Build Coastguard Worker 
1338*9880d681SAndroid Build Coastguard Worker   buildChain(LoopTop, LoopChain, &LoopBlockSet);
1339*9880d681SAndroid Build Coastguard Worker 
1340*9880d681SAndroid Build Coastguard Worker   if (RotateLoopWithProfile)
1341*9880d681SAndroid Build Coastguard Worker     rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
1342*9880d681SAndroid Build Coastguard Worker   else
1343*9880d681SAndroid Build Coastguard Worker     rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
1344*9880d681SAndroid Build Coastguard Worker 
1345*9880d681SAndroid Build Coastguard Worker   DEBUG({
1346*9880d681SAndroid Build Coastguard Worker     // Crash at the end so we get all of the debugging output first.
1347*9880d681SAndroid Build Coastguard Worker     bool BadLoop = false;
1348*9880d681SAndroid Build Coastguard Worker     if (LoopChain.UnscheduledPredecessors) {
1349*9880d681SAndroid Build Coastguard Worker       BadLoop = true;
1350*9880d681SAndroid Build Coastguard Worker       dbgs() << "Loop chain contains a block without its preds placed!\n"
1351*9880d681SAndroid Build Coastguard Worker              << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
1352*9880d681SAndroid Build Coastguard Worker              << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
1353*9880d681SAndroid Build Coastguard Worker     }
1354*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *ChainBB : LoopChain) {
1355*9880d681SAndroid Build Coastguard Worker       dbgs() << "          ... " << getBlockName(ChainBB) << "\n";
1356*9880d681SAndroid Build Coastguard Worker       if (!LoopBlockSet.erase(ChainBB)) {
1357*9880d681SAndroid Build Coastguard Worker         // We don't mark the loop as bad here because there are real situations
1358*9880d681SAndroid Build Coastguard Worker         // where this can occur. For example, with an unanalyzable fallthrough
1359*9880d681SAndroid Build Coastguard Worker         // from a loop block to a non-loop block or vice versa.
1360*9880d681SAndroid Build Coastguard Worker         dbgs() << "Loop chain contains a block not contained by the loop!\n"
1361*9880d681SAndroid Build Coastguard Worker                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
1362*9880d681SAndroid Build Coastguard Worker                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
1363*9880d681SAndroid Build Coastguard Worker                << "  Bad block:    " << getBlockName(ChainBB) << "\n";
1364*9880d681SAndroid Build Coastguard Worker       }
1365*9880d681SAndroid Build Coastguard Worker     }
1366*9880d681SAndroid Build Coastguard Worker 
1367*9880d681SAndroid Build Coastguard Worker     if (!LoopBlockSet.empty()) {
1368*9880d681SAndroid Build Coastguard Worker       BadLoop = true;
1369*9880d681SAndroid Build Coastguard Worker       for (MachineBasicBlock *LoopBB : LoopBlockSet)
1370*9880d681SAndroid Build Coastguard Worker         dbgs() << "Loop contains blocks never placed into a chain!\n"
1371*9880d681SAndroid Build Coastguard Worker                << "  Loop header:  " << getBlockName(*L.block_begin()) << "\n"
1372*9880d681SAndroid Build Coastguard Worker                << "  Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
1373*9880d681SAndroid Build Coastguard Worker                << "  Bad block:    " << getBlockName(LoopBB) << "\n";
1374*9880d681SAndroid Build Coastguard Worker     }
1375*9880d681SAndroid Build Coastguard Worker     assert(!BadLoop && "Detected problems with the placement of this loop.");
1376*9880d681SAndroid Build Coastguard Worker   });
1377*9880d681SAndroid Build Coastguard Worker 
1378*9880d681SAndroid Build Coastguard Worker   BlockWorkList.clear();
1379*9880d681SAndroid Build Coastguard Worker   EHPadWorkList.clear();
1380*9880d681SAndroid Build Coastguard Worker }
1381*9880d681SAndroid Build Coastguard Worker 
1382*9880d681SAndroid Build Coastguard Worker /// When OutlineOpitonalBranches is on, this method colects BBs that
1383*9880d681SAndroid Build Coastguard Worker /// dominates all terminator blocks of the function \p F.
collectMustExecuteBBs()1384*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::collectMustExecuteBBs() {
1385*9880d681SAndroid Build Coastguard Worker   if (OutlineOptionalBranches) {
1386*9880d681SAndroid Build Coastguard Worker     // Find the nearest common dominator of all of F's terminators.
1387*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *Terminator = nullptr;
1388*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock &MBB : *F) {
1389*9880d681SAndroid Build Coastguard Worker       if (MBB.succ_size() == 0) {
1390*9880d681SAndroid Build Coastguard Worker         if (Terminator == nullptr)
1391*9880d681SAndroid Build Coastguard Worker           Terminator = &MBB;
1392*9880d681SAndroid Build Coastguard Worker         else
1393*9880d681SAndroid Build Coastguard Worker           Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
1394*9880d681SAndroid Build Coastguard Worker       }
1395*9880d681SAndroid Build Coastguard Worker     }
1396*9880d681SAndroid Build Coastguard Worker 
1397*9880d681SAndroid Build Coastguard Worker     // MBBs dominating this common dominator are unavoidable.
1398*9880d681SAndroid Build Coastguard Worker     UnavoidableBlocks.clear();
1399*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock &MBB : *F) {
1400*9880d681SAndroid Build Coastguard Worker       if (MDT->dominates(&MBB, Terminator)) {
1401*9880d681SAndroid Build Coastguard Worker         UnavoidableBlocks.insert(&MBB);
1402*9880d681SAndroid Build Coastguard Worker       }
1403*9880d681SAndroid Build Coastguard Worker     }
1404*9880d681SAndroid Build Coastguard Worker   }
1405*9880d681SAndroid Build Coastguard Worker }
1406*9880d681SAndroid Build Coastguard Worker 
buildCFGChains()1407*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::buildCFGChains() {
1408*9880d681SAndroid Build Coastguard Worker   // Ensure that every BB in the function has an associated chain to simplify
1409*9880d681SAndroid Build Coastguard Worker   // the assumptions of the remaining algorithm.
1410*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
1411*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator FI = F->begin(), FE = F->end(); FI != FE;
1412*9880d681SAndroid Build Coastguard Worker        ++FI) {
1413*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *BB = &*FI;
1414*9880d681SAndroid Build Coastguard Worker     BlockChain *Chain =
1415*9880d681SAndroid Build Coastguard Worker         new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
1416*9880d681SAndroid Build Coastguard Worker     // Also, merge any blocks which we cannot reason about and must preserve
1417*9880d681SAndroid Build Coastguard Worker     // the exact fallthrough behavior for.
1418*9880d681SAndroid Build Coastguard Worker     for (;;) {
1419*9880d681SAndroid Build Coastguard Worker       Cond.clear();
1420*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1421*9880d681SAndroid Build Coastguard Worker       if (!TII->analyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
1422*9880d681SAndroid Build Coastguard Worker         break;
1423*9880d681SAndroid Build Coastguard Worker 
1424*9880d681SAndroid Build Coastguard Worker       MachineFunction::iterator NextFI = std::next(FI);
1425*9880d681SAndroid Build Coastguard Worker       MachineBasicBlock *NextBB = &*NextFI;
1426*9880d681SAndroid Build Coastguard Worker       // Ensure that the layout successor is a viable block, as we know that
1427*9880d681SAndroid Build Coastguard Worker       // fallthrough is a possibility.
1428*9880d681SAndroid Build Coastguard Worker       assert(NextFI != FE && "Can't fallthrough past the last block.");
1429*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
1430*9880d681SAndroid Build Coastguard Worker                    << getBlockName(BB) << " -> " << getBlockName(NextBB)
1431*9880d681SAndroid Build Coastguard Worker                    << "\n");
1432*9880d681SAndroid Build Coastguard Worker       Chain->merge(NextBB, nullptr);
1433*9880d681SAndroid Build Coastguard Worker       FI = NextFI;
1434*9880d681SAndroid Build Coastguard Worker       BB = NextBB;
1435*9880d681SAndroid Build Coastguard Worker     }
1436*9880d681SAndroid Build Coastguard Worker   }
1437*9880d681SAndroid Build Coastguard Worker 
1438*9880d681SAndroid Build Coastguard Worker   // Turned on with OutlineOptionalBranches option
1439*9880d681SAndroid Build Coastguard Worker   collectMustExecuteBBs();
1440*9880d681SAndroid Build Coastguard Worker 
1441*9880d681SAndroid Build Coastguard Worker   // Build any loop-based chains.
1442*9880d681SAndroid Build Coastguard Worker   for (MachineLoop *L : *MLI)
1443*9880d681SAndroid Build Coastguard Worker     buildLoopChains(*L);
1444*9880d681SAndroid Build Coastguard Worker 
1445*9880d681SAndroid Build Coastguard Worker   assert(BlockWorkList.empty());
1446*9880d681SAndroid Build Coastguard Worker   assert(EHPadWorkList.empty());
1447*9880d681SAndroid Build Coastguard Worker 
1448*9880d681SAndroid Build Coastguard Worker   SmallPtrSet<BlockChain *, 4> UpdatedPreds;
1449*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock &MBB : *F)
1450*9880d681SAndroid Build Coastguard Worker     fillWorkLists(&MBB, UpdatedPreds);
1451*9880d681SAndroid Build Coastguard Worker 
1452*9880d681SAndroid Build Coastguard Worker   BlockChain &FunctionChain = *BlockToChain[&F->front()];
1453*9880d681SAndroid Build Coastguard Worker   buildChain(&F->front(), FunctionChain);
1454*9880d681SAndroid Build Coastguard Worker 
1455*9880d681SAndroid Build Coastguard Worker #ifndef NDEBUG
1456*9880d681SAndroid Build Coastguard Worker   typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
1457*9880d681SAndroid Build Coastguard Worker #endif
1458*9880d681SAndroid Build Coastguard Worker   DEBUG({
1459*9880d681SAndroid Build Coastguard Worker     // Crash at the end so we get all of the debugging output first.
1460*9880d681SAndroid Build Coastguard Worker     bool BadFunc = false;
1461*9880d681SAndroid Build Coastguard Worker     FunctionBlockSetType FunctionBlockSet;
1462*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock &MBB : *F)
1463*9880d681SAndroid Build Coastguard Worker       FunctionBlockSet.insert(&MBB);
1464*9880d681SAndroid Build Coastguard Worker 
1465*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *ChainBB : FunctionChain)
1466*9880d681SAndroid Build Coastguard Worker       if (!FunctionBlockSet.erase(ChainBB)) {
1467*9880d681SAndroid Build Coastguard Worker         BadFunc = true;
1468*9880d681SAndroid Build Coastguard Worker         dbgs() << "Function chain contains a block not in the function!\n"
1469*9880d681SAndroid Build Coastguard Worker                << "  Bad block:    " << getBlockName(ChainBB) << "\n";
1470*9880d681SAndroid Build Coastguard Worker       }
1471*9880d681SAndroid Build Coastguard Worker 
1472*9880d681SAndroid Build Coastguard Worker     if (!FunctionBlockSet.empty()) {
1473*9880d681SAndroid Build Coastguard Worker       BadFunc = true;
1474*9880d681SAndroid Build Coastguard Worker       for (MachineBasicBlock *RemainingBB : FunctionBlockSet)
1475*9880d681SAndroid Build Coastguard Worker         dbgs() << "Function contains blocks never placed into a chain!\n"
1476*9880d681SAndroid Build Coastguard Worker                << "  Bad block:    " << getBlockName(RemainingBB) << "\n";
1477*9880d681SAndroid Build Coastguard Worker     }
1478*9880d681SAndroid Build Coastguard Worker     assert(!BadFunc && "Detected problems with the block placement.");
1479*9880d681SAndroid Build Coastguard Worker   });
1480*9880d681SAndroid Build Coastguard Worker 
1481*9880d681SAndroid Build Coastguard Worker   // Splice the blocks into place.
1482*9880d681SAndroid Build Coastguard Worker   MachineFunction::iterator InsertPos = F->begin();
1483*9880d681SAndroid Build Coastguard Worker   DEBUG(dbgs() << "[MBP] Function: "<< F->getName() << "\n");
1484*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *ChainBB : FunctionChain) {
1485*9880d681SAndroid Build Coastguard Worker     DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain "
1486*9880d681SAndroid Build Coastguard Worker                                                        : "          ... ")
1487*9880d681SAndroid Build Coastguard Worker                  << getBlockName(ChainBB) << "\n");
1488*9880d681SAndroid Build Coastguard Worker     if (InsertPos != MachineFunction::iterator(ChainBB))
1489*9880d681SAndroid Build Coastguard Worker       F->splice(InsertPos, ChainBB);
1490*9880d681SAndroid Build Coastguard Worker     else
1491*9880d681SAndroid Build Coastguard Worker       ++InsertPos;
1492*9880d681SAndroid Build Coastguard Worker 
1493*9880d681SAndroid Build Coastguard Worker     // Update the terminator of the previous block.
1494*9880d681SAndroid Build Coastguard Worker     if (ChainBB == *FunctionChain.begin())
1495*9880d681SAndroid Build Coastguard Worker       continue;
1496*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB));
1497*9880d681SAndroid Build Coastguard Worker 
1498*9880d681SAndroid Build Coastguard Worker     // FIXME: It would be awesome of updateTerminator would just return rather
1499*9880d681SAndroid Build Coastguard Worker     // than assert when the branch cannot be analyzed in order to remove this
1500*9880d681SAndroid Build Coastguard Worker     // boiler plate.
1501*9880d681SAndroid Build Coastguard Worker     Cond.clear();
1502*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1503*9880d681SAndroid Build Coastguard Worker 
1504*9880d681SAndroid Build Coastguard Worker     // The "PrevBB" is not yet updated to reflect current code layout, so,
1505*9880d681SAndroid Build Coastguard Worker     //   o. it may fall-through to a block without explict "goto" instruction
1506*9880d681SAndroid Build Coastguard Worker     //      before layout, and no longer fall-through it after layout; or
1507*9880d681SAndroid Build Coastguard Worker     //   o. just opposite.
1508*9880d681SAndroid Build Coastguard Worker     //
1509*9880d681SAndroid Build Coastguard Worker     // analyzeBranch() may return erroneous value for FBB when these two
1510*9880d681SAndroid Build Coastguard Worker     // situations take place. For the first scenario FBB is mistakenly set NULL;
1511*9880d681SAndroid Build Coastguard Worker     // for the 2nd scenario, the FBB, which is expected to be NULL, is
1512*9880d681SAndroid Build Coastguard Worker     // mistakenly pointing to "*BI".
1513*9880d681SAndroid Build Coastguard Worker     // Thus, if the future change needs to use FBB before the layout is set, it
1514*9880d681SAndroid Build Coastguard Worker     // has to correct FBB first by using the code similar to the following:
1515*9880d681SAndroid Build Coastguard Worker     //
1516*9880d681SAndroid Build Coastguard Worker     // if (!Cond.empty() && (!FBB || FBB == ChainBB)) {
1517*9880d681SAndroid Build Coastguard Worker     //   PrevBB->updateTerminator();
1518*9880d681SAndroid Build Coastguard Worker     //   Cond.clear();
1519*9880d681SAndroid Build Coastguard Worker     //   TBB = FBB = nullptr;
1520*9880d681SAndroid Build Coastguard Worker     //   if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) {
1521*9880d681SAndroid Build Coastguard Worker     //     // FIXME: This should never take place.
1522*9880d681SAndroid Build Coastguard Worker     //     TBB = FBB = nullptr;
1523*9880d681SAndroid Build Coastguard Worker     //   }
1524*9880d681SAndroid Build Coastguard Worker     // }
1525*9880d681SAndroid Build Coastguard Worker     if (!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond))
1526*9880d681SAndroid Build Coastguard Worker       PrevBB->updateTerminator();
1527*9880d681SAndroid Build Coastguard Worker   }
1528*9880d681SAndroid Build Coastguard Worker 
1529*9880d681SAndroid Build Coastguard Worker   // Fixup the last block.
1530*9880d681SAndroid Build Coastguard Worker   Cond.clear();
1531*9880d681SAndroid Build Coastguard Worker   MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1532*9880d681SAndroid Build Coastguard Worker   if (!TII->analyzeBranch(F->back(), TBB, FBB, Cond))
1533*9880d681SAndroid Build Coastguard Worker     F->back().updateTerminator();
1534*9880d681SAndroid Build Coastguard Worker 
1535*9880d681SAndroid Build Coastguard Worker   BlockWorkList.clear();
1536*9880d681SAndroid Build Coastguard Worker   EHPadWorkList.clear();
1537*9880d681SAndroid Build Coastguard Worker }
1538*9880d681SAndroid Build Coastguard Worker 
optimizeBranches()1539*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::optimizeBranches() {
1540*9880d681SAndroid Build Coastguard Worker   BlockChain &FunctionChain = *BlockToChain[&F->front()];
1541*9880d681SAndroid Build Coastguard Worker   SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
1542*9880d681SAndroid Build Coastguard Worker 
1543*9880d681SAndroid Build Coastguard Worker   // Now that all the basic blocks in the chain have the proper layout,
1544*9880d681SAndroid Build Coastguard Worker   // make a final call to AnalyzeBranch with AllowModify set.
1545*9880d681SAndroid Build Coastguard Worker   // Indeed, the target may be able to optimize the branches in a way we
1546*9880d681SAndroid Build Coastguard Worker   // cannot because all branches may not be analyzable.
1547*9880d681SAndroid Build Coastguard Worker   // E.g., the target may be able to remove an unconditional branch to
1548*9880d681SAndroid Build Coastguard Worker   // a fallthrough when it occurs after predicated terminators.
1549*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *ChainBB : FunctionChain) {
1550*9880d681SAndroid Build Coastguard Worker     Cond.clear();
1551*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
1552*9880d681SAndroid Build Coastguard Worker     if (!TII->analyzeBranch(*ChainBB, TBB, FBB, Cond, /*AllowModify*/ true)) {
1553*9880d681SAndroid Build Coastguard Worker       // If PrevBB has a two-way branch, try to re-order the branches
1554*9880d681SAndroid Build Coastguard Worker       // such that we branch to the successor with higher probability first.
1555*9880d681SAndroid Build Coastguard Worker       if (TBB && !Cond.empty() && FBB &&
1556*9880d681SAndroid Build Coastguard Worker           MBPI->getEdgeProbability(ChainBB, FBB) >
1557*9880d681SAndroid Build Coastguard Worker               MBPI->getEdgeProbability(ChainBB, TBB) &&
1558*9880d681SAndroid Build Coastguard Worker           !TII->ReverseBranchCondition(Cond)) {
1559*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "Reverse order of the two branches: "
1560*9880d681SAndroid Build Coastguard Worker                      << getBlockName(ChainBB) << "\n");
1561*9880d681SAndroid Build Coastguard Worker         DEBUG(dbgs() << "    Edge probability: "
1562*9880d681SAndroid Build Coastguard Worker                      << MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
1563*9880d681SAndroid Build Coastguard Worker                      << MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
1564*9880d681SAndroid Build Coastguard Worker         DebugLoc dl; // FIXME: this is nowhere
1565*9880d681SAndroid Build Coastguard Worker         TII->RemoveBranch(*ChainBB);
1566*9880d681SAndroid Build Coastguard Worker         TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
1567*9880d681SAndroid Build Coastguard Worker         ChainBB->updateTerminator();
1568*9880d681SAndroid Build Coastguard Worker       }
1569*9880d681SAndroid Build Coastguard Worker     }
1570*9880d681SAndroid Build Coastguard Worker   }
1571*9880d681SAndroid Build Coastguard Worker }
1572*9880d681SAndroid Build Coastguard Worker 
alignBlocks()1573*9880d681SAndroid Build Coastguard Worker void MachineBlockPlacement::alignBlocks() {
1574*9880d681SAndroid Build Coastguard Worker   // Walk through the backedges of the function now that we have fully laid out
1575*9880d681SAndroid Build Coastguard Worker   // the basic blocks and align the destination of each backedge. We don't rely
1576*9880d681SAndroid Build Coastguard Worker   // exclusively on the loop info here so that we can align backedges in
1577*9880d681SAndroid Build Coastguard Worker   // unnatural CFGs and backedges that were introduced purely because of the
1578*9880d681SAndroid Build Coastguard Worker   // loop rotations done during this layout pass.
1579*9880d681SAndroid Build Coastguard Worker   if (F->getFunction()->optForSize())
1580*9880d681SAndroid Build Coastguard Worker     return;
1581*9880d681SAndroid Build Coastguard Worker   BlockChain &FunctionChain = *BlockToChain[&F->front()];
1582*9880d681SAndroid Build Coastguard Worker   if (FunctionChain.begin() == FunctionChain.end())
1583*9880d681SAndroid Build Coastguard Worker     return; // Empty chain.
1584*9880d681SAndroid Build Coastguard Worker 
1585*9880d681SAndroid Build Coastguard Worker   const BranchProbability ColdProb(1, 5); // 20%
1586*9880d681SAndroid Build Coastguard Worker   BlockFrequency EntryFreq = MBFI->getBlockFreq(&F->front());
1587*9880d681SAndroid Build Coastguard Worker   BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb;
1588*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock *ChainBB : FunctionChain) {
1589*9880d681SAndroid Build Coastguard Worker     if (ChainBB == *FunctionChain.begin())
1590*9880d681SAndroid Build Coastguard Worker       continue;
1591*9880d681SAndroid Build Coastguard Worker 
1592*9880d681SAndroid Build Coastguard Worker     // Don't align non-looping basic blocks. These are unlikely to execute
1593*9880d681SAndroid Build Coastguard Worker     // enough times to matter in practice. Note that we'll still handle
1594*9880d681SAndroid Build Coastguard Worker     // unnatural CFGs inside of a natural outer loop (the common case) and
1595*9880d681SAndroid Build Coastguard Worker     // rotated loops.
1596*9880d681SAndroid Build Coastguard Worker     MachineLoop *L = MLI->getLoopFor(ChainBB);
1597*9880d681SAndroid Build Coastguard Worker     if (!L)
1598*9880d681SAndroid Build Coastguard Worker       continue;
1599*9880d681SAndroid Build Coastguard Worker 
1600*9880d681SAndroid Build Coastguard Worker     unsigned Align = TLI->getPrefLoopAlignment(L);
1601*9880d681SAndroid Build Coastguard Worker     if (!Align)
1602*9880d681SAndroid Build Coastguard Worker       continue; // Don't care about loop alignment.
1603*9880d681SAndroid Build Coastguard Worker 
1604*9880d681SAndroid Build Coastguard Worker     // If the block is cold relative to the function entry don't waste space
1605*9880d681SAndroid Build Coastguard Worker     // aligning it.
1606*9880d681SAndroid Build Coastguard Worker     BlockFrequency Freq = MBFI->getBlockFreq(ChainBB);
1607*9880d681SAndroid Build Coastguard Worker     if (Freq < WeightedEntryFreq)
1608*9880d681SAndroid Build Coastguard Worker       continue;
1609*9880d681SAndroid Build Coastguard Worker 
1610*9880d681SAndroid Build Coastguard Worker     // If the block is cold relative to its loop header, don't align it
1611*9880d681SAndroid Build Coastguard Worker     // regardless of what edges into the block exist.
1612*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *LoopHeader = L->getHeader();
1613*9880d681SAndroid Build Coastguard Worker     BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader);
1614*9880d681SAndroid Build Coastguard Worker     if (Freq < (LoopHeaderFreq * ColdProb))
1615*9880d681SAndroid Build Coastguard Worker       continue;
1616*9880d681SAndroid Build Coastguard Worker 
1617*9880d681SAndroid Build Coastguard Worker     // Check for the existence of a non-layout predecessor which would benefit
1618*9880d681SAndroid Build Coastguard Worker     // from aligning this block.
1619*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock *LayoutPred =
1620*9880d681SAndroid Build Coastguard Worker         &*std::prev(MachineFunction::iterator(ChainBB));
1621*9880d681SAndroid Build Coastguard Worker 
1622*9880d681SAndroid Build Coastguard Worker     // Force alignment if all the predecessors are jumps. We already checked
1623*9880d681SAndroid Build Coastguard Worker     // that the block isn't cold above.
1624*9880d681SAndroid Build Coastguard Worker     if (!LayoutPred->isSuccessor(ChainBB)) {
1625*9880d681SAndroid Build Coastguard Worker       ChainBB->setAlignment(Align);
1626*9880d681SAndroid Build Coastguard Worker       continue;
1627*9880d681SAndroid Build Coastguard Worker     }
1628*9880d681SAndroid Build Coastguard Worker 
1629*9880d681SAndroid Build Coastguard Worker     // Align this block if the layout predecessor's edge into this block is
1630*9880d681SAndroid Build Coastguard Worker     // cold relative to the block. When this is true, other predecessors make up
1631*9880d681SAndroid Build Coastguard Worker     // all of the hot entries into the block and thus alignment is likely to be
1632*9880d681SAndroid Build Coastguard Worker     // important.
1633*9880d681SAndroid Build Coastguard Worker     BranchProbability LayoutProb =
1634*9880d681SAndroid Build Coastguard Worker         MBPI->getEdgeProbability(LayoutPred, ChainBB);
1635*9880d681SAndroid Build Coastguard Worker     BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb;
1636*9880d681SAndroid Build Coastguard Worker     if (LayoutEdgeFreq <= (Freq * ColdProb))
1637*9880d681SAndroid Build Coastguard Worker       ChainBB->setAlignment(Align);
1638*9880d681SAndroid Build Coastguard Worker   }
1639*9880d681SAndroid Build Coastguard Worker }
1640*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)1641*9880d681SAndroid Build Coastguard Worker bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
1642*9880d681SAndroid Build Coastguard Worker   if (skipFunction(*MF.getFunction()))
1643*9880d681SAndroid Build Coastguard Worker     return false;
1644*9880d681SAndroid Build Coastguard Worker 
1645*9880d681SAndroid Build Coastguard Worker   // Check for single-block functions and skip them.
1646*9880d681SAndroid Build Coastguard Worker   if (std::next(MF.begin()) == MF.end())
1647*9880d681SAndroid Build Coastguard Worker     return false;
1648*9880d681SAndroid Build Coastguard Worker 
1649*9880d681SAndroid Build Coastguard Worker   F = &MF;
1650*9880d681SAndroid Build Coastguard Worker   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1651*9880d681SAndroid Build Coastguard Worker   MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
1652*9880d681SAndroid Build Coastguard Worker       getAnalysis<MachineBlockFrequencyInfo>());
1653*9880d681SAndroid Build Coastguard Worker   MLI = &getAnalysis<MachineLoopInfo>();
1654*9880d681SAndroid Build Coastguard Worker   TII = MF.getSubtarget().getInstrInfo();
1655*9880d681SAndroid Build Coastguard Worker   TLI = MF.getSubtarget().getTargetLowering();
1656*9880d681SAndroid Build Coastguard Worker   MDT = &getAnalysis<MachineDominatorTree>();
1657*9880d681SAndroid Build Coastguard Worker   assert(BlockToChain.empty());
1658*9880d681SAndroid Build Coastguard Worker 
1659*9880d681SAndroid Build Coastguard Worker   buildCFGChains();
1660*9880d681SAndroid Build Coastguard Worker 
1661*9880d681SAndroid Build Coastguard Worker   // Changing the layout can create new tail merging opportunities.
1662*9880d681SAndroid Build Coastguard Worker   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
1663*9880d681SAndroid Build Coastguard Worker   // TailMerge can create jump into if branches that make CFG irreducible for
1664*9880d681SAndroid Build Coastguard Worker   // HW that requires structurized CFG.
1665*9880d681SAndroid Build Coastguard Worker   bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
1666*9880d681SAndroid Build Coastguard Worker                          PassConfig->getEnableTailMerge() &&
1667*9880d681SAndroid Build Coastguard Worker                          BranchFoldPlacement;
1668*9880d681SAndroid Build Coastguard Worker   // No tail merging opportunities if the block number is less than four.
1669*9880d681SAndroid Build Coastguard Worker   if (MF.size() > 3 && EnableTailMerge) {
1670*9880d681SAndroid Build Coastguard Worker     BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
1671*9880d681SAndroid Build Coastguard Worker                     *MBPI);
1672*9880d681SAndroid Build Coastguard Worker 
1673*9880d681SAndroid Build Coastguard Worker     if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
1674*9880d681SAndroid Build Coastguard Worker                             getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
1675*9880d681SAndroid Build Coastguard Worker                             /*AfterBlockPlacement=*/true)) {
1676*9880d681SAndroid Build Coastguard Worker       // Redo the layout if tail merging creates/removes/moves blocks.
1677*9880d681SAndroid Build Coastguard Worker       BlockToChain.clear();
1678*9880d681SAndroid Build Coastguard Worker       ChainAllocator.DestroyAll();
1679*9880d681SAndroid Build Coastguard Worker       buildCFGChains();
1680*9880d681SAndroid Build Coastguard Worker     }
1681*9880d681SAndroid Build Coastguard Worker   }
1682*9880d681SAndroid Build Coastguard Worker 
1683*9880d681SAndroid Build Coastguard Worker   optimizeBranches();
1684*9880d681SAndroid Build Coastguard Worker   alignBlocks();
1685*9880d681SAndroid Build Coastguard Worker 
1686*9880d681SAndroid Build Coastguard Worker   BlockToChain.clear();
1687*9880d681SAndroid Build Coastguard Worker   ChainAllocator.DestroyAll();
1688*9880d681SAndroid Build Coastguard Worker 
1689*9880d681SAndroid Build Coastguard Worker   if (AlignAllBlock)
1690*9880d681SAndroid Build Coastguard Worker     // Align all of the blocks in the function to a specific alignment.
1691*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock &MBB : MF)
1692*9880d681SAndroid Build Coastguard Worker       MBB.setAlignment(AlignAllBlock);
1693*9880d681SAndroid Build Coastguard Worker   else if (AlignAllNonFallThruBlocks) {
1694*9880d681SAndroid Build Coastguard Worker     // Align all of the blocks that have no fall-through predecessors to a
1695*9880d681SAndroid Build Coastguard Worker     // specific alignment.
1696*9880d681SAndroid Build Coastguard Worker     for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
1697*9880d681SAndroid Build Coastguard Worker       auto LayoutPred = std::prev(MBI);
1698*9880d681SAndroid Build Coastguard Worker       if (!LayoutPred->isSuccessor(&*MBI))
1699*9880d681SAndroid Build Coastguard Worker         MBI->setAlignment(AlignAllNonFallThruBlocks);
1700*9880d681SAndroid Build Coastguard Worker     }
1701*9880d681SAndroid Build Coastguard Worker   }
1702*9880d681SAndroid Build Coastguard Worker 
1703*9880d681SAndroid Build Coastguard Worker   // We always return true as we have no way to track whether the final order
1704*9880d681SAndroid Build Coastguard Worker   // differs from the original order.
1705*9880d681SAndroid Build Coastguard Worker   return true;
1706*9880d681SAndroid Build Coastguard Worker }
1707*9880d681SAndroid Build Coastguard Worker 
1708*9880d681SAndroid Build Coastguard Worker namespace {
1709*9880d681SAndroid Build Coastguard Worker /// \brief A pass to compute block placement statistics.
1710*9880d681SAndroid Build Coastguard Worker ///
1711*9880d681SAndroid Build Coastguard Worker /// A separate pass to compute interesting statistics for evaluating block
1712*9880d681SAndroid Build Coastguard Worker /// placement. This is separate from the actual placement pass so that they can
1713*9880d681SAndroid Build Coastguard Worker /// be computed in the absence of any placement transformations or when using
1714*9880d681SAndroid Build Coastguard Worker /// alternative placement strategies.
1715*9880d681SAndroid Build Coastguard Worker class MachineBlockPlacementStats : public MachineFunctionPass {
1716*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the branch probability pass.
1717*9880d681SAndroid Build Coastguard Worker   const MachineBranchProbabilityInfo *MBPI;
1718*9880d681SAndroid Build Coastguard Worker 
1719*9880d681SAndroid Build Coastguard Worker   /// \brief A handle to the function-wide block frequency pass.
1720*9880d681SAndroid Build Coastguard Worker   const MachineBlockFrequencyInfo *MBFI;
1721*9880d681SAndroid Build Coastguard Worker 
1722*9880d681SAndroid Build Coastguard Worker public:
1723*9880d681SAndroid Build Coastguard Worker   static char ID; // Pass identification, replacement for typeid
MachineBlockPlacementStats()1724*9880d681SAndroid Build Coastguard Worker   MachineBlockPlacementStats() : MachineFunctionPass(ID) {
1725*9880d681SAndroid Build Coastguard Worker     initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
1726*9880d681SAndroid Build Coastguard Worker   }
1727*9880d681SAndroid Build Coastguard Worker 
1728*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &F) override;
1729*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const1730*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
1731*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineBranchProbabilityInfo>();
1732*9880d681SAndroid Build Coastguard Worker     AU.addRequired<MachineBlockFrequencyInfo>();
1733*9880d681SAndroid Build Coastguard Worker     AU.setPreservesAll();
1734*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass::getAnalysisUsage(AU);
1735*9880d681SAndroid Build Coastguard Worker   }
1736*9880d681SAndroid Build Coastguard Worker };
1737*9880d681SAndroid Build Coastguard Worker }
1738*9880d681SAndroid Build Coastguard Worker 
1739*9880d681SAndroid Build Coastguard Worker char MachineBlockPlacementStats::ID = 0;
1740*9880d681SAndroid Build Coastguard Worker char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
1741*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
1742*9880d681SAndroid Build Coastguard Worker                       "Basic Block Placement Stats", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)1743*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
1744*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
1745*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
1746*9880d681SAndroid Build Coastguard Worker                     "Basic Block Placement Stats", false, false)
1747*9880d681SAndroid Build Coastguard Worker 
1748*9880d681SAndroid Build Coastguard Worker bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
1749*9880d681SAndroid Build Coastguard Worker   // Check for single-block functions and skip them.
1750*9880d681SAndroid Build Coastguard Worker   if (std::next(F.begin()) == F.end())
1751*9880d681SAndroid Build Coastguard Worker     return false;
1752*9880d681SAndroid Build Coastguard Worker 
1753*9880d681SAndroid Build Coastguard Worker   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
1754*9880d681SAndroid Build Coastguard Worker   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
1755*9880d681SAndroid Build Coastguard Worker 
1756*9880d681SAndroid Build Coastguard Worker   for (MachineBasicBlock &MBB : F) {
1757*9880d681SAndroid Build Coastguard Worker     BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB);
1758*9880d681SAndroid Build Coastguard Worker     Statistic &NumBranches =
1759*9880d681SAndroid Build Coastguard Worker         (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches;
1760*9880d681SAndroid Build Coastguard Worker     Statistic &BranchTakenFreq =
1761*9880d681SAndroid Build Coastguard Worker         (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq;
1762*9880d681SAndroid Build Coastguard Worker     for (MachineBasicBlock *Succ : MBB.successors()) {
1763*9880d681SAndroid Build Coastguard Worker       // Skip if this successor is a fallthrough.
1764*9880d681SAndroid Build Coastguard Worker       if (MBB.isLayoutSuccessor(Succ))
1765*9880d681SAndroid Build Coastguard Worker         continue;
1766*9880d681SAndroid Build Coastguard Worker 
1767*9880d681SAndroid Build Coastguard Worker       BlockFrequency EdgeFreq =
1768*9880d681SAndroid Build Coastguard Worker           BlockFreq * MBPI->getEdgeProbability(&MBB, Succ);
1769*9880d681SAndroid Build Coastguard Worker       ++NumBranches;
1770*9880d681SAndroid Build Coastguard Worker       BranchTakenFreq += EdgeFreq.getFrequency();
1771*9880d681SAndroid Build Coastguard Worker     }
1772*9880d681SAndroid Build Coastguard Worker   }
1773*9880d681SAndroid Build Coastguard Worker 
1774*9880d681SAndroid Build Coastguard Worker   return false;
1775*9880d681SAndroid Build Coastguard Worker }
1776