Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. ////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file
  10. /// This file provides the interface for the sampled PGO profile loader base
  11. /// implementation.
  12. //
  13. //===----------------------------------------------------------------------===//
  14.  
  15. #ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
  16. #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
  17.  
  18. #include "llvm/ADT/ArrayRef.h"
  19. #include "llvm/ADT/DenseMap.h"
  20. #include "llvm/ADT/DenseSet.h"
  21. #include "llvm/ADT/SmallPtrSet.h"
  22. #include "llvm/ADT/SmallSet.h"
  23. #include "llvm/ADT/SmallVector.h"
  24. #include "llvm/Analysis/LoopInfo.h"
  25. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  26. #include "llvm/Analysis/PostDominators.h"
  27. #include "llvm/IR/BasicBlock.h"
  28. #include "llvm/IR/CFG.h"
  29. #include "llvm/IR/DebugInfoMetadata.h"
  30. #include "llvm/IR/DebugLoc.h"
  31. #include "llvm/IR/Dominators.h"
  32. #include "llvm/IR/Function.h"
  33. #include "llvm/IR/Instruction.h"
  34. #include "llvm/IR/Instructions.h"
  35. #include "llvm/IR/Module.h"
  36. #include "llvm/ProfileData/SampleProf.h"
  37. #include "llvm/ProfileData/SampleProfReader.h"
  38. #include "llvm/Support/CommandLine.h"
  39. #include "llvm/Support/GenericDomTree.h"
  40. #include "llvm/Support/raw_ostream.h"
  41. #include "llvm/Transforms/Utils/SampleProfileInference.h"
  42. #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
  43.  
  44. namespace llvm {
  45. using namespace sampleprof;
  46. using namespace sampleprofutil;
  47. using ProfileCount = Function::ProfileCount;
  48.  
  49. #define DEBUG_TYPE "sample-profile-impl"
  50.  
  51. namespace afdo_detail {
  52.  
  53. template <typename BlockT> struct IRTraits;
  54. template <> struct IRTraits<BasicBlock> {
  55.   using InstructionT = Instruction;
  56.   using BasicBlockT = BasicBlock;
  57.   using FunctionT = Function;
  58.   using BlockFrequencyInfoT = BlockFrequencyInfo;
  59.   using LoopT = Loop;
  60.   using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
  61.   using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
  62.   using PostDominatorTreeT = PostDominatorTree;
  63.   using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
  64.   using OptRemarkEmitterT = OptimizationRemarkEmitter;
  65.   using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
  66.   using PredRangeT = pred_range;
  67.   using SuccRangeT = succ_range;
  68.   static Function &getFunction(Function &F) { return F; }
  69.   static const BasicBlock *getEntryBB(const Function *F) {
  70.     return &F->getEntryBlock();
  71.   }
  72.   static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
  73.   static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
  74. };
  75.  
  76. } // end namespace afdo_detail
  77.  
  78. extern cl::opt<bool> SampleProfileUseProfi;
  79.  
  80. template <typename BT> class SampleProfileLoaderBaseImpl {
  81. public:
  82.   SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
  83.       : Filename(Name), RemappingFilename(RemapName) {}
  84.   void dump() { Reader->dump(); }
  85.  
  86.   using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
  87.   using BasicBlockT = typename afdo_detail::IRTraits<BT>::BasicBlockT;
  88.   using BlockFrequencyInfoT =
  89.       typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
  90.   using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
  91.   using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
  92.   using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
  93.   using DominatorTreePtrT =
  94.       typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
  95.   using PostDominatorTreePtrT =
  96.       typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
  97.   using PostDominatorTreeT =
  98.       typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
  99.   using OptRemarkEmitterT =
  100.       typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
  101.   using OptRemarkAnalysisT =
  102.       typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
  103.   using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
  104.   using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
  105.  
  106.   using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
  107.   using EquivalenceClassMap =
  108.       DenseMap<const BasicBlockT *, const BasicBlockT *>;
  109.   using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
  110.   using EdgeWeightMap = DenseMap<Edge, uint64_t>;
  111.   using BlockEdgeMap =
  112.       DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>;
  113.  
  114. protected:
  115.   ~SampleProfileLoaderBaseImpl() = default;
  116.   friend class SampleCoverageTracker;
  117.  
  118.   Function &getFunction(FunctionT &F) {
  119.     return afdo_detail::IRTraits<BT>::getFunction(F);
  120.   }
  121.   const BasicBlockT *getEntryBB(const FunctionT *F) {
  122.     return afdo_detail::IRTraits<BT>::getEntryBB(F);
  123.   }
  124.   PredRangeT getPredecessors(BasicBlockT *BB) {
  125.     return afdo_detail::IRTraits<BT>::getPredecessors(BB);
  126.   }
  127.   SuccRangeT getSuccessors(BasicBlockT *BB) {
  128.     return afdo_detail::IRTraits<BT>::getSuccessors(BB);
  129.   }
  130.  
  131.   unsigned getFunctionLoc(FunctionT &Func);
  132.   virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
  133.   ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
  134.   ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
  135.   mutable DenseMap<const DILocation *, const FunctionSamples *>
  136.       DILocation2SampleMap;
  137.   virtual const FunctionSamples *
  138.   findFunctionSamples(const InstructionT &I) const;
  139.   void printEdgeWeight(raw_ostream &OS, Edge E);
  140.   void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const;
  141.   void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB);
  142.   bool computeBlockWeights(FunctionT &F);
  143.   void findEquivalenceClasses(FunctionT &F);
  144.   void findEquivalencesFor(BasicBlockT *BB1,
  145.                            ArrayRef<BasicBlockT *> Descendants,
  146.                            PostDominatorTreeT *DomTree);
  147.   void propagateWeights(FunctionT &F);
  148.   void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
  149.                   BlockWeightMap &SampleBlockWeights,
  150.                   BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
  151.   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
  152.   void buildEdges(FunctionT &F);
  153.   bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
  154.   void clearFunctionData(bool ResetDT = true);
  155.   void computeDominanceAndLoopInfo(FunctionT &F);
  156.   bool
  157.   computeAndPropagateWeights(FunctionT &F,
  158.                              const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
  159.   void initWeightPropagation(FunctionT &F,
  160.                              const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
  161.   void
  162.   finalizeWeightPropagation(FunctionT &F,
  163.                             const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
  164.   void emitCoverageRemarks(FunctionT &F);
  165.  
  166.   /// Map basic blocks to their computed weights.
  167.   ///
  168.   /// The weight of a basic block is defined to be the maximum
  169.   /// of all the instruction weights in that block.
  170.   BlockWeightMap BlockWeights;
  171.  
  172.   /// Map edges to their computed weights.
  173.   ///
  174.   /// Edge weights are computed by propagating basic block weights in
  175.   /// SampleProfile::propagateWeights.
  176.   EdgeWeightMap EdgeWeights;
  177.  
  178.   /// Set of visited blocks during propagation.
  179.   SmallPtrSet<const BasicBlockT *, 32> VisitedBlocks;
  180.  
  181.   /// Set of visited edges during propagation.
  182.   SmallSet<Edge, 32> VisitedEdges;
  183.  
  184.   /// Equivalence classes for block weights.
  185.   ///
  186.   /// Two blocks BB1 and BB2 are in the same equivalence class if they
  187.   /// dominate and post-dominate each other, and they are in the same loop
  188.   /// nest. When this happens, the two blocks are guaranteed to execute
  189.   /// the same number of times.
  190.   EquivalenceClassMap EquivalenceClass;
  191.  
  192.   /// Dominance, post-dominance and loop information.
  193.   DominatorTreePtrT DT;
  194.   PostDominatorTreePtrT PDT;
  195.   LoopInfoPtrT LI;
  196.  
  197.   /// Predecessors for each basic block in the CFG.
  198.   BlockEdgeMap Predecessors;
  199.  
  200.   /// Successors for each basic block in the CFG.
  201.   BlockEdgeMap Successors;
  202.  
  203.   /// Profile coverage tracker.
  204.   SampleCoverageTracker CoverageTracker;
  205.  
  206.   /// Profile reader object.
  207.   std::unique_ptr<SampleProfileReader> Reader;
  208.  
  209.   /// Samples collected for the body of this function.
  210.   FunctionSamples *Samples = nullptr;
  211.  
  212.   /// Name of the profile file to load.
  213.   std::string Filename;
  214.  
  215.   /// Name of the profile remapping file to load.
  216.   std::string RemappingFilename;
  217.  
  218.   /// Profile Summary Info computed from sample profile.
  219.   ProfileSummaryInfo *PSI = nullptr;
  220.  
  221.   /// Optimization Remark Emitter used to emit diagnostic remarks.
  222.   OptRemarkEmitterT *ORE = nullptr;
  223. };
  224.  
  225. /// Clear all the per-function data used to load samples and propagate weights.
  226. template <typename BT>
  227. void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
  228.   BlockWeights.clear();
  229.   EdgeWeights.clear();
  230.   VisitedBlocks.clear();
  231.   VisitedEdges.clear();
  232.   EquivalenceClass.clear();
  233.   if (ResetDT) {
  234.     DT = nullptr;
  235.     PDT = nullptr;
  236.     LI = nullptr;
  237.   }
  238.   Predecessors.clear();
  239.   Successors.clear();
  240.   CoverageTracker.clear();
  241. }
  242.  
  243. #ifndef NDEBUG
  244. /// Print the weight of edge \p E on stream \p OS.
  245. ///
  246. /// \param OS  Stream to emit the output to.
  247. /// \param E  Edge to print.
  248. template <typename BT>
  249. void SampleProfileLoaderBaseImpl<BT>::printEdgeWeight(raw_ostream &OS, Edge E) {
  250.   OS << "weight[" << E.first->getName() << "->" << E.second->getName()
  251.      << "]: " << EdgeWeights[E] << "\n";
  252. }
  253.  
  254. /// Print the equivalence class of block \p BB on stream \p OS.
  255. ///
  256. /// \param OS  Stream to emit the output to.
  257. /// \param BB  Block to print.
  258. template <typename BT>
  259. void SampleProfileLoaderBaseImpl<BT>::printBlockEquivalence(
  260.     raw_ostream &OS, const BasicBlockT *BB) {
  261.   const BasicBlockT *Equiv = EquivalenceClass[BB];
  262.   OS << "equivalence[" << BB->getName()
  263.      << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
  264. }
  265.  
  266. /// Print the weight of block \p BB on stream \p OS.
  267. ///
  268. /// \param OS  Stream to emit the output to.
  269. /// \param BB  Block to print.
  270. template <typename BT>
  271. void SampleProfileLoaderBaseImpl<BT>::printBlockWeight(
  272.     raw_ostream &OS, const BasicBlockT *BB) const {
  273.   const auto &I = BlockWeights.find(BB);
  274.   uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
  275.   OS << "weight[" << BB->getName() << "]: " << W << "\n";
  276. }
  277. #endif
  278.  
  279. /// Get the weight for an instruction.
  280. ///
  281. /// The "weight" of an instruction \p Inst is the number of samples
  282. /// collected on that instruction at runtime. To retrieve it, we
  283. /// need to compute the line number of \p Inst relative to the start of its
  284. /// function. We use HeaderLineno to compute the offset. We then
  285. /// look up the samples collected for \p Inst using BodySamples.
  286. ///
  287. /// \param Inst Instruction to query.
  288. ///
  289. /// \returns the weight of \p Inst.
  290. template <typename BT>
  291. ErrorOr<uint64_t>
  292. SampleProfileLoaderBaseImpl<BT>::getInstWeight(const InstructionT &Inst) {
  293.   return getInstWeightImpl(Inst);
  294. }
  295.  
  296. template <typename BT>
  297. ErrorOr<uint64_t>
  298. SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) {
  299.   const FunctionSamples *FS = findFunctionSamples(Inst);
  300.   if (!FS)
  301.     return std::error_code();
  302.  
  303.   const DebugLoc &DLoc = Inst.getDebugLoc();
  304.   if (!DLoc)
  305.     return std::error_code();
  306.  
  307.   const DILocation *DIL = DLoc;
  308.   uint32_t LineOffset = FunctionSamples::getOffset(DIL);
  309.   uint32_t Discriminator;
  310.   if (EnableFSDiscriminator)
  311.     Discriminator = DIL->getDiscriminator();
  312.   else
  313.     Discriminator = DIL->getBaseDiscriminator();
  314.  
  315.   ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
  316.   if (R) {
  317.     bool FirstMark =
  318.         CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
  319.     if (FirstMark) {
  320.       ORE->emit([&]() {
  321.         OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
  322.         Remark << "Applied " << ore::NV("NumSamples", *R);
  323.         Remark << " samples from profile (offset: ";
  324.         Remark << ore::NV("LineOffset", LineOffset);
  325.         if (Discriminator) {
  326.           Remark << ".";
  327.           Remark << ore::NV("Discriminator", Discriminator);
  328.         }
  329.         Remark << ")";
  330.         return Remark;
  331.       });
  332.     }
  333.     LLVM_DEBUG(dbgs() << "    " << DLoc.getLine() << "." << Discriminator << ":"
  334.                       << Inst << " (line offset: " << LineOffset << "."
  335.                       << Discriminator << " - weight: " << R.get() << ")\n");
  336.   }
  337.   return R;
  338. }
  339.  
  340. /// Compute the weight of a basic block.
  341. ///
  342. /// The weight of basic block \p BB is the maximum weight of all the
  343. /// instructions in BB.
  344. ///
  345. /// \param BB The basic block to query.
  346. ///
  347. /// \returns the weight for \p BB.
  348. template <typename BT>
  349. ErrorOr<uint64_t>
  350. SampleProfileLoaderBaseImpl<BT>::getBlockWeight(const BasicBlockT *BB) {
  351.   uint64_t Max = 0;
  352.   bool HasWeight = false;
  353.   for (auto &I : *BB) {
  354.     const ErrorOr<uint64_t> &R = getInstWeight(I);
  355.     if (R) {
  356.       Max = std::max(Max, R.get());
  357.       HasWeight = true;
  358.     }
  359.   }
  360.   return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
  361. }
  362.  
  363. /// Compute and store the weights of every basic block.
  364. ///
  365. /// This populates the BlockWeights map by computing
  366. /// the weights of every basic block in the CFG.
  367. ///
  368. /// \param F The function to query.
  369. template <typename BT>
  370. bool SampleProfileLoaderBaseImpl<BT>::computeBlockWeights(FunctionT &F) {
  371.   bool Changed = false;
  372.   LLVM_DEBUG(dbgs() << "Block weights\n");
  373.   for (const auto &BB : F) {
  374.     ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
  375.     if (Weight) {
  376.       BlockWeights[&BB] = Weight.get();
  377.       VisitedBlocks.insert(&BB);
  378.       Changed = true;
  379.     }
  380.     LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
  381.   }
  382.  
  383.   return Changed;
  384. }
  385.  
  386. /// Get the FunctionSamples for an instruction.
  387. ///
  388. /// The FunctionSamples of an instruction \p Inst is the inlined instance
  389. /// in which that instruction is coming from. We traverse the inline stack
  390. /// of that instruction, and match it with the tree nodes in the profile.
  391. ///
  392. /// \param Inst Instruction to query.
  393. ///
  394. /// \returns the FunctionSamples pointer to the inlined instance.
  395. template <typename BT>
  396. const FunctionSamples *SampleProfileLoaderBaseImpl<BT>::findFunctionSamples(
  397.     const InstructionT &Inst) const {
  398.   const DILocation *DIL = Inst.getDebugLoc();
  399.   if (!DIL)
  400.     return Samples;
  401.  
  402.   auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
  403.   if (it.second) {
  404.     it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
  405.   }
  406.   return it.first->second;
  407. }
  408.  
  409. /// Find equivalence classes for the given block.
  410. ///
  411. /// This finds all the blocks that are guaranteed to execute the same
  412. /// number of times as \p BB1. To do this, it traverses all the
  413. /// descendants of \p BB1 in the dominator or post-dominator tree.
  414. ///
  415. /// A block BB2 will be in the same equivalence class as \p BB1 if
  416. /// the following holds:
  417. ///
  418. /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
  419. ///    is a descendant of \p BB1 in the dominator tree, then BB2 should
  420. ///    dominate BB1 in the post-dominator tree.
  421. ///
  422. /// 2- Both BB2 and \p BB1 must be in the same loop.
  423. ///
  424. /// For every block BB2 that meets those two requirements, we set BB2's
  425. /// equivalence class to \p BB1.
  426. ///
  427. /// \param BB1  Block to check.
  428. /// \param Descendants  Descendants of \p BB1 in either the dom or pdom tree.
  429. /// \param DomTree  Opposite dominator tree. If \p Descendants is filled
  430. ///                 with blocks from \p BB1's dominator tree, then
  431. ///                 this is the post-dominator tree, and vice versa.
  432. template <typename BT>
  433. void SampleProfileLoaderBaseImpl<BT>::findEquivalencesFor(
  434.     BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
  435.     PostDominatorTreeT *DomTree) {
  436.   const BasicBlockT *EC = EquivalenceClass[BB1];
  437.   uint64_t Weight = BlockWeights[EC];
  438.   for (const auto *BB2 : Descendants) {
  439.     bool IsDomParent = DomTree->dominates(BB2, BB1);
  440.     bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
  441.     if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
  442.       EquivalenceClass[BB2] = EC;
  443.       // If BB2 is visited, then the entire EC should be marked as visited.
  444.       if (VisitedBlocks.count(BB2)) {
  445.         VisitedBlocks.insert(EC);
  446.       }
  447.  
  448.       // If BB2 is heavier than BB1, make BB2 have the same weight
  449.       // as BB1.
  450.       //
  451.       // Note that we don't worry about the opposite situation here
  452.       // (when BB2 is lighter than BB1). We will deal with this
  453.       // during the propagation phase. Right now, we just want to
  454.       // make sure that BB1 has the largest weight of all the
  455.       // members of its equivalence set.
  456.       Weight = std::max(Weight, BlockWeights[BB2]);
  457.     }
  458.   }
  459.   const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
  460.   if (EC == EntryBB) {
  461.     BlockWeights[EC] = Samples->getHeadSamples() + 1;
  462.   } else {
  463.     BlockWeights[EC] = Weight;
  464.   }
  465. }
  466.  
  467. /// Find equivalence classes.
  468. ///
  469. /// Since samples may be missing from blocks, we can fill in the gaps by setting
  470. /// the weights of all the blocks in the same equivalence class to the same
  471. /// weight. To compute the concept of equivalence, we use dominance and loop
  472. /// information. Two blocks B1 and B2 are in the same equivalence class if B1
  473. /// dominates B2, B2 post-dominates B1 and both are in the same loop.
  474. ///
  475. /// \param F The function to query.
  476. template <typename BT>
  477. void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
  478.   SmallVector<BasicBlockT *, 8> DominatedBBs;
  479.   LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
  480.   // Find equivalence sets based on dominance and post-dominance information.
  481.   for (auto &BB : F) {
  482.     BasicBlockT *BB1 = &BB;
  483.  
  484.     // Compute BB1's equivalence class once.
  485.     if (EquivalenceClass.count(BB1)) {
  486.       LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
  487.       continue;
  488.     }
  489.  
  490.     // By default, blocks are in their own equivalence class.
  491.     EquivalenceClass[BB1] = BB1;
  492.  
  493.     // Traverse all the blocks dominated by BB1. We are looking for
  494.     // every basic block BB2 such that:
  495.     //
  496.     // 1- BB1 dominates BB2.
  497.     // 2- BB2 post-dominates BB1.
  498.     // 3- BB1 and BB2 are in the same loop nest.
  499.     //
  500.     // If all those conditions hold, it means that BB2 is executed
  501.     // as many times as BB1, so they are placed in the same equivalence
  502.     // class by making BB2's equivalence class be BB1.
  503.     DominatedBBs.clear();
  504.     DT->getDescendants(BB1, DominatedBBs);
  505.     findEquivalencesFor(BB1, DominatedBBs, &*PDT);
  506.  
  507.     LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
  508.   }
  509.  
  510.   // Assign weights to equivalence classes.
  511.   //
  512.   // All the basic blocks in the same equivalence class will execute
  513.   // the same number of times. Since we know that the head block in
  514.   // each equivalence class has the largest weight, assign that weight
  515.   // to all the blocks in that equivalence class.
  516.   LLVM_DEBUG(
  517.       dbgs() << "\nAssign the same weight to all blocks in the same class\n");
  518.   for (auto &BI : F) {
  519.     const BasicBlockT *BB = &BI;
  520.     const BasicBlockT *EquivBB = EquivalenceClass[BB];
  521.     if (BB != EquivBB)
  522.       BlockWeights[BB] = BlockWeights[EquivBB];
  523.     LLVM_DEBUG(printBlockWeight(dbgs(), BB));
  524.   }
  525. }
  526.  
  527. /// Visit the given edge to decide if it has a valid weight.
  528. ///
  529. /// If \p E has not been visited before, we copy to \p UnknownEdge
  530. /// and increment the count of unknown edges.
  531. ///
  532. /// \param E  Edge to visit.
  533. /// \param NumUnknownEdges  Current number of unknown edges.
  534. /// \param UnknownEdge  Set if E has not been visited before.
  535. ///
  536. /// \returns E's weight, if known. Otherwise, return 0.
  537. template <typename BT>
  538. uint64_t SampleProfileLoaderBaseImpl<BT>::visitEdge(Edge E,
  539.                                                     unsigned *NumUnknownEdges,
  540.                                                     Edge *UnknownEdge) {
  541.   if (!VisitedEdges.count(E)) {
  542.     (*NumUnknownEdges)++;
  543.     *UnknownEdge = E;
  544.     return 0;
  545.   }
  546.  
  547.   return EdgeWeights[E];
  548. }
  549.  
  550. /// Propagate weights through incoming/outgoing edges.
  551. ///
  552. /// If the weight of a basic block is known, and there is only one edge
  553. /// with an unknown weight, we can calculate the weight of that edge.
  554. ///
  555. /// Similarly, if all the edges have a known count, we can calculate the
  556. /// count of the basic block, if needed.
  557. ///
  558. /// \param F  Function to process.
  559. /// \param UpdateBlockCount  Whether we should update basic block counts that
  560. ///                          has already been annotated.
  561. ///
  562. /// \returns  True if new weights were assigned to edges or blocks.
  563. template <typename BT>
  564. bool SampleProfileLoaderBaseImpl<BT>::propagateThroughEdges(
  565.     FunctionT &F, bool UpdateBlockCount) {
  566.   bool Changed = false;
  567.   LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
  568.   for (const auto &BI : F) {
  569.     const BasicBlockT *BB = &BI;
  570.     const BasicBlockT *EC = EquivalenceClass[BB];
  571.  
  572.     // Visit all the predecessor and successor edges to determine
  573.     // which ones have a weight assigned already. Note that it doesn't
  574.     // matter that we only keep track of a single unknown edge. The
  575.     // only case we are interested in handling is when only a single
  576.     // edge is unknown (see setEdgeOrBlockWeight).
  577.     for (unsigned i = 0; i < 2; i++) {
  578.       uint64_t TotalWeight = 0;
  579.       unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
  580.       Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
  581.  
  582.       if (i == 0) {
  583.         // First, visit all predecessor edges.
  584.         NumTotalEdges = Predecessors[BB].size();
  585.         for (auto *Pred : Predecessors[BB]) {
  586.           Edge E = std::make_pair(Pred, BB);
  587.           TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  588.           if (E.first == E.second)
  589.             SelfReferentialEdge = E;
  590.         }
  591.         if (NumTotalEdges == 1) {
  592.           SingleEdge = std::make_pair(Predecessors[BB][0], BB);
  593.         }
  594.       } else {
  595.         // On the second round, visit all successor edges.
  596.         NumTotalEdges = Successors[BB].size();
  597.         for (auto *Succ : Successors[BB]) {
  598.           Edge E = std::make_pair(BB, Succ);
  599.           TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
  600.         }
  601.         if (NumTotalEdges == 1) {
  602.           SingleEdge = std::make_pair(BB, Successors[BB][0]);
  603.         }
  604.       }
  605.  
  606.       // After visiting all the edges, there are three cases that we
  607.       // can handle immediately:
  608.       //
  609.       // - All the edge weights are known (i.e., NumUnknownEdges == 0).
  610.       //   In this case, we simply check that the sum of all the edges
  611.       //   is the same as BB's weight. If not, we change BB's weight
  612.       //   to match. Additionally, if BB had not been visited before,
  613.       //   we mark it visited.
  614.       //
  615.       // - Only one edge is unknown and BB has already been visited.
  616.       //   In this case, we can compute the weight of the edge by
  617.       //   subtracting the total block weight from all the known
  618.       //   edge weights. If the edges weight more than BB, then the
  619.       //   edge of the last remaining edge is set to zero.
  620.       //
  621.       // - There exists a self-referential edge and the weight of BB is
  622.       //   known. In this case, this edge can be based on BB's weight.
  623.       //   We add up all the other known edges and set the weight on
  624.       //   the self-referential edge as we did in the previous case.
  625.       //
  626.       // In any other case, we must continue iterating. Eventually,
  627.       // all edges will get a weight, or iteration will stop when
  628.       // it reaches SampleProfileMaxPropagateIterations.
  629.       if (NumUnknownEdges <= 1) {
  630.         uint64_t &BBWeight = BlockWeights[EC];
  631.         if (NumUnknownEdges == 0) {
  632.           if (!VisitedBlocks.count(EC)) {
  633.             // If we already know the weight of all edges, the weight of the
  634.             // basic block can be computed. It should be no larger than the sum
  635.             // of all edge weights.
  636.             if (TotalWeight > BBWeight) {
  637.               BBWeight = TotalWeight;
  638.               Changed = true;
  639.               LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
  640.                                 << " known. Set weight for block: ";
  641.                          printBlockWeight(dbgs(), BB););
  642.             }
  643.           } else if (NumTotalEdges == 1 &&
  644.                      EdgeWeights[SingleEdge] < BlockWeights[EC]) {
  645.             // If there is only one edge for the visited basic block, use the
  646.             // block weight to adjust edge weight if edge weight is smaller.
  647.             EdgeWeights[SingleEdge] = BlockWeights[EC];
  648.             Changed = true;
  649.           }
  650.         } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
  651.           // If there is a single unknown edge and the block has been
  652.           // visited, then we can compute E's weight.
  653.           if (BBWeight >= TotalWeight)
  654.             EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
  655.           else
  656.             EdgeWeights[UnknownEdge] = 0;
  657.           const BasicBlockT *OtherEC;
  658.           if (i == 0)
  659.             OtherEC = EquivalenceClass[UnknownEdge.first];
  660.           else
  661.             OtherEC = EquivalenceClass[UnknownEdge.second];
  662.           // Edge weights should never exceed the BB weights it connects.
  663.           if (VisitedBlocks.count(OtherEC) &&
  664.               EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
  665.             EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
  666.           VisitedEdges.insert(UnknownEdge);
  667.           Changed = true;
  668.           LLVM_DEBUG(dbgs() << "Set weight for edge: ";
  669.                      printEdgeWeight(dbgs(), UnknownEdge));
  670.         }
  671.       } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
  672.         // If a block Weights 0, all its in/out edges should weight 0.
  673.         if (i == 0) {
  674.           for (auto *Pred : Predecessors[BB]) {
  675.             Edge E = std::make_pair(Pred, BB);
  676.             EdgeWeights[E] = 0;
  677.             VisitedEdges.insert(E);
  678.           }
  679.         } else {
  680.           for (auto *Succ : Successors[BB]) {
  681.             Edge E = std::make_pair(BB, Succ);
  682.             EdgeWeights[E] = 0;
  683.             VisitedEdges.insert(E);
  684.           }
  685.         }
  686.       } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
  687.         uint64_t &BBWeight = BlockWeights[BB];
  688.         // We have a self-referential edge and the weight of BB is known.
  689.         if (BBWeight >= TotalWeight)
  690.           EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
  691.         else
  692.           EdgeWeights[SelfReferentialEdge] = 0;
  693.         VisitedEdges.insert(SelfReferentialEdge);
  694.         Changed = true;
  695.         LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
  696.                    printEdgeWeight(dbgs(), SelfReferentialEdge));
  697.       }
  698.       if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
  699.         BlockWeights[EC] = TotalWeight;
  700.         VisitedBlocks.insert(EC);
  701.         Changed = true;
  702.       }
  703.     }
  704.   }
  705.  
  706.   return Changed;
  707. }
  708.  
  709. /// Build in/out edge lists for each basic block in the CFG.
  710. ///
  711. /// We are interested in unique edges. If a block B1 has multiple
  712. /// edges to another block B2, we only add a single B1->B2 edge.
  713. template <typename BT>
  714. void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
  715.   for (auto &BI : F) {
  716.     BasicBlockT *B1 = &BI;
  717.  
  718.     // Add predecessors for B1.
  719.     SmallPtrSet<BasicBlockT *, 16> Visited;
  720.     if (!Predecessors[B1].empty())
  721.       llvm_unreachable("Found a stale predecessors list in a basic block.");
  722.     for (auto *B2 : getPredecessors(B1))
  723.       if (Visited.insert(B2).second)
  724.         Predecessors[B1].push_back(B2);
  725.  
  726.     // Add successors for B1.
  727.     Visited.clear();
  728.     if (!Successors[B1].empty())
  729.       llvm_unreachable("Found a stale successors list in a basic block.");
  730.     for (auto *B2 : getSuccessors(B1))
  731.       if (Visited.insert(B2).second)
  732.         Successors[B1].push_back(B2);
  733.   }
  734. }
  735.  
  736. /// Propagate weights into edges
  737. ///
  738. /// The following rules are applied to every block BB in the CFG:
  739. ///
  740. /// - If BB has a single predecessor/successor, then the weight
  741. ///   of that edge is the weight of the block.
  742. ///
  743. /// - If all incoming or outgoing edges are known except one, and the
  744. ///   weight of the block is already known, the weight of the unknown
  745. ///   edge will be the weight of the block minus the sum of all the known
  746. ///   edges. If the sum of all the known edges is larger than BB's weight,
  747. ///   we set the unknown edge weight to zero.
  748. ///
  749. /// - If there is a self-referential edge, and the weight of the block is
  750. ///   known, the weight for that edge is set to the weight of the block
  751. ///   minus the weight of the other incoming edges to that block (if
  752. ///   known).
  753. template <typename BT>
  754. void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
  755.   // Flow-based profile inference is only usable with BasicBlock instantiation
  756.   // of SampleProfileLoaderBaseImpl.
  757.   if (SampleProfileUseProfi) {
  758.     // Prepare block sample counts for inference.
  759.     BlockWeightMap SampleBlockWeights;
  760.     for (const auto &BI : F) {
  761.       ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
  762.       if (Weight)
  763.         SampleBlockWeights[&BI] = Weight.get();
  764.     }
  765.     // Fill in BlockWeights and EdgeWeights using an inference algorithm.
  766.     applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
  767.   } else {
  768.     bool Changed = true;
  769.     unsigned I = 0;
  770.  
  771.     // If BB weight is larger than its corresponding loop's header BB weight,
  772.     // use the BB weight to replace the loop header BB weight.
  773.     for (auto &BI : F) {
  774.       BasicBlockT *BB = &BI;
  775.       LoopT *L = LI->getLoopFor(BB);
  776.       if (!L) {
  777.         continue;
  778.       }
  779.       BasicBlockT *Header = L->getHeader();
  780.       if (Header && BlockWeights[BB] > BlockWeights[Header]) {
  781.         BlockWeights[Header] = BlockWeights[BB];
  782.       }
  783.     }
  784.  
  785.     // Propagate until we converge or we go past the iteration limit.
  786.     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
  787.       Changed = propagateThroughEdges(F, false);
  788.     }
  789.  
  790.     // The first propagation propagates BB counts from annotated BBs to unknown
  791.     // BBs. The 2nd propagation pass resets edges weights, and use all BB
  792.     // weights to propagate edge weights.
  793.     VisitedEdges.clear();
  794.     Changed = true;
  795.     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
  796.       Changed = propagateThroughEdges(F, false);
  797.     }
  798.  
  799.     // The 3rd propagation pass allows adjust annotated BB weights that are
  800.     // obviously wrong.
  801.     Changed = true;
  802.     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
  803.       Changed = propagateThroughEdges(F, true);
  804.     }
  805.   }
  806. }
  807.  
  808. template <typename BT>
  809. void SampleProfileLoaderBaseImpl<BT>::applyProfi(
  810.     FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
  811.     BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
  812.   auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
  813.   Infer.apply(BlockWeights, EdgeWeights);
  814. }
  815.  
  816. /// Generate branch weight metadata for all branches in \p F.
  817. ///
  818. /// Branch weights are computed out of instruction samples using a
  819. /// propagation heuristic. Propagation proceeds in 3 phases:
  820. ///
  821. /// 1- Assignment of block weights. All the basic blocks in the function
  822. ///    are initial assigned the same weight as their most frequently
  823. ///    executed instruction.
  824. ///
  825. /// 2- Creation of equivalence classes. Since samples may be missing from
  826. ///    blocks, we can fill in the gaps by setting the weights of all the
  827. ///    blocks in the same equivalence class to the same weight. To compute
  828. ///    the concept of equivalence, we use dominance and loop information.
  829. ///    Two blocks B1 and B2 are in the same equivalence class if B1
  830. ///    dominates B2, B2 post-dominates B1 and both are in the same loop.
  831. ///
  832. /// 3- Propagation of block weights into edges. This uses a simple
  833. ///    propagation heuristic. The following rules are applied to every
  834. ///    block BB in the CFG:
  835. ///
  836. ///    - If BB has a single predecessor/successor, then the weight
  837. ///      of that edge is the weight of the block.
  838. ///
  839. ///    - If all the edges are known except one, and the weight of the
  840. ///      block is already known, the weight of the unknown edge will
  841. ///      be the weight of the block minus the sum of all the known
  842. ///      edges. If the sum of all the known edges is larger than BB's weight,
  843. ///      we set the unknown edge weight to zero.
  844. ///
  845. ///    - If there is a self-referential edge, and the weight of the block is
  846. ///      known, the weight for that edge is set to the weight of the block
  847. ///      minus the weight of the other incoming edges to that block (if
  848. ///      known).
  849. ///
  850. /// Since this propagation is not guaranteed to finalize for every CFG, we
  851. /// only allow it to proceed for a limited number of iterations (controlled
  852. /// by -sample-profile-max-propagate-iterations).
  853. ///
  854. /// FIXME: Try to replace this propagation heuristic with a scheme
  855. /// that is guaranteed to finalize. A work-list approach similar to
  856. /// the standard value propagation algorithm used by SSA-CCP might
  857. /// work here.
  858. ///
  859. /// \param F The function to query.
  860. ///
  861. /// \returns true if \p F was modified. Returns false, otherwise.
  862. template <typename BT>
  863. bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights(
  864.     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
  865.   bool Changed = (InlinedGUIDs.size() != 0);
  866.  
  867.   // Compute basic block weights.
  868.   Changed |= computeBlockWeights(F);
  869.  
  870.   if (Changed) {
  871.     // Initialize propagation.
  872.     initWeightPropagation(F, InlinedGUIDs);
  873.  
  874.     // Propagate weights to all edges.
  875.     propagateWeights(F);
  876.  
  877.     // Post-process propagated weights.
  878.     finalizeWeightPropagation(F, InlinedGUIDs);
  879.   }
  880.  
  881.   return Changed;
  882. }
  883.  
  884. template <typename BT>
  885. void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation(
  886.     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
  887.   // Add an entry count to the function using the samples gathered at the
  888.   // function entry.
  889.   // Sets the GUIDs that are inlined in the profiled binary. This is used
  890.   // for ThinLink to make correct liveness analysis, and also make the IR
  891.   // match the profiled binary before annotation.
  892.   getFunction(F).setEntryCount(
  893.       ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
  894.       &InlinedGUIDs);
  895.  
  896.   if (!SampleProfileUseProfi) {
  897.     // Compute dominance and loop info needed for propagation.
  898.     computeDominanceAndLoopInfo(F);
  899.  
  900.     // Find equivalence classes.
  901.     findEquivalenceClasses(F);
  902.   }
  903.  
  904.   // Before propagation starts, build, for each block, a list of
  905.   // unique predecessors and successors. This is necessary to handle
  906.   // identical edges in multiway branches. Since we visit all blocks and all
  907.   // edges of the CFG, it is cleaner to build these lists once at the start
  908.   // of the pass.
  909.   buildEdges(F);
  910. }
  911.  
  912. template <typename BT>
  913. void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
  914.     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
  915.   // If we utilize a flow-based count inference, then we trust the computed
  916.   // counts and set the entry count as computed by the algorithm. This is
  917.   // primarily done to sync the counts produced by profi and BFI inference,
  918.   // which uses the entry count for mass propagation.
  919.   // If profi produces a zero-value for the entry count, we fallback to
  920.   // Samples->getHeadSamples() + 1 to avoid functions with zero count.
  921.   if (SampleProfileUseProfi) {
  922.     const BasicBlockT *EntryBB = getEntryBB(&F);
  923.     ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
  924.     if (BlockWeights[EntryBB] > 0) {
  925.       getFunction(F).setEntryCount(
  926.           ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
  927.           &InlinedGUIDs);
  928.     }
  929.   }
  930. }
  931.  
  932. template <typename BT>
  933. void SampleProfileLoaderBaseImpl<BT>::emitCoverageRemarks(FunctionT &F) {
  934.   // If coverage checking was requested, compute it now.
  935.   const Function &Func = getFunction(F);
  936.   if (SampleProfileRecordCoverage) {
  937.     unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
  938.     unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
  939.     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
  940.     if (Coverage < SampleProfileRecordCoverage) {
  941.       Func.getContext().diagnose(DiagnosticInfoSampleProfile(
  942.           Func.getSubprogram()->getFilename(), getFunctionLoc(F),
  943.           Twine(Used) + " of " + Twine(Total) + " available profile records (" +
  944.               Twine(Coverage) + "%) were applied",
  945.           DS_Warning));
  946.     }
  947.   }
  948.  
  949.   if (SampleProfileSampleCoverage) {
  950.     uint64_t Used = CoverageTracker.getTotalUsedSamples();
  951.     uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
  952.     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
  953.     if (Coverage < SampleProfileSampleCoverage) {
  954.       Func.getContext().diagnose(DiagnosticInfoSampleProfile(
  955.           Func.getSubprogram()->getFilename(), getFunctionLoc(F),
  956.           Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
  957.               Twine(Coverage) + "%) were applied",
  958.           DS_Warning));
  959.     }
  960.   }
  961. }
  962.  
  963. /// Get the line number for the function header.
  964. ///
  965. /// This looks up function \p F in the current compilation unit and
  966. /// retrieves the line number where the function is defined. This is
  967. /// line 0 for all the samples read from the profile file. Every line
  968. /// number is relative to this line.
  969. ///
  970. /// \param F  Function object to query.
  971. ///
  972. /// \returns the line number where \p F is defined. If it returns 0,
  973. ///          it means that there is no debug information available for \p F.
  974. template <typename BT>
  975. unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
  976.   const Function &Func = getFunction(F);
  977.   if (DISubprogram *S = Func.getSubprogram())
  978.     return S->getLine();
  979.  
  980.   if (NoWarnSampleUnused)
  981.     return 0;
  982.  
  983.   // If the start of \p F is missing, emit a diagnostic to inform the user
  984.   // about the missed opportunity.
  985.   Func.getContext().diagnose(DiagnosticInfoSampleProfile(
  986.       "No debug information found in function " + Func.getName() +
  987.           ": Function profile not used",
  988.       DS_Warning));
  989.   return 0;
  990. }
  991.  
  992. template <typename BT>
  993. void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
  994.     FunctionT &F) {
  995.   DT.reset(new DominatorTree);
  996.   DT->recalculate(F);
  997.  
  998.   PDT.reset(new PostDominatorTree(F));
  999.  
  1000.   LI.reset(new LoopInfo);
  1001.   LI->analyze(*DT);
  1002. }
  1003.  
  1004. #undef DEBUG_TYPE
  1005.  
  1006. } // namespace llvm
  1007. #endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
  1008.