Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the interface for the MachineTraceMetrics analysis pass
  10. // that estimates CPU resource usage and critical data dependency paths through
  11. // preferred traces. This is useful for super-scalar CPUs where execution speed
  12. // can be limited both by data dependencies and by limited execution resources.
  13. //
  14. // Out-of-order CPUs will often be executing instructions from multiple basic
  15. // blocks at the same time. This makes it difficult to estimate the resource
  16. // usage accurately in a single basic block. Resources can be estimated better
  17. // by looking at a trace through the current basic block.
  18. //
  19. // For every block, the MachineTraceMetrics pass will pick a preferred trace
  20. // that passes through the block. The trace is chosen based on loop structure,
  21. // branch probabilities, and resource usage. The intention is to pick likely
  22. // traces that would be the most affected by code transformations.
  23. //
  24. // It is expensive to compute a full arbitrary trace for every block, so to
  25. // save some computations, traces are chosen to be convergent. This means that
  26. // if the traces through basic blocks A and B ever cross when moving away from
  27. // A and B, they never diverge again. This applies in both directions - If the
  28. // traces meet above A and B, they won't diverge when going further back.
  29. //
  30. // Traces tend to align with loops. The trace through a block in an inner loop
  31. // will begin at the loop entry block and end at a back edge. If there are
  32. // nested loops, the trace may begin and end at those instead.
  33. //
  34. // For each trace, we compute the critical path length, which is the number of
  35. // cycles required to execute the trace when execution is limited by data
  36. // dependencies only. We also compute the resource height, which is the number
  37. // of cycles required to execute all instructions in the trace when ignoring
  38. // data dependencies.
  39. //
  40. // Every instruction in the current block has a slack - the number of cycles
  41. // execution of the instruction can be delayed without extending the critical
  42. // path.
  43. //
  44. //===----------------------------------------------------------------------===//
  45.  
  46. #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
  47. #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
  48.  
  49. #include "llvm/ADT/SparseSet.h"
  50. #include "llvm/ADT/ArrayRef.h"
  51. #include "llvm/ADT/DenseMap.h"
  52. #include "llvm/ADT/SmallVector.h"
  53. #include "llvm/CodeGen/MachineBasicBlock.h"
  54. #include "llvm/CodeGen/MachineFunctionPass.h"
  55. #include "llvm/CodeGen/TargetSchedule.h"
  56.  
  57. namespace llvm {
  58.  
  59. class AnalysisUsage;
  60. class MachineFunction;
  61. class MachineInstr;
  62. class MachineLoop;
  63. class MachineLoopInfo;
  64. class MachineRegisterInfo;
  65. struct MCSchedClassDesc;
  66. class raw_ostream;
  67. class TargetInstrInfo;
  68. class TargetRegisterInfo;
  69.  
  70. // Keep track of physreg data dependencies by recording each live register unit.
  71. // Associate each regunit with an instruction operand. Depending on the
  72. // direction instructions are scanned, it could be the operand that defined the
  73. // regunit, or the highest operand to read the regunit.
  74. struct LiveRegUnit {
  75.   unsigned RegUnit;
  76.   unsigned Cycle = 0;
  77.   const MachineInstr *MI = nullptr;
  78.   unsigned Op = 0;
  79.  
  80.   unsigned getSparseSetIndex() const { return RegUnit; }
  81.  
  82.   LiveRegUnit(unsigned RU) : RegUnit(RU) {}
  83. };
  84.  
  85.  
  86. class MachineTraceMetrics : public MachineFunctionPass {
  87.   const MachineFunction *MF = nullptr;
  88.   const TargetInstrInfo *TII = nullptr;
  89.   const TargetRegisterInfo *TRI = nullptr;
  90.   const MachineRegisterInfo *MRI = nullptr;
  91.   const MachineLoopInfo *Loops = nullptr;
  92.   TargetSchedModel SchedModel;
  93.  
  94. public:
  95.   friend class Ensemble;
  96.   friend class Trace;
  97.  
  98.   class Ensemble;
  99.  
  100.   static char ID;
  101.  
  102.   MachineTraceMetrics();
  103.  
  104.   void getAnalysisUsage(AnalysisUsage&) const override;
  105.   bool runOnMachineFunction(MachineFunction&) override;
  106.   void releaseMemory() override;
  107.   void verifyAnalysis() const override;
  108.  
  109.   /// Per-basic block information that doesn't depend on the trace through the
  110.   /// block.
  111.   struct FixedBlockInfo {
  112.     /// The number of non-trivial instructions in the block.
  113.     /// Doesn't count PHI and COPY instructions that are likely to be removed.
  114.     unsigned InstrCount = ~0u;
  115.  
  116.     /// True when the block contains calls.
  117.     bool HasCalls = false;
  118.  
  119.     FixedBlockInfo() = default;
  120.  
  121.     /// Returns true when resource information for this block has been computed.
  122.     bool hasResources() const { return InstrCount != ~0u; }
  123.  
  124.     /// Invalidate resource information.
  125.     void invalidate() { InstrCount = ~0u; }
  126.   };
  127.  
  128.   /// Get the fixed resource information about MBB. Compute it on demand.
  129.   const FixedBlockInfo *getResources(const MachineBasicBlock*);
  130.  
  131.   /// Get the scaled number of cycles used per processor resource in MBB.
  132.   /// This is an array with SchedModel.getNumProcResourceKinds() entries.
  133.   /// The getResources() function above must have been called first.
  134.   ///
  135.   /// These numbers have already been scaled by SchedModel.getResourceFactor().
  136.   ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
  137.  
  138.   /// A virtual register or regunit required by a basic block or its trace
  139.   /// successors.
  140.   struct LiveInReg {
  141.     /// The virtual register required, or a register unit.
  142.     Register Reg;
  143.  
  144.     /// For virtual registers: Minimum height of the defining instruction.
  145.     /// For regunits: Height of the highest user in the trace.
  146.     unsigned Height;
  147.  
  148.     LiveInReg(Register Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
  149.   };
  150.  
  151.   /// Per-basic block information that relates to a specific trace through the
  152.   /// block. Convergent traces means that only one of these is required per
  153.   /// block in a trace ensemble.
  154.   struct TraceBlockInfo {
  155.     /// Trace predecessor, or NULL for the first block in the trace.
  156.     /// Valid when hasValidDepth().
  157.     const MachineBasicBlock *Pred = nullptr;
  158.  
  159.     /// Trace successor, or NULL for the last block in the trace.
  160.     /// Valid when hasValidHeight().
  161.     const MachineBasicBlock *Succ = nullptr;
  162.  
  163.     /// The block number of the head of the trace. (When hasValidDepth()).
  164.     unsigned Head;
  165.  
  166.     /// The block number of the tail of the trace. (When hasValidHeight()).
  167.     unsigned Tail;
  168.  
  169.     /// Accumulated number of instructions in the trace above this block.
  170.     /// Does not include instructions in this block.
  171.     unsigned InstrDepth = ~0u;
  172.  
  173.     /// Accumulated number of instructions in the trace below this block.
  174.     /// Includes instructions in this block.
  175.     unsigned InstrHeight = ~0u;
  176.  
  177.     TraceBlockInfo() = default;
  178.  
  179.     /// Returns true if the depth resources have been computed from the trace
  180.     /// above this block.
  181.     bool hasValidDepth() const { return InstrDepth != ~0u; }
  182.  
  183.     /// Returns true if the height resources have been computed from the trace
  184.     /// below this block.
  185.     bool hasValidHeight() const { return InstrHeight != ~0u; }
  186.  
  187.     /// Invalidate depth resources when some block above this one has changed.
  188.     void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
  189.  
  190.     /// Invalidate height resources when a block below this one has changed.
  191.     void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
  192.  
  193.     /// Assuming that this is a dominator of TBI, determine if it contains
  194.     /// useful instruction depths. A dominating block can be above the current
  195.     /// trace head, and any dependencies from such a far away dominator are not
  196.     /// expected to affect the critical path.
  197.     ///
  198.     /// Also returns true when TBI == this.
  199.     bool isUsefulDominator(const TraceBlockInfo &TBI) const {
  200.       // The trace for TBI may not even be calculated yet.
  201.       if (!hasValidDepth() || !TBI.hasValidDepth())
  202.         return false;
  203.       // Instruction depths are only comparable if the traces share a head.
  204.       if (Head != TBI.Head)
  205.         return false;
  206.       // It is almost always the case that TBI belongs to the same trace as
  207.       // this block, but rare convoluted cases involving irreducible control
  208.       // flow, a dominator may share a trace head without actually being on the
  209.       // same trace as TBI. This is not a big problem as long as it doesn't
  210.       // increase the instruction depth.
  211.       return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
  212.     }
  213.  
  214.     // Data-dependency-related information. Per-instruction depth and height
  215.     // are computed from data dependencies in the current trace, using
  216.     // itinerary data.
  217.  
  218.     /// Instruction depths have been computed. This implies hasValidDepth().
  219.     bool HasValidInstrDepths = false;
  220.  
  221.     /// Instruction heights have been computed. This implies hasValidHeight().
  222.     bool HasValidInstrHeights = false;
  223.  
  224.     /// Critical path length. This is the number of cycles in the longest data
  225.     /// dependency chain through the trace. This is only valid when both
  226.     /// HasValidInstrDepths and HasValidInstrHeights are set.
  227.     unsigned CriticalPath;
  228.  
  229.     /// Live-in registers. These registers are defined above the current block
  230.     /// and used by this block or a block below it.
  231.     /// This does not include PHI uses in the current block, but it does
  232.     /// include PHI uses in deeper blocks.
  233.     SmallVector<LiveInReg, 4> LiveIns;
  234.  
  235.     void print(raw_ostream&) const;
  236.   };
  237.  
  238.   /// InstrCycles represents the cycle height and depth of an instruction in a
  239.   /// trace.
  240.   struct InstrCycles {
  241.     /// Earliest issue cycle as determined by data dependencies and instruction
  242.     /// latencies from the beginning of the trace. Data dependencies from
  243.     /// before the trace are not included.
  244.     unsigned Depth;
  245.  
  246.     /// Minimum number of cycles from this instruction is issued to the of the
  247.     /// trace, as determined by data dependencies and instruction latencies.
  248.     unsigned Height;
  249.   };
  250.  
  251.   /// A trace represents a plausible sequence of executed basic blocks that
  252.   /// passes through the current basic block one. The Trace class serves as a
  253.   /// handle to internal cached data structures.
  254.   class Trace {
  255.     Ensemble &TE;
  256.     TraceBlockInfo &TBI;
  257.  
  258.     unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
  259.  
  260.   public:
  261.     explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
  262.  
  263.     void print(raw_ostream&) const;
  264.  
  265.     /// Compute the total number of instructions in the trace.
  266.     unsigned getInstrCount() const {
  267.       return TBI.InstrDepth + TBI.InstrHeight;
  268.     }
  269.  
  270.     /// Return the resource depth of the top/bottom of the trace center block.
  271.     /// This is the number of cycles required to execute all instructions from
  272.     /// the trace head to the trace center block. The resource depth only
  273.     /// considers execution resources, it ignores data dependencies.
  274.     /// When Bottom is set, instructions in the trace center block are included.
  275.     unsigned getResourceDepth(bool Bottom) const;
  276.  
  277.     /// Return the resource length of the trace. This is the number of cycles
  278.     /// required to execute the instructions in the trace if they were all
  279.     /// independent, exposing the maximum instruction-level parallelism.
  280.     ///
  281.     /// Any blocks in Extrablocks are included as if they were part of the
  282.     /// trace. Likewise, extra resources required by the specified scheduling
  283.     /// classes are included. For the caller to account for extra machine
  284.     /// instructions, it must first resolve each instruction's scheduling class.
  285.     unsigned getResourceLength(
  286.         ArrayRef<const MachineBasicBlock *> Extrablocks = std::nullopt,
  287.         ArrayRef<const MCSchedClassDesc *> ExtraInstrs = std::nullopt,
  288.         ArrayRef<const MCSchedClassDesc *> RemoveInstrs = std::nullopt) const;
  289.  
  290.     /// Return the length of the (data dependency) critical path through the
  291.     /// trace.
  292.     unsigned getCriticalPath() const { return TBI.CriticalPath; }
  293.  
  294.     /// Return the depth and height of MI. The depth is only valid for
  295.     /// instructions in or above the trace center block. The height is only
  296.     /// valid for instructions in or below the trace center block.
  297.     InstrCycles getInstrCycles(const MachineInstr &MI) const {
  298.       return TE.Cycles.lookup(&MI);
  299.     }
  300.  
  301.     /// Return the slack of MI. This is the number of cycles MI can be delayed
  302.     /// before the critical path becomes longer.
  303.     /// MI must be an instruction in the trace center block.
  304.     unsigned getInstrSlack(const MachineInstr &MI) const;
  305.  
  306.     /// Return the Depth of a PHI instruction in a trace center block successor.
  307.     /// The PHI does not have to be part of the trace.
  308.     unsigned getPHIDepth(const MachineInstr &PHI) const;
  309.  
  310.     /// A dependence is useful if the basic block of the defining instruction
  311.     /// is part of the trace of the user instruction. It is assumed that DefMI
  312.     /// dominates UseMI (see also isUsefulDominator).
  313.     bool isDepInTrace(const MachineInstr &DefMI,
  314.                       const MachineInstr &UseMI) const;
  315.   };
  316.  
  317.   /// A trace ensemble is a collection of traces selected using the same
  318.   /// strategy, for example 'minimum resource height'. There is one trace for
  319.   /// every block in the function.
  320.   class Ensemble {
  321.     friend class Trace;
  322.  
  323.     SmallVector<TraceBlockInfo, 4> BlockInfo;
  324.     DenseMap<const MachineInstr*, InstrCycles> Cycles;
  325.     SmallVector<unsigned, 0> ProcResourceDepths;
  326.     SmallVector<unsigned, 0> ProcResourceHeights;
  327.  
  328.     void computeTrace(const MachineBasicBlock*);
  329.     void computeDepthResources(const MachineBasicBlock*);
  330.     void computeHeightResources(const MachineBasicBlock*);
  331.     unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
  332.     void computeInstrDepths(const MachineBasicBlock*);
  333.     void computeInstrHeights(const MachineBasicBlock*);
  334.     void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
  335.                     ArrayRef<const MachineBasicBlock*> Trace);
  336.  
  337.   protected:
  338.     MachineTraceMetrics &MTM;
  339.  
  340.     explicit Ensemble(MachineTraceMetrics*);
  341.  
  342.     virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
  343.     virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
  344.     const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
  345.     const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
  346.     const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
  347.     ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
  348.     ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
  349.  
  350.   public:
  351.     virtual ~Ensemble();
  352.  
  353.     virtual const char *getName() const = 0;
  354.     void print(raw_ostream&) const;
  355.     void invalidate(const MachineBasicBlock *MBB);
  356.     void verify() const;
  357.  
  358.     /// Get the trace that passes through MBB.
  359.     /// The trace is computed on demand.
  360.     Trace getTrace(const MachineBasicBlock *MBB);
  361.  
  362.     /// Updates the depth of an machine instruction, given RegUnits.
  363.     void updateDepth(TraceBlockInfo &TBI, const MachineInstr&,
  364.                      SparseSet<LiveRegUnit> &RegUnits);
  365.     void updateDepth(const MachineBasicBlock *, const MachineInstr&,
  366.                      SparseSet<LiveRegUnit> &RegUnits);
  367.  
  368.     /// Updates the depth of the instructions from Start to End.
  369.     void updateDepths(MachineBasicBlock::iterator Start,
  370.                       MachineBasicBlock::iterator End,
  371.                       SparseSet<LiveRegUnit> &RegUnits);
  372.  
  373.   };
  374.  
  375.   /// Strategies for selecting traces.
  376.   enum Strategy {
  377.     /// Select the trace through a block that has the fewest instructions.
  378.     TS_MinInstrCount,
  379.  
  380.     TS_NumStrategies
  381.   };
  382.  
  383.   /// Get the trace ensemble representing the given trace selection strategy.
  384.   /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
  385.   /// and valid for the lifetime of the analysis pass.
  386.   Ensemble *getEnsemble(Strategy);
  387.  
  388.   /// Invalidate cached information about MBB. This must be called *before* MBB
  389.   /// is erased, or the CFG is otherwise changed.
  390.   ///
  391.   /// This invalidates per-block information about resource usage for MBB only,
  392.   /// and it invalidates per-trace information for any trace that passes
  393.   /// through MBB.
  394.   ///
  395.   /// Call Ensemble::getTrace() again to update any trace handles.
  396.   void invalidate(const MachineBasicBlock *MBB);
  397.  
  398. private:
  399.   // One entry per basic block, indexed by block number.
  400.   SmallVector<FixedBlockInfo, 4> BlockInfo;
  401.  
  402.   // Cycles consumed on each processor resource per block.
  403.   // The number of processor resource kinds is constant for a given subtarget,
  404.   // but it is not known at compile time. The number of cycles consumed by
  405.   // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
  406.   // where Kinds = SchedModel.getNumProcResourceKinds().
  407.   SmallVector<unsigned, 0> ProcResourceCycles;
  408.  
  409.   // One ensemble per strategy.
  410.   Ensemble* Ensembles[TS_NumStrategies];
  411.  
  412.   // Convert scaled resource usage to a cycle count that can be compared with
  413.   // latencies.
  414.   unsigned getCycles(unsigned Scaled) {
  415.     unsigned Factor = SchedModel.getLatencyFactor();
  416.     return (Scaled + Factor - 1) / Factor;
  417.   }
  418. };
  419.  
  420. inline raw_ostream &operator<<(raw_ostream &OS,
  421.                                const MachineTraceMetrics::Trace &Tr) {
  422.   Tr.print(OS);
  423.   return OS;
  424. }
  425.  
  426. inline raw_ostream &operator<<(raw_ostream &OS,
  427.                                const MachineTraceMetrics::Ensemble &En) {
  428.   En.print(OS);
  429.   return OS;
  430. }
  431.  
  432. } // end namespace llvm
  433.  
  434. #endif // LLVM_CODEGEN_MACHINETRACEMETRICS_H
  435.