Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Software pipelining (SWP) is an instruction scheduling technique for loops
  10. // that overlaps loop iterations and exploits ILP via compiler transformations.
  11. //
  12. // There are multiple methods for analyzing a loop and creating a schedule.
  13. // An example algorithm is Swing Modulo Scheduling (implemented by the
  14. // MachinePipeliner). The details of how a schedule is arrived at are irrelevant
  15. // for the task of actually rewriting a loop to adhere to the schedule, which
  16. // is what this file does.
  17. //
  18. // A schedule is, for every instruction in a block, a Cycle and a Stage. Note
  19. // that we only support single-block loops, so "block" and "loop" can be used
  20. // interchangably.
  21. //
  22. // The Cycle of an instruction defines a partial order of the instructions in
  23. // the remapped loop. Instructions within a cycle must not consume the output
  24. // of any instruction in the same cycle. Cycle information is assumed to have
  25. // been calculated such that the processor will execute instructions in
  26. // lock-step (for example in a VLIW ISA).
  27. //
  28. // The Stage of an instruction defines the mapping between logical loop
  29. // iterations and pipelined loop iterations. An example (unrolled) pipeline
  30. // may look something like:
  31. //
  32. //  I0[0]                      Execute instruction I0 of iteration 0
  33. //  I1[0], I0[1]               Execute I0 of iteration 1 and I1 of iteration 1
  34. //         I1[1], I0[2]
  35. //                I1[2], I0[3]
  36. //
  37. // In the schedule for this unrolled sequence we would say that I0 was scheduled
  38. // in stage 0 and I1 in stage 1:
  39. //
  40. //  loop:
  41. //    [stage 0] x = I0
  42. //    [stage 1] I1 x (from stage 0)
  43. //
  44. // And to actually generate valid code we must insert a phi:
  45. //
  46. //  loop:
  47. //    x' = phi(x)
  48. //    x = I0
  49. //    I1 x'
  50. //
  51. // This is a simple example; the rules for how to generate correct code given
  52. // an arbitrary schedule containing loop-carried values are complex.
  53. //
  54. // Note that these examples only mention the steady-state kernel of the
  55. // generated loop; prologs and epilogs must be generated also that prime and
  56. // flush the pipeline. Doing so is nontrivial.
  57. //
  58. //===----------------------------------------------------------------------===//
  59.  
  60. #ifndef LLVM_CODEGEN_MODULOSCHEDULE_H
  61. #define LLVM_CODEGEN_MODULOSCHEDULE_H
  62.  
  63. #include "llvm/CodeGen/MachineFunction.h"
  64. #include "llvm/CodeGen/MachineLoopUtils.h"
  65. #include "llvm/CodeGen/TargetInstrInfo.h"
  66. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  67. #include <deque>
  68. #include <vector>
  69.  
  70. namespace llvm {
  71. class MachineBasicBlock;
  72. class MachineLoop;
  73. class MachineRegisterInfo;
  74. class MachineInstr;
  75. class LiveIntervals;
  76.  
  77. /// Represents a schedule for a single-block loop. For every instruction we
  78. /// maintain a Cycle and Stage.
  79. class ModuloSchedule {
  80. private:
  81.   /// The block containing the loop instructions.
  82.   MachineLoop *Loop;
  83.  
  84.   /// The instructions to be generated, in total order. Cycle provides a partial
  85.   /// order; the total order within cycles has been decided by the schedule
  86.   /// producer.
  87.   std::vector<MachineInstr *> ScheduledInstrs;
  88.  
  89.   /// The cycle for each instruction.
  90.   DenseMap<MachineInstr *, int> Cycle;
  91.  
  92.   /// The stage for each instruction.
  93.   DenseMap<MachineInstr *, int> Stage;
  94.  
  95.   /// The number of stages in this schedule (Max(Stage) + 1).
  96.   int NumStages;
  97.  
  98. public:
  99.   /// Create a new ModuloSchedule.
  100.   /// \arg ScheduledInstrs The new loop instructions, in total resequenced
  101.   ///    order.
  102.   /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
  103.   ///    not need to start at zero. ScheduledInstrs must be partially ordered by
  104.   ///    Cycle.
  105.   /// \arg Stage Stage index for all instructions in ScheduleInstrs.
  106.   ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
  107.                  std::vector<MachineInstr *> ScheduledInstrs,
  108.                  DenseMap<MachineInstr *, int> Cycle,
  109.                  DenseMap<MachineInstr *, int> Stage)
  110.       : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
  111.         Stage(std::move(Stage)) {
  112.     NumStages = 0;
  113.     for (auto &KV : this->Stage)
  114.       NumStages = std::max(NumStages, KV.second);
  115.     ++NumStages;
  116.   }
  117.  
  118.   /// Return the single-block loop being scheduled.
  119.   MachineLoop *getLoop() const { return Loop; }
  120.  
  121.   /// Return the number of stages contained in this schedule, which is the
  122.   /// largest stage index + 1.
  123.   int getNumStages() const { return NumStages; }
  124.  
  125.   /// Return the first cycle in the schedule, which is the cycle index of the
  126.   /// first instruction.
  127.   int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
  128.  
  129.   /// Return the final cycle in the schedule, which is the cycle index of the
  130.   /// last instruction.
  131.   int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
  132.  
  133.   /// Return the stage that MI is scheduled in, or -1.
  134.   int getStage(MachineInstr *MI) {
  135.     auto I = Stage.find(MI);
  136.     return I == Stage.end() ? -1 : I->second;
  137.   }
  138.  
  139.   /// Return the cycle that MI is scheduled at, or -1.
  140.   int getCycle(MachineInstr *MI) {
  141.     auto I = Cycle.find(MI);
  142.     return I == Cycle.end() ? -1 : I->second;
  143.   }
  144.  
  145.   /// Set the stage of a newly created instruction.
  146.   void setStage(MachineInstr *MI, int MIStage) {
  147.     assert(Stage.count(MI) == 0);
  148.     Stage[MI] = MIStage;
  149.   }
  150.  
  151.   /// Return the rescheduled instructions in order.
  152.   ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
  153.  
  154.   void dump() { print(dbgs()); }
  155.   void print(raw_ostream &OS);
  156. };
  157.  
  158. /// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
  159. /// rewriting the old loop and inserting prologs and epilogs as required.
  160. class ModuloScheduleExpander {
  161. public:
  162.   using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
  163.  
  164. private:
  165.   using ValueMapTy = DenseMap<unsigned, unsigned>;
  166.   using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
  167.   using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
  168.  
  169.   ModuloSchedule &Schedule;
  170.   MachineFunction &MF;
  171.   const TargetSubtargetInfo &ST;
  172.   MachineRegisterInfo &MRI;
  173.   const TargetInstrInfo *TII;
  174.   LiveIntervals &LIS;
  175.  
  176.   MachineBasicBlock *BB;
  177.   MachineBasicBlock *Preheader;
  178.   MachineBasicBlock *NewKernel = nullptr;
  179.   std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
  180.  
  181.   /// Map for each register and the max difference between its uses and def.
  182.   /// The first element in the pair is the max difference in stages. The
  183.   /// second is true if the register defines a Phi value and loop value is
  184.   /// scheduled before the Phi.
  185.   std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
  186.  
  187.   /// Instructions to change when emitting the final schedule.
  188.   InstrChangesTy InstrChanges;
  189.  
  190.   void generatePipelinedLoop();
  191.   void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
  192.                       ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
  193.   void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
  194.                       MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
  195.                       ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
  196.                       MBBVectorTy &PrologBBs);
  197.   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
  198.                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
  199.                             ValueMapTy *VRMap, InstrMapTy &InstrMap,
  200.                             unsigned LastStageNum, unsigned CurStageNum,
  201.                             bool IsLast);
  202.   void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
  203.                     MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
  204.                     ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
  205.                     InstrMapTy &InstrMap, unsigned LastStageNum,
  206.                     unsigned CurStageNum, bool IsLast);
  207.   void removeDeadInstructions(MachineBasicBlock *KernelBB,
  208.                               MBBVectorTy &EpilogBBs);
  209.   void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
  210.   void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
  211.                    MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
  212.                    ValueMapTy *VRMap);
  213.   bool computeDelta(MachineInstr &MI, unsigned &Delta);
  214.   void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
  215.                          unsigned Num);
  216.   MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
  217.                            unsigned InstStageNum);
  218.   MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
  219.                                     unsigned InstStageNum);
  220.   void updateInstruction(MachineInstr *NewMI, bool LastDef,
  221.                          unsigned CurStageNum, unsigned InstrStageNum,
  222.                          ValueMapTy *VRMap);
  223.   MachineInstr *findDefInLoop(unsigned Reg);
  224.   unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
  225.                          unsigned LoopStage, ValueMapTy *VRMap,
  226.                          MachineBasicBlock *BB);
  227.   void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
  228.                         ValueMapTy *VRMap, InstrMapTy &InstrMap);
  229.   void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
  230.                              unsigned CurStageNum, unsigned PhiNum,
  231.                              MachineInstr *Phi, unsigned OldReg,
  232.                              unsigned NewReg, unsigned PrevReg = 0);
  233.   bool isLoopCarried(MachineInstr &Phi);
  234.  
  235.   /// Return the max. number of stages/iterations that can occur between a
  236.   /// register definition and its uses.
  237.   unsigned getStagesForReg(int Reg, unsigned CurStage) {
  238.     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
  239.     if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
  240.         Stages.second)
  241.       return 1;
  242.     return Stages.first;
  243.   }
  244.  
  245.   /// The number of stages for a Phi is a little different than other
  246.   /// instructions. The minimum value computed in RegToStageDiff is 1
  247.   /// because we assume the Phi is needed for at least 1 iteration.
  248.   /// This is not the case if the loop value is scheduled prior to the
  249.   /// Phi in the same stage.  This function returns the number of stages
  250.   /// or iterations needed between the Phi definition and any uses.
  251.   unsigned getStagesForPhi(int Reg) {
  252.     std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
  253.     if (Stages.second)
  254.       return Stages.first;
  255.     return Stages.first - 1;
  256.   }
  257.  
  258. public:
  259.   /// Create a new ModuloScheduleExpander.
  260.   /// \arg InstrChanges Modifications to make to instructions with memory
  261.   ///   operands.
  262.   /// FIXME: InstrChanges is opaque and is an implementation detail of an
  263.   ///   optimization in MachinePipeliner that crosses abstraction boundaries.
  264.   ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
  265.                          LiveIntervals &LIS, InstrChangesTy InstrChanges)
  266.       : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
  267.         TII(ST.getInstrInfo()), LIS(LIS),
  268.         InstrChanges(std::move(InstrChanges)) {}
  269.  
  270.   /// Performs the actual expansion.
  271.   void expand();
  272.   /// Performs final cleanup after expansion.
  273.   void cleanup();
  274.  
  275.   /// Returns the newly rewritten kernel block, or nullptr if this was
  276.   /// optimized away.
  277.   MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
  278. };
  279.  
  280. /// A reimplementation of ModuloScheduleExpander. It works by generating a
  281. /// standalone kernel loop and peeling out the prologs and epilogs.
  282. class PeelingModuloScheduleExpander {
  283. public:
  284.   PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
  285.                                 LiveIntervals *LIS)
  286.       : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
  287.         TII(ST.getInstrInfo()), LIS(LIS) {}
  288.  
  289.   void expand();
  290.  
  291.   /// Runs ModuloScheduleExpander and treats it as a golden input to validate
  292.   /// aspects of the code generated by PeelingModuloScheduleExpander.
  293.   void validateAgainstModuloScheduleExpander();
  294.  
  295. protected:
  296.   ModuloSchedule &Schedule;
  297.   MachineFunction &MF;
  298.   const TargetSubtargetInfo &ST;
  299.   MachineRegisterInfo &MRI;
  300.   const TargetInstrInfo *TII;
  301.   LiveIntervals *LIS;
  302.  
  303.   /// The original loop block that gets rewritten in-place.
  304.   MachineBasicBlock *BB;
  305.   /// The original loop preheader.
  306.   MachineBasicBlock *Preheader;
  307.   /// All prolog and epilog blocks.
  308.   SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
  309.   /// For every block, the stages that are produced.
  310.   DenseMap<MachineBasicBlock *, BitVector> LiveStages;
  311.   /// For every block, the stages that are available. A stage can be available
  312.   /// but not produced (in the epilog) or produced but not available (in the
  313.   /// prolog).
  314.   DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
  315.   /// When peeling the epilogue keep track of the distance between the phi
  316.   /// nodes and the kernel.
  317.   DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
  318.  
  319.   /// CanonicalMIs and BlockMIs form a bidirectional map between any of the
  320.   /// loop kernel clones.
  321.   DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
  322.   DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
  323.       BlockMIs;
  324.  
  325.   /// State passed from peelKernel to peelPrologAndEpilogs().
  326.   std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
  327.   /// Illegal phis that need to be deleted once we re-link stages.
  328.   SmallVector<MachineInstr *, 4> IllegalPhisToDelete;
  329.  
  330.   /// Converts BB from the original loop body to the rewritten, pipelined
  331.   /// steady-state.
  332.   void rewriteKernel();
  333.  
  334.   /// Peels one iteration of the rewritten kernel (BB) in the specified
  335.   /// direction.
  336.   MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
  337.   // Delete instructions whose stage is less than MinStage in the given basic
  338.   // block.
  339.   void filterInstructions(MachineBasicBlock *MB, int MinStage);
  340.   // Move instructions of the given stage from sourceBB to DestBB. Remap the phi
  341.   // instructions to keep a valid IR.
  342.   void moveStageBetweenBlocks(MachineBasicBlock *DestBB,
  343.                               MachineBasicBlock *SourceBB, unsigned Stage);
  344.   /// Peel the kernel forwards and backwards to produce prologs and epilogs,
  345.   /// and stitch them together.
  346.   void peelPrologAndEpilogs();
  347.   /// All prolog and epilog blocks are clones of the kernel, so any produced
  348.   /// register in one block has an corollary in all other blocks.
  349.   Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
  350.   /// Change all users of MI, if MI is predicated out
  351.   /// (LiveStages[MI->getParent()] == false).
  352.   void rewriteUsesOf(MachineInstr *MI);
  353.   /// Insert branches between prologs, kernel and epilogs.
  354.   void fixupBranches();
  355.   /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
  356.   /// to a block dominated by all prologs and epilogs. This allows us to treat
  357.   /// the loop exiting block as any other kernel clone.
  358.   MachineBasicBlock *CreateLCSSAExitingBlock();
  359.   /// Helper to get the stage of an instruction in the schedule.
  360.   unsigned getStage(MachineInstr *MI) {
  361.     if (CanonicalMIs.count(MI))
  362.       MI = CanonicalMIs[MI];
  363.     return Schedule.getStage(MI);
  364.   }
  365.   /// Helper function to find the right canonical register for a phi instruction
  366.   /// coming from a peeled out prologue.
  367.   Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
  368.   /// Target loop info before kernel peeling.
  369.   std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
  370. };
  371.  
  372. /// Expander that simply annotates each scheduled instruction with a post-instr
  373. /// symbol that can be consumed by the ModuloScheduleTest pass.
  374. ///
  375. /// The post-instr symbol is a way of annotating an instruction that can be
  376. /// roundtripped in MIR. The syntax is:
  377. ///   MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
  378. class ModuloScheduleTestAnnotater {
  379.   MachineFunction &MF;
  380.   ModuloSchedule &S;
  381.  
  382. public:
  383.   ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
  384.       : MF(MF), S(S) {}
  385.  
  386.   /// Performs the annotation.
  387.   void annotate();
  388. };
  389.  
  390. } // end namespace llvm
  391.  
  392. #endif // LLVM_CODEGEN_MODULOSCHEDULE_H
  393.