Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Software pipelining (SWP) is an instruction scheduling technique for loops
10
// that overlaps loop iterations and exploits ILP via compiler transformations.
11
//
12
// There are multiple methods for analyzing a loop and creating a schedule.
13
// An example algorithm is Swing Modulo Scheduling (implemented by the
14
// MachinePipeliner). The details of how a schedule is arrived at are irrelevant
15
// for the task of actually rewriting a loop to adhere to the schedule, which
16
// is what this file does.
17
//
18
// A schedule is, for every instruction in a block, a Cycle and a Stage. Note
19
// that we only support single-block loops, so "block" and "loop" can be used
20
// interchangably.
21
//
22
// The Cycle of an instruction defines a partial order of the instructions in
23
// the remapped loop. Instructions within a cycle must not consume the output
24
// of any instruction in the same cycle. Cycle information is assumed to have
25
// been calculated such that the processor will execute instructions in
26
// lock-step (for example in a VLIW ISA).
27
//
28
// The Stage of an instruction defines the mapping between logical loop
29
// iterations and pipelined loop iterations. An example (unrolled) pipeline
30
// may look something like:
31
//
32
//  I0[0]                      Execute instruction I0 of iteration 0
33
//  I1[0], I0[1]               Execute I0 of iteration 1 and I1 of iteration 1
34
//         I1[1], I0[2]
35
//                I1[2], I0[3]
36
//
37
// In the schedule for this unrolled sequence we would say that I0 was scheduled
38
// in stage 0 and I1 in stage 1:
39
//
40
//  loop:
41
//    [stage 0] x = I0
42
//    [stage 1] I1 x (from stage 0)
43
//
44
// And to actually generate valid code we must insert a phi:
45
//
46
//  loop:
47
//    x' = phi(x)
48
//    x = I0
49
//    I1 x'
50
//
51
// This is a simple example; the rules for how to generate correct code given
52
// an arbitrary schedule containing loop-carried values are complex.
53
//
54
// Note that these examples only mention the steady-state kernel of the
55
// generated loop; prologs and epilogs must be generated also that prime and
56
// flush the pipeline. Doing so is nontrivial.
57
//
58
//===----------------------------------------------------------------------===//
59
 
60
#ifndef LLVM_CODEGEN_MODULOSCHEDULE_H
61
#define LLVM_CODEGEN_MODULOSCHEDULE_H
62
 
63
#include "llvm/CodeGen/MachineFunction.h"
64
#include "llvm/CodeGen/MachineLoopUtils.h"
65
#include "llvm/CodeGen/TargetInstrInfo.h"
66
#include "llvm/CodeGen/TargetSubtargetInfo.h"
67
#include <deque>
68
#include <vector>
69
 
70
namespace llvm {
71
class MachineBasicBlock;
72
class MachineLoop;
73
class MachineRegisterInfo;
74
class MachineInstr;
75
class LiveIntervals;
76
 
77
/// Represents a schedule for a single-block loop. For every instruction we
78
/// maintain a Cycle and Stage.
79
class ModuloSchedule {
80
private:
81
  /// The block containing the loop instructions.
82
  MachineLoop *Loop;
83
 
84
  /// The instructions to be generated, in total order. Cycle provides a partial
85
  /// order; the total order within cycles has been decided by the schedule
86
  /// producer.
87
  std::vector<MachineInstr *> ScheduledInstrs;
88
 
89
  /// The cycle for each instruction.
90
  DenseMap<MachineInstr *, int> Cycle;
91
 
92
  /// The stage for each instruction.
93
  DenseMap<MachineInstr *, int> Stage;
94
 
95
  /// The number of stages in this schedule (Max(Stage) + 1).
96
  int NumStages;
97
 
98
public:
99
  /// Create a new ModuloSchedule.
100
  /// \arg ScheduledInstrs The new loop instructions, in total resequenced
101
  ///    order.
102
  /// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
103
  ///    not need to start at zero. ScheduledInstrs must be partially ordered by
104
  ///    Cycle.
105
  /// \arg Stage Stage index for all instructions in ScheduleInstrs.
106
  ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
107
                 std::vector<MachineInstr *> ScheduledInstrs,
108
                 DenseMap<MachineInstr *, int> Cycle,
109
                 DenseMap<MachineInstr *, int> Stage)
110
      : Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
111
        Stage(std::move(Stage)) {
112
    NumStages = 0;
113
    for (auto &KV : this->Stage)
114
      NumStages = std::max(NumStages, KV.second);
115
    ++NumStages;
116
  }
117
 
118
  /// Return the single-block loop being scheduled.
119
  MachineLoop *getLoop() const { return Loop; }
120
 
121
  /// Return the number of stages contained in this schedule, which is the
122
  /// largest stage index + 1.
123
  int getNumStages() const { return NumStages; }
124
 
125
  /// Return the first cycle in the schedule, which is the cycle index of the
126
  /// first instruction.
127
  int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
128
 
129
  /// Return the final cycle in the schedule, which is the cycle index of the
130
  /// last instruction.
131
  int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
132
 
133
  /// Return the stage that MI is scheduled in, or -1.
134
  int getStage(MachineInstr *MI) {
135
    auto I = Stage.find(MI);
136
    return I == Stage.end() ? -1 : I->second;
137
  }
138
 
139
  /// Return the cycle that MI is scheduled at, or -1.
140
  int getCycle(MachineInstr *MI) {
141
    auto I = Cycle.find(MI);
142
    return I == Cycle.end() ? -1 : I->second;
143
  }
144
 
145
  /// Set the stage of a newly created instruction.
146
  void setStage(MachineInstr *MI, int MIStage) {
147
    assert(Stage.count(MI) == 0);
148
    Stage[MI] = MIStage;
149
  }
150
 
151
  /// Return the rescheduled instructions in order.
152
  ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
153
 
154
  void dump() { print(dbgs()); }
155
  void print(raw_ostream &OS);
156
};
157
 
158
/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
159
/// rewriting the old loop and inserting prologs and epilogs as required.
160
class ModuloScheduleExpander {
161
public:
162
  using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
163
 
164
private:
165
  using ValueMapTy = DenseMap<unsigned, unsigned>;
166
  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
167
  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
168
 
169
  ModuloSchedule &Schedule;
170
  MachineFunction &MF;
171
  const TargetSubtargetInfo &ST;
172
  MachineRegisterInfo &MRI;
173
  const TargetInstrInfo *TII;
174
  LiveIntervals &LIS;
175
 
176
  MachineBasicBlock *BB;
177
  MachineBasicBlock *Preheader;
178
  MachineBasicBlock *NewKernel = nullptr;
179
  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
180
 
181
  /// Map for each register and the max difference between its uses and def.
182
  /// The first element in the pair is the max difference in stages. The
183
  /// second is true if the register defines a Phi value and loop value is
184
  /// scheduled before the Phi.
185
  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
186
 
187
  /// Instructions to change when emitting the final schedule.
188
  InstrChangesTy InstrChanges;
189
 
190
  void generatePipelinedLoop();
191
  void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
192
                      ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
193
  void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
194
                      MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
195
                      ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
196
                      MBBVectorTy &PrologBBs);
197
  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
198
                            MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
199
                            ValueMapTy *VRMap, InstrMapTy &InstrMap,
200
                            unsigned LastStageNum, unsigned CurStageNum,
201
                            bool IsLast);
202
  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
203
                    MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
204
                    ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
205
                    InstrMapTy &InstrMap, unsigned LastStageNum,
206
                    unsigned CurStageNum, bool IsLast);
207
  void removeDeadInstructions(MachineBasicBlock *KernelBB,
208
                              MBBVectorTy &EpilogBBs);
209
  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
210
  void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
211
                   MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
212
                   ValueMapTy *VRMap);
213
  bool computeDelta(MachineInstr &MI, unsigned &Delta);
214
  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
215
                         unsigned Num);
216
  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
217
                           unsigned InstStageNum);
218
  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
219
                                    unsigned InstStageNum);
220
  void updateInstruction(MachineInstr *NewMI, bool LastDef,
221
                         unsigned CurStageNum, unsigned InstrStageNum,
222
                         ValueMapTy *VRMap);
223
  MachineInstr *findDefInLoop(unsigned Reg);
224
  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
225
                         unsigned LoopStage, ValueMapTy *VRMap,
226
                         MachineBasicBlock *BB);
227
  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
228
                        ValueMapTy *VRMap, InstrMapTy &InstrMap);
229
  void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
230
                             unsigned CurStageNum, unsigned PhiNum,
231
                             MachineInstr *Phi, unsigned OldReg,
232
                             unsigned NewReg, unsigned PrevReg = 0);
233
  bool isLoopCarried(MachineInstr &Phi);
234
 
235
  /// Return the max. number of stages/iterations that can occur between a
236
  /// register definition and its uses.
237
  unsigned getStagesForReg(int Reg, unsigned CurStage) {
238
    std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
239
    if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
240
        Stages.second)
241
      return 1;
242
    return Stages.first;
243
  }
244
 
245
  /// The number of stages for a Phi is a little different than other
246
  /// instructions. The minimum value computed in RegToStageDiff is 1
247
  /// because we assume the Phi is needed for at least 1 iteration.
248
  /// This is not the case if the loop value is scheduled prior to the
249
  /// Phi in the same stage.  This function returns the number of stages
250
  /// or iterations needed between the Phi definition and any uses.
251
  unsigned getStagesForPhi(int Reg) {
252
    std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
253
    if (Stages.second)
254
      return Stages.first;
255
    return Stages.first - 1;
256
  }
257
 
258
public:
259
  /// Create a new ModuloScheduleExpander.
260
  /// \arg InstrChanges Modifications to make to instructions with memory
261
  ///   operands.
262
  /// FIXME: InstrChanges is opaque and is an implementation detail of an
263
  ///   optimization in MachinePipeliner that crosses abstraction boundaries.
264
  ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
265
                         LiveIntervals &LIS, InstrChangesTy InstrChanges)
266
      : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
267
        TII(ST.getInstrInfo()), LIS(LIS),
268
        InstrChanges(std::move(InstrChanges)) {}
269
 
270
  /// Performs the actual expansion.
271
  void expand();
272
  /// Performs final cleanup after expansion.
273
  void cleanup();
274
 
275
  /// Returns the newly rewritten kernel block, or nullptr if this was
276
  /// optimized away.
277
  MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
278
};
279
 
280
/// A reimplementation of ModuloScheduleExpander. It works by generating a
281
/// standalone kernel loop and peeling out the prologs and epilogs.
282
class PeelingModuloScheduleExpander {
283
public:
284
  PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
285
                                LiveIntervals *LIS)
286
      : Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
287
        TII(ST.getInstrInfo()), LIS(LIS) {}
288
 
289
  void expand();
290
 
291
  /// Runs ModuloScheduleExpander and treats it as a golden input to validate
292
  /// aspects of the code generated by PeelingModuloScheduleExpander.
293
  void validateAgainstModuloScheduleExpander();
294
 
295
protected:
296
  ModuloSchedule &Schedule;
297
  MachineFunction &MF;
298
  const TargetSubtargetInfo &ST;
299
  MachineRegisterInfo &MRI;
300
  const TargetInstrInfo *TII;
301
  LiveIntervals *LIS;
302
 
303
  /// The original loop block that gets rewritten in-place.
304
  MachineBasicBlock *BB;
305
  /// The original loop preheader.
306
  MachineBasicBlock *Preheader;
307
  /// All prolog and epilog blocks.
308
  SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
309
  /// For every block, the stages that are produced.
310
  DenseMap<MachineBasicBlock *, BitVector> LiveStages;
311
  /// For every block, the stages that are available. A stage can be available
312
  /// but not produced (in the epilog) or produced but not available (in the
313
  /// prolog).
314
  DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
315
  /// When peeling the epilogue keep track of the distance between the phi
316
  /// nodes and the kernel.
317
  DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
318
 
319
  /// CanonicalMIs and BlockMIs form a bidirectional map between any of the
320
  /// loop kernel clones.
321
  DenseMap<MachineInstr *, MachineInstr *> CanonicalMIs;
322
  DenseMap<std::pair<MachineBasicBlock *, MachineInstr *>, MachineInstr *>
323
      BlockMIs;
324
 
325
  /// State passed from peelKernel to peelPrologAndEpilogs().
326
  std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
327
  /// Illegal phis that need to be deleted once we re-link stages.
328
  SmallVector<MachineInstr *, 4> IllegalPhisToDelete;
329
 
330
  /// Converts BB from the original loop body to the rewritten, pipelined
331
  /// steady-state.
332
  void rewriteKernel();
333
 
334
  /// Peels one iteration of the rewritten kernel (BB) in the specified
335
  /// direction.
336
  MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
337
  // Delete instructions whose stage is less than MinStage in the given basic
338
  // block.
339
  void filterInstructions(MachineBasicBlock *MB, int MinStage);
340
  // Move instructions of the given stage from sourceBB to DestBB. Remap the phi
341
  // instructions to keep a valid IR.
342
  void moveStageBetweenBlocks(MachineBasicBlock *DestBB,
343
                              MachineBasicBlock *SourceBB, unsigned Stage);
344
  /// Peel the kernel forwards and backwards to produce prologs and epilogs,
345
  /// and stitch them together.
346
  void peelPrologAndEpilogs();
347
  /// All prolog and epilog blocks are clones of the kernel, so any produced
348
  /// register in one block has an corollary in all other blocks.
349
  Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
350
  /// Change all users of MI, if MI is predicated out
351
  /// (LiveStages[MI->getParent()] == false).
352
  void rewriteUsesOf(MachineInstr *MI);
353
  /// Insert branches between prologs, kernel and epilogs.
354
  void fixupBranches();
355
  /// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
356
  /// to a block dominated by all prologs and epilogs. This allows us to treat
357
  /// the loop exiting block as any other kernel clone.
358
  MachineBasicBlock *CreateLCSSAExitingBlock();
359
  /// Helper to get the stage of an instruction in the schedule.
360
  unsigned getStage(MachineInstr *MI) {
361
    if (CanonicalMIs.count(MI))
362
      MI = CanonicalMIs[MI];
363
    return Schedule.getStage(MI);
364
  }
365
  /// Helper function to find the right canonical register for a phi instruction
366
  /// coming from a peeled out prologue.
367
  Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
368
  /// Target loop info before kernel peeling.
369
  std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
370
};
371
 
372
/// Expander that simply annotates each scheduled instruction with a post-instr
373
/// symbol that can be consumed by the ModuloScheduleTest pass.
374
///
375
/// The post-instr symbol is a way of annotating an instruction that can be
376
/// roundtripped in MIR. The syntax is:
377
///   MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
378
class ModuloScheduleTestAnnotater {
379
  MachineFunction &MF;
380
  ModuloSchedule &S;
381
 
382
public:
383
  ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
384
      : MF(MF), S(S) {}
385
 
386
  /// Performs the annotation.
387
  void annotate();
388
};
389
 
390
} // end namespace llvm
391
 
392
#endif // LLVM_CODEGEN_MODULOSCHEDULE_H