Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 14 | pmbaty | 1 | //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// | 
| 2 | // | ||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | // | ||
| 7 | //===----------------------------------------------------------------------===// | ||
| 8 | // | ||
| 9 | /// \file Implements the ScheduleDAGInstrs class, which implements scheduling | ||
| 10 | /// for a MachineInstr-based dependency graph. | ||
| 11 | // | ||
| 12 | //===----------------------------------------------------------------------===// | ||
| 13 | |||
| 14 | #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H | ||
| 15 | #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H | ||
| 16 | |||
| 17 | #include "llvm/ADT/DenseMap.h" | ||
| 18 | #include "llvm/ADT/PointerIntPair.h" | ||
| 19 | #include "llvm/ADT/SmallVector.h" | ||
| 20 | #include "llvm/ADT/SparseMultiSet.h" | ||
| 21 | #include "llvm/ADT/SparseSet.h" | ||
| 22 | #include "llvm/ADT/identity.h" | ||
| 23 | #include "llvm/CodeGen/LivePhysRegs.h" | ||
| 24 | #include "llvm/CodeGen/MachineBasicBlock.h" | ||
| 25 | #include "llvm/CodeGen/ScheduleDAG.h" | ||
| 26 | #include "llvm/CodeGen/TargetRegisterInfo.h" | ||
| 27 | #include "llvm/CodeGen/TargetSchedule.h" | ||
| 28 | #include "llvm/MC/LaneBitmask.h" | ||
| 29 | #include <cassert> | ||
| 30 | #include <cstdint> | ||
| 31 | #include <list> | ||
| 32 | #include <string> | ||
| 33 | #include <utility> | ||
| 34 | #include <vector> | ||
| 35 | |||
| 36 | namespace llvm { | ||
| 37 | |||
| 38 | class AAResults; | ||
| 39 | class LiveIntervals; | ||
| 40 | class MachineFrameInfo; | ||
| 41 | class MachineFunction; | ||
| 42 | class MachineInstr; | ||
| 43 | class MachineLoopInfo; | ||
| 44 | class MachineOperand; | ||
| 45 | struct MCSchedClassDesc; | ||
| 46 | class PressureDiffs; | ||
| 47 | class PseudoSourceValue; | ||
| 48 | class RegPressureTracker; | ||
| 49 | class UndefValue; | ||
| 50 | class Value; | ||
| 51 | |||
| 52 |   /// An individual mapping from virtual register number to SUnit. | ||
| 53 | struct VReg2SUnit { | ||
| 54 | unsigned VirtReg; | ||
| 55 |     LaneBitmask LaneMask; | ||
| 56 | SUnit *SU; | ||
| 57 | |||
| 58 | VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) | ||
| 59 | : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} | ||
| 60 | |||
| 61 | unsigned getSparseSetIndex() const { | ||
| 62 | return Register::virtReg2Index(VirtReg); | ||
| 63 |     } | ||
| 64 | }; | ||
| 65 | |||
| 66 |   /// Mapping from virtual register to SUnit including an operand index. | ||
| 67 | struct VReg2SUnitOperIdx : public VReg2SUnit { | ||
| 68 | unsigned OperandIndex; | ||
| 69 | |||
| 70 | VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, | ||
| 71 | unsigned OperandIndex, SUnit *SU) | ||
| 72 | : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} | ||
| 73 | }; | ||
| 74 | |||
| 75 |   /// Record a physical register access. | ||
| 76 |   /// For non-data-dependent uses, OpIdx == -1. | ||
| 77 | struct PhysRegSUOper { | ||
| 78 | SUnit *SU; | ||
| 79 | int OpIdx; | ||
| 80 | unsigned Reg; | ||
| 81 | |||
| 82 | PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} | ||
| 83 | |||
| 84 | unsigned getSparseSetIndex() const { return Reg; } | ||
| 85 | }; | ||
| 86 | |||
| 87 |   /// Use a SparseMultiSet to track physical registers. Storage is only | ||
| 88 |   /// allocated once for the pass. It can be cleared in constant time and reused | ||
| 89 |   /// without any frees. | ||
| 90 | using Reg2SUnitsMap = | ||
| 91 | SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; | ||
| 92 | |||
| 93 |   /// Use SparseSet as a SparseMap by relying on the fact that it never | ||
| 94 |   /// compares ValueT's, only unsigned keys. This allows the set to be cleared | ||
| 95 |   /// between scheduling regions in constant time as long as ValueT does not | ||
| 96 |   /// require a destructor. | ||
| 97 | using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>; | ||
| 98 | |||
| 99 |   /// Track local uses of virtual registers. These uses are gathered by the DAG | ||
| 100 |   /// builder and may be consulted by the scheduler to avoid iterating an entire | ||
| 101 |   /// vreg use list. | ||
| 102 | using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; | ||
| 103 | |||
| 104 | using VReg2SUnitOperIdxMultiMap = | ||
| 105 | SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; | ||
| 106 | |||
| 107 | using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; | ||
| 108 | |||
| 109 | struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { | ||
| 110 | UnderlyingObject(ValueType V, bool MayAlias) | ||
| 111 | : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} | ||
| 112 | |||
| 113 | ValueType getValue() const { return getPointer(); } | ||
| 114 | bool mayAlias() const { return getInt(); } | ||
| 115 | }; | ||
| 116 | |||
| 117 | using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; | ||
| 118 | |||
| 119 |   /// A ScheduleDAG for scheduling lists of MachineInstr. | ||
| 120 | class ScheduleDAGInstrs : public ScheduleDAG { | ||
| 121 | protected: | ||
| 122 | const MachineLoopInfo *MLI; | ||
| 123 | const MachineFrameInfo &MFI; | ||
| 124 | |||
| 125 |     /// TargetSchedModel provides an interface to the machine model. | ||
| 126 |     TargetSchedModel SchedModel; | ||
| 127 | |||
| 128 |     /// True if the DAG builder should remove kill flags (in preparation for | ||
| 129 |     /// rescheduling). | ||
| 130 | bool RemoveKillFlags; | ||
| 131 | |||
| 132 |     /// The standard DAG builder does not normally include terminators as DAG | ||
| 133 |     /// nodes because it does not create the necessary dependencies to prevent | ||
| 134 |     /// reordering. A specialized scheduler can override | ||
| 135 |     /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate | ||
| 136 |     /// it has taken responsibility for scheduling the terminator correctly. | ||
| 137 | bool CanHandleTerminators = false; | ||
| 138 | |||
| 139 |     /// Whether lane masks should get tracked. | ||
| 140 | bool TrackLaneMasks = false; | ||
| 141 | |||
| 142 |     // State specific to the current scheduling region. | ||
| 143 |     // ------------------------------------------------ | ||
| 144 | |||
| 145 |     /// The block in which to insert instructions | ||
| 146 | MachineBasicBlock *BB; | ||
| 147 | |||
| 148 |     /// The beginning of the range to be scheduled. | ||
| 149 | MachineBasicBlock::iterator RegionBegin; | ||
| 150 | |||
| 151 |     /// The end of the range to be scheduled. | ||
| 152 | MachineBasicBlock::iterator RegionEnd; | ||
| 153 | |||
| 154 |     /// Instructions in this region (distance(RegionBegin, RegionEnd)). | ||
| 155 | unsigned NumRegionInstrs; | ||
| 156 | |||
| 157 |     /// After calling BuildSchedGraph, each machine instruction in the current | ||
| 158 |     /// scheduling region is mapped to an SUnit. | ||
| 159 | DenseMap<MachineInstr*, SUnit*> MISUnitMap; | ||
| 160 | |||
| 161 |     // State internal to DAG building. | ||
| 162 |     // ------------------------------- | ||
| 163 | |||
| 164 |     /// Defs, Uses - Remember where defs and uses of each register are as we | ||
| 165 |     /// iterate upward through the instructions. This is allocated here instead | ||
| 166 |     /// of inside BuildSchedGraph to avoid the need for it to be initialized and | ||
| 167 |     /// destructed for each block. | ||
| 168 |     Reg2SUnitsMap Defs; | ||
| 169 |     Reg2SUnitsMap Uses; | ||
| 170 | |||
| 171 |     /// Tracks the last instruction(s) in this region defining each virtual | ||
| 172 |     /// register. There may be multiple current definitions for a register with | ||
| 173 |     /// disjunct lanemasks. | ||
| 174 |     VReg2SUnitMultiMap CurrentVRegDefs; | ||
| 175 |     /// Tracks the last instructions in this region using each virtual register. | ||
| 176 |     VReg2SUnitOperIdxMultiMap CurrentVRegUses; | ||
| 177 | |||
| 178 | AAResults *AAForDep = nullptr; | ||
| 179 | |||
| 180 |     /// Remember a generic side-effecting instruction as we proceed. | ||
| 181 |     /// No other SU ever gets scheduled around it (except in the special | ||
| 182 |     /// case of a huge region that gets reduced). | ||
| 183 | SUnit *BarrierChain = nullptr; | ||
| 184 | |||
| 185 | public: | ||
| 186 |     /// A list of SUnits, used in Value2SUsMap, during DAG construction. | ||
| 187 |     /// Note: to gain speed it might be worth investigating an optimized | ||
| 188 |     /// implementation of this data structure, such as a singly linked list | ||
| 189 |     /// with a memory pool (SmallVector was tried but slow and SparseSet is not | ||
| 190 |     /// applicable). | ||
| 191 | using SUList = std::list<SUnit *>; | ||
| 192 | |||
| 193 | protected: | ||
| 194 |     /// A map from ValueType to SUList, used during DAG construction, as | ||
| 195 |     /// a means of remembering which SUs depend on which memory locations. | ||
| 196 | class Value2SUsMap; | ||
| 197 | |||
| 198 |     /// Reduces maps in FIFO order, by N SUs. This is better than turning | ||
| 199 |     /// every Nth memory SU into BarrierChain in buildSchedGraph(), since | ||
| 200 |     /// it avoids unnecessary edges between seen SUs above the new BarrierChain, | ||
| 201 |     /// and those below it. | ||
| 202 | void reduceHugeMemNodeMaps(Value2SUsMap &stores, | ||
| 203 | Value2SUsMap &loads, unsigned N); | ||
| 204 | |||
| 205 |     /// Adds a chain edge between SUa and SUb, but only if both | ||
| 206 |     /// AAResults and Target fail to deny the dependency. | ||
| 207 | void addChainDependency(SUnit *SUa, SUnit *SUb, | ||
| 208 | unsigned Latency = 0); | ||
| 209 | |||
| 210 |     /// Adds dependencies as needed from all SUs in list to SU. | ||
| 211 | void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { | ||
| 212 | for (SUnit *Entry : SUs) | ||
| 213 | addChainDependency(SU, Entry, Latency); | ||
| 214 |     } | ||
| 215 | |||
| 216 |     /// Adds dependencies as needed from all SUs in map, to SU. | ||
| 217 | void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); | ||
| 218 | |||
| 219 |     /// Adds dependencies as needed to SU, from all SUs mapped to V. | ||
| 220 | void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, | ||
| 221 | ValueType V); | ||
| 222 | |||
| 223 |     /// Adds barrier chain edges from all SUs in map, and then clear the map. | ||
| 224 |     /// This is equivalent to insertBarrierChain(), but optimized for the common | ||
| 225 |     /// case where the new BarrierChain (a global memory object) has a higher | ||
| 226 |     /// NodeNum than all SUs in map. It is assumed BarrierChain has been set | ||
| 227 |     /// before calling this. | ||
| 228 | void addBarrierChain(Value2SUsMap &map); | ||
| 229 | |||
| 230 |     /// Inserts a barrier chain in a huge region, far below current SU. | ||
| 231 |     /// Adds barrier chain edges from all SUs in map with higher NodeNums than | ||
| 232 |     /// this new BarrierChain, and remove them from map. It is assumed | ||
| 233 |     /// BarrierChain has been set before calling this. | ||
| 234 | void insertBarrierChain(Value2SUsMap &map); | ||
| 235 | |||
| 236 |     /// For an unanalyzable memory access, this Value is used in maps. | ||
| 237 | UndefValue *UnknownValue; | ||
| 238 | |||
| 239 | |||
| 240 |     /// Topo - A topological ordering for SUnits which permits fast IsReachable | ||
| 241 |     /// and similar queries. | ||
| 242 |     ScheduleDAGTopologicalSort Topo; | ||
| 243 | |||
| 244 | using DbgValueVector = | ||
| 245 | std::vector<std::pair<MachineInstr *, MachineInstr *>>; | ||
| 246 |     /// Remember instruction that precedes DBG_VALUE. | ||
| 247 |     /// These are generated by buildSchedGraph but persist so they can be | ||
| 248 |     /// referenced when emitting the final schedule. | ||
| 249 |     DbgValueVector DbgValues; | ||
| 250 | MachineInstr *FirstDbgValue = nullptr; | ||
| 251 | |||
| 252 |     /// Set of live physical registers for updating kill flags. | ||
| 253 |     LivePhysRegs LiveRegs; | ||
| 254 | |||
| 255 | public: | ||
| 256 | explicit ScheduleDAGInstrs(MachineFunction &mf, | ||
| 257 | const MachineLoopInfo *mli, | ||
| 258 | bool RemoveKillFlags = false); | ||
| 259 | |||
| 260 | ~ScheduleDAGInstrs() override = default; | ||
| 261 | |||
| 262 |     /// Gets the machine model for instruction scheduling. | ||
| 263 | const TargetSchedModel *getSchedModel() const { return &SchedModel; } | ||
| 264 | |||
| 265 |     /// Resolves and cache a resolved scheduling class for an SUnit. | ||
| 266 | const MCSchedClassDesc *getSchedClass(SUnit *SU) const { | ||
| 267 | if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) | ||
| 268 | SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); | ||
| 269 | return SU->SchedClass; | ||
| 270 |     } | ||
| 271 | |||
| 272 |     /// IsReachable - Checks if SU is reachable from TargetSU. | ||
| 273 | bool IsReachable(SUnit *SU, SUnit *TargetSU) { | ||
| 274 | return Topo.IsReachable(SU, TargetSU); | ||
| 275 |     } | ||
| 276 | |||
| 277 |     /// Returns an iterator to the top of the current scheduling region. | ||
| 278 | MachineBasicBlock::iterator begin() const { return RegionBegin; } | ||
| 279 | |||
| 280 |     /// Returns an iterator to the bottom of the current scheduling region. | ||
| 281 | MachineBasicBlock::iterator end() const { return RegionEnd; } | ||
| 282 | |||
| 283 |     /// Creates a new SUnit and return a ptr to it. | ||
| 284 | SUnit *newSUnit(MachineInstr *MI); | ||
| 285 | |||
| 286 |     /// Returns an existing SUnit for this MI, or nullptr. | ||
| 287 | SUnit *getSUnit(MachineInstr *MI) const; | ||
| 288 | |||
| 289 |     /// If this method returns true, handling of the scheduling regions | ||
| 290 |     /// themselves (in case of a scheduling boundary in MBB) will be done | ||
| 291 |     /// beginning with the topmost region of MBB. | ||
| 292 | virtual bool doMBBSchedRegionsTopDown() const { return false; } | ||
| 293 | |||
| 294 |     /// Prepares to perform scheduling in the given block. | ||
| 295 | virtual void startBlock(MachineBasicBlock *BB); | ||
| 296 | |||
| 297 |     /// Cleans up after scheduling in the given block. | ||
| 298 | virtual void finishBlock(); | ||
| 299 | |||
| 300 |     /// Initialize the DAG and common scheduler state for a new | ||
| 301 |     /// scheduling region. This does not actually create the DAG, only clears | ||
| 302 |     /// it. The scheduling driver may call BuildSchedGraph multiple times per | ||
| 303 |     /// scheduling region. | ||
| 304 | virtual void enterRegion(MachineBasicBlock *bb, | ||
| 305 | MachineBasicBlock::iterator begin, | ||
| 306 | MachineBasicBlock::iterator end, | ||
| 307 | unsigned regioninstrs); | ||
| 308 | |||
| 309 |     /// Called when the scheduler has finished scheduling the current region. | ||
| 310 | virtual void exitRegion(); | ||
| 311 | |||
| 312 |     /// Builds SUnits for the current region. | ||
| 313 |     /// If \p RPTracker is non-null, compute register pressure as a side effect. | ||
| 314 |     /// The DAG builder is an efficient place to do it because it already visits | ||
| 315 |     /// operands. | ||
| 316 | void buildSchedGraph(AAResults *AA, | ||
| 317 | RegPressureTracker *RPTracker = nullptr, | ||
| 318 | PressureDiffs *PDiffs = nullptr, | ||
| 319 | LiveIntervals *LIS = nullptr, | ||
| 320 | bool TrackLaneMasks = false); | ||
| 321 | |||
| 322 |     /// Adds dependencies from instructions in the current list of | ||
| 323 |     /// instructions being scheduled to scheduling barrier. We want to make sure | ||
| 324 |     /// instructions which define registers that are either used by the | ||
| 325 |     /// terminator or are live-out are properly scheduled. This is especially | ||
| 326 |     /// important when the definition latency of the return value(s) are too | ||
| 327 |     /// high to be hidden by the branch or when the liveout registers used by | ||
| 328 |     /// instructions in the fallthrough block. | ||
| 329 | void addSchedBarrierDeps(); | ||
| 330 | |||
| 331 |     /// Orders nodes according to selected style. | ||
| 332 |     /// | ||
| 333 |     /// Typically, a scheduling algorithm will implement schedule() without | ||
| 334 |     /// overriding enterRegion() or exitRegion(). | ||
| 335 | virtual void schedule() = 0; | ||
| 336 | |||
| 337 |     /// Allow targets to perform final scheduling actions at the level of the | ||
| 338 |     /// whole MachineFunction. By default does nothing. | ||
| 339 | virtual void finalizeSchedule() {} | ||
| 340 | |||
| 341 | void dumpNode(const SUnit &SU) const override; | ||
| 342 | void dump() const override; | ||
| 343 | |||
| 344 |     /// Returns a label for a DAG node that points to an instruction. | ||
| 345 | std::string getGraphNodeLabel(const SUnit *SU) const override; | ||
| 346 | |||
| 347 |     /// Returns a label for the region of code covered by the DAG. | ||
| 348 | std::string getDAGName() const override; | ||
| 349 | |||
| 350 |     /// Fixes register kill flags that scheduling has made invalid. | ||
| 351 | void fixupKills(MachineBasicBlock &MBB); | ||
| 352 | |||
| 353 |     /// True if an edge can be added from PredSU to SuccSU without creating | ||
| 354 |     /// a cycle. | ||
| 355 | bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); | ||
| 356 | |||
| 357 |     /// Add a DAG edge to the given SU with the given predecessor | ||
| 358 |     /// dependence data. | ||
| 359 |     /// | ||
| 360 |     /// \returns true if the edge may be added without creating a cycle OR if an | ||
| 361 |     /// equivalent edge already existed (false indicates failure). | ||
| 362 | bool addEdge(SUnit *SuccSU, const SDep &PredDep); | ||
| 363 | |||
| 364 | protected: | ||
| 365 | void initSUnits(); | ||
| 366 | void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); | ||
| 367 | void addPhysRegDeps(SUnit *SU, unsigned OperIdx); | ||
| 368 | void addVRegDefDeps(SUnit *SU, unsigned OperIdx); | ||
| 369 | void addVRegUseDeps(SUnit *SU, unsigned OperIdx); | ||
| 370 | |||
| 371 |     /// Returns a mask for which lanes get read/written by the given (register) | ||
| 372 |     /// machine operand. | ||
| 373 | LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; | ||
| 374 | |||
| 375 |     /// Returns true if the def register in \p MO has no uses. | ||
| 376 | bool deadDefHasNoUse(const MachineOperand &MO); | ||
| 377 | }; | ||
| 378 | |||
| 379 |   /// Creates a new SUnit and return a ptr to it. | ||
| 380 | inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { | ||
| 381 | #ifndef NDEBUG | ||
| 382 | const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; | ||
| 383 | #endif | ||
| 384 | SUnits.emplace_back(MI, (unsigned)SUnits.size()); | ||
| 385 | assert((Addr == nullptr || Addr == &SUnits[0]) && | ||
| 386 | "SUnits std::vector reallocated on the fly!"); | ||
| 387 | return &SUnits.back(); | ||
| 388 |   } | ||
| 389 | |||
| 390 |   /// Returns an existing SUnit for this MI, or nullptr. | ||
| 391 | inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { | ||
| 392 | return MISUnitMap.lookup(MI); | ||
| 393 |   } | ||
| 394 | |||
| 395 | } // end namespace llvm | ||
| 396 | |||
| 397 | #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H |