Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- GenericUniformAnalysis.cpp --------------------*- C++ -*------------===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This template implementation resides in a separate file so that it |
||
| 10 | // does not get injected into every .cpp file that includes the |
||
| 11 | // generic header. |
||
| 12 | // |
||
| 13 | // DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO. |
||
| 14 | // |
||
| 15 | // This file should only be included by files that implement a |
||
| 16 | // specialization of the relvant templates. Currently these are: |
||
| 17 | // - UniformityAnalysis.cpp |
||
| 18 | // |
||
| 19 | // Note: The DEBUG_TYPE macro should be defined before using this |
||
| 20 | // file so that any use of LLVM_DEBUG is associated with the |
||
| 21 | // including file rather than this file. |
||
| 22 | // |
||
| 23 | //===----------------------------------------------------------------------===// |
||
| 24 | /// |
||
| 25 | /// \file |
||
| 26 | /// \brief Implementation of uniformity analysis. |
||
| 27 | /// |
||
| 28 | /// The algorithm is a fixed point iteration that starts with the assumption |
||
| 29 | /// that all control flow and all values are uniform. Starting from sources of |
||
| 30 | /// divergence (whose discovery must be implemented by a CFG- or even |
||
| 31 | /// target-specific derived class), divergence of values is propagated from |
||
| 32 | /// definition to uses in a straight-forward way. The main complexity lies in |
||
| 33 | /// the propagation of the impact of divergent control flow on the divergence of |
||
| 34 | /// values (sync dependencies). |
||
| 35 | /// |
||
| 36 | //===----------------------------------------------------------------------===// |
||
| 37 | |||
| 38 | #ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H |
||
| 39 | #define LLVM_ADT_GENERICUNIFORMITYIMPL_H |
||
| 40 | |||
| 41 | #include "llvm/ADT/GenericUniformityInfo.h" |
||
| 42 | |||
| 43 | #include "llvm/ADT/SmallPtrSet.h" |
||
| 44 | #include "llvm/ADT/SparseBitVector.h" |
||
| 45 | #include "llvm/ADT/StringExtras.h" |
||
| 46 | #include "llvm/Support/raw_ostream.h" |
||
| 47 | |||
| 48 | #include <set> |
||
| 49 | |||
| 50 | #define DEBUG_TYPE "uniformity" |
||
| 51 | |||
| 52 | using namespace llvm; |
||
| 53 | |||
| 54 | namespace llvm { |
||
| 55 | |||
| 56 | template <typename Range> auto unique(Range &&R) { |
||
| 57 | return std::unique(adl_begin(R), adl_end(R)); |
||
| 58 | } |
||
| 59 | |||
| 60 | /// Construct a specially modified post-order traversal of cycles. |
||
| 61 | /// |
||
| 62 | /// The ModifiedPO is contructed using a virtually modified CFG as follows: |
||
| 63 | /// |
||
| 64 | /// 1. The successors of pre-entry nodes (predecessors of an cycle |
||
| 65 | /// entry that are outside the cycle) are replaced by the |
||
| 66 | /// successors of the successors of the header. |
||
| 67 | /// 2. Successors of the cycle header are replaced by the exit blocks |
||
| 68 | /// of the cycle. |
||
| 69 | /// |
||
| 70 | /// Effectively, we produce a depth-first numbering with the following |
||
| 71 | /// properties: |
||
| 72 | /// |
||
| 73 | /// 1. Nodes after a cycle are numbered earlier than the cycle header. |
||
| 74 | /// 2. The header is numbered earlier than the nodes in the cycle. |
||
| 75 | /// 3. The numbering of the nodes within the cycle forms an interval |
||
| 76 | /// starting with the header. |
||
| 77 | /// |
||
| 78 | /// Effectively, the virtual modification arranges the nodes in a |
||
| 79 | /// cycle as a DAG with the header as the sole leaf, and successors of |
||
| 80 | /// the header as the roots. A reverse traversal of this numbering has |
||
| 81 | /// the following invariant on the unmodified original CFG: |
||
| 82 | /// |
||
| 83 | /// Each node is visited after all its predecessors, except if that |
||
| 84 | /// predecessor is the cycle header. |
||
| 85 | /// |
||
| 86 | template <typename ContextT> class ModifiedPostOrder { |
||
| 87 | public: |
||
| 88 | using BlockT = typename ContextT::BlockT; |
||
| 89 | using FunctionT = typename ContextT::FunctionT; |
||
| 90 | using DominatorTreeT = typename ContextT::DominatorTreeT; |
||
| 91 | |||
| 92 | using CycleInfoT = GenericCycleInfo<ContextT>; |
||
| 93 | using CycleT = typename CycleInfoT::CycleT; |
||
| 94 | using const_iterator = typename std::vector<BlockT *>::const_iterator; |
||
| 95 | |||
| 96 | ModifiedPostOrder(const ContextT &C) : Context(C) {} |
||
| 97 | |||
| 98 | bool empty() const { return m_order.empty(); } |
||
| 99 | size_t size() const { return m_order.size(); } |
||
| 100 | |||
| 101 | void clear() { m_order.clear(); } |
||
| 102 | void compute(const CycleInfoT &CI); |
||
| 103 | |||
| 104 | unsigned count(BlockT *BB) const { return POIndex.count(BB); } |
||
| 105 | const BlockT *operator[](size_t idx) const { return m_order[idx]; } |
||
| 106 | |||
| 107 | void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) { |
||
| 108 | POIndex[&BB] = m_order.size(); |
||
| 109 | m_order.push_back(&BB); |
||
| 110 | LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB] |
||
| 111 | << "): " << Context.print(&BB) << "\n"); |
||
| 112 | if (isReducibleCycleHeader) |
||
| 113 | ReducibleCycleHeaders.insert(&BB); |
||
| 114 | } |
||
| 115 | |||
| 116 | unsigned getIndex(const BlockT *BB) const { |
||
| 117 | assert(POIndex.count(BB)); |
||
| 118 | return POIndex.lookup(BB); |
||
| 119 | } |
||
| 120 | |||
| 121 | bool isReducibleCycleHeader(const BlockT *BB) const { |
||
| 122 | return ReducibleCycleHeaders.contains(BB); |
||
| 123 | } |
||
| 124 | |||
| 125 | private: |
||
| 126 | SmallVector<const BlockT *> m_order; |
||
| 127 | DenseMap<const BlockT *, unsigned> POIndex; |
||
| 128 | SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders; |
||
| 129 | const ContextT &Context; |
||
| 130 | |||
| 131 | void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle, |
||
| 132 | SmallPtrSetImpl<BlockT *> &Finalized); |
||
| 133 | |||
| 134 | void computeStackPO(SmallVectorImpl<BlockT *> &Stack, const CycleInfoT &CI, |
||
| 135 | const CycleT *Cycle, |
||
| 136 | SmallPtrSetImpl<BlockT *> &Finalized); |
||
| 137 | }; |
||
| 138 | |||
| 139 | template <typename> class DivergencePropagator; |
||
| 140 | |||
| 141 | /// \class GenericSyncDependenceAnalysis |
||
| 142 | /// |
||
| 143 | /// \brief Locate join blocks for disjoint paths starting at a divergent branch. |
||
| 144 | /// |
||
| 145 | /// An analysis per divergent branch that returns the set of basic |
||
| 146 | /// blocks whose phi nodes become divergent due to divergent control. |
||
| 147 | /// These are the blocks that are reachable by two disjoint paths from |
||
| 148 | /// the branch, or cycle exits reachable along a path that is disjoint |
||
| 149 | /// from a path to the cycle latch. |
||
| 150 | |||
| 151 | // --- Above line is not a doxygen comment; intentionally left blank --- |
||
| 152 | // |
||
| 153 | // Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis. |
||
| 154 | // |
||
| 155 | // The SyncDependenceAnalysis is used in the UniformityAnalysis to model |
||
| 156 | // control-induced divergence in phi nodes. |
||
| 157 | // |
||
| 158 | // -- Reference -- |
||
| 159 | // The algorithm is an extension of Section 5 of |
||
| 160 | // |
||
| 161 | // An abstract interpretation for SPMD divergence |
||
| 162 | // on reducible control flow graphs. |
||
| 163 | // Julian Rosemann, Simon Moll and Sebastian Hack |
||
| 164 | // POPL '21 |
||
| 165 | // |
||
| 166 | // |
||
| 167 | // -- Sync dependence -- |
||
| 168 | // Sync dependence characterizes the control flow aspect of the |
||
| 169 | // propagation of branch divergence. For example, |
||
| 170 | // |
||
| 171 | // %cond = icmp slt i32 %tid, 10 |
||
| 172 | // br i1 %cond, label %then, label %else |
||
| 173 | // then: |
||
| 174 | // br label %merge |
||
| 175 | // else: |
||
| 176 | // br label %merge |
||
| 177 | // merge: |
||
| 178 | // %a = phi i32 [ 0, %then ], [ 1, %else ] |
||
| 179 | // |
||
| 180 | // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid |
||
| 181 | // because %tid is not on its use-def chains, %a is sync dependent on %tid |
||
| 182 | // because the branch "br i1 %cond" depends on %tid and affects which value %a |
||
| 183 | // is assigned to. |
||
| 184 | // |
||
| 185 | // |
||
| 186 | // -- Reduction to SSA construction -- |
||
| 187 | // There are two disjoint paths from A to X, if a certain variant of SSA |
||
| 188 | // construction places a phi node in X under the following set-up scheme. |
||
| 189 | // |
||
| 190 | // This variant of SSA construction ignores incoming undef values. |
||
| 191 | // That is paths from the entry without a definition do not result in |
||
| 192 | // phi nodes. |
||
| 193 | // |
||
| 194 | // entry |
||
| 195 | // / \ |
||
| 196 | // A \ |
||
| 197 | // / \ Y |
||
| 198 | // B C / |
||
| 199 | // \ / \ / |
||
| 200 | // D E |
||
| 201 | // \ / |
||
| 202 | // F |
||
| 203 | // |
||
| 204 | // Assume that A contains a divergent branch. We are interested |
||
| 205 | // in the set of all blocks where each block is reachable from A |
||
| 206 | // via two disjoint paths. This would be the set {D, F} in this |
||
| 207 | // case. |
||
| 208 | // To generally reduce this query to SSA construction we introduce |
||
| 209 | // a virtual variable x and assign to x different values in each |
||
| 210 | // successor block of A. |
||
| 211 | // |
||
| 212 | // entry |
||
| 213 | // / \ |
||
| 214 | // A \ |
||
| 215 | // / \ Y |
||
| 216 | // x = 0 x = 1 / |
||
| 217 | // \ / \ / |
||
| 218 | // D E |
||
| 219 | // \ / |
||
| 220 | // F |
||
| 221 | // |
||
| 222 | // Our flavor of SSA construction for x will construct the following |
||
| 223 | // |
||
| 224 | // entry |
||
| 225 | // / \ |
||
| 226 | // A \ |
||
| 227 | // / \ Y |
||
| 228 | // x0 = 0 x1 = 1 / |
||
| 229 | // \ / \ / |
||
| 230 | // x2 = phi E |
||
| 231 | // \ / |
||
| 232 | // x3 = phi |
||
| 233 | // |
||
| 234 | // The blocks D and F contain phi nodes and are thus each reachable |
||
| 235 | // by two disjoins paths from A. |
||
| 236 | // |
||
| 237 | // -- Remarks -- |
||
| 238 | // * In case of cycle exits we need to check for temporal divergence. |
||
| 239 | // To this end, we check whether the definition of x differs between the |
||
| 240 | // cycle exit and the cycle header (_after_ SSA construction). |
||
| 241 | // |
||
| 242 | // * In the presence of irreducible control flow, the fixed point is |
||
| 243 | // reached only after multiple iterations. This is because labels |
||
| 244 | // reaching the header of a cycle must be repropagated through the |
||
| 245 | // cycle. This is true even in a reducible cycle, since the labels |
||
| 246 | // may have been produced by a nested irreducible cycle. |
||
| 247 | // |
||
| 248 | // * Note that SyncDependenceAnalysis is not concerned with the points |
||
| 249 | // of convergence in an irreducible cycle. It's only purpose is to |
||
| 250 | // identify join blocks. The "diverged entry" criterion is |
||
| 251 | // separately applied on join blocks to determine if an entire |
||
| 252 | // irreducible cycle is assumed to be divergent. |
||
| 253 | // |
||
| 254 | // * Relevant related work: |
||
| 255 | // A simple algorithm for global data flow analysis problems. |
||
| 256 | // Matthew S. Hecht and Jeffrey D. Ullman. |
||
| 257 | // SIAM Journal on Computing, 4(4):519–532, December 1975. |
||
| 258 | // |
||
| 259 | template <typename ContextT> class GenericSyncDependenceAnalysis { |
||
| 260 | public: |
||
| 261 | using BlockT = typename ContextT::BlockT; |
||
| 262 | using DominatorTreeT = typename ContextT::DominatorTreeT; |
||
| 263 | using FunctionT = typename ContextT::FunctionT; |
||
| 264 | using ValueRefT = typename ContextT::ValueRefT; |
||
| 265 | using InstructionT = typename ContextT::InstructionT; |
||
| 266 | |||
| 267 | using CycleInfoT = GenericCycleInfo<ContextT>; |
||
| 268 | using CycleT = typename CycleInfoT::CycleT; |
||
| 269 | |||
| 270 | using ConstBlockSet = SmallPtrSet<const BlockT *, 4>; |
||
| 271 | using ModifiedPO = ModifiedPostOrder<ContextT>; |
||
| 272 | |||
| 273 | // * if BlockLabels[B] == C then C is the dominating definition at |
||
| 274 | // block B |
||
| 275 | // * if BlockLabels[B] == nullptr then we haven't seen B yet |
||
| 276 | // * if BlockLabels[B] == B then: |
||
| 277 | // - B is a join point of disjoint paths from X, or, |
||
| 278 | // - B is an immediate successor of X (initial value), or, |
||
| 279 | // - B is X |
||
| 280 | using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>; |
||
| 281 | |||
| 282 | /// Information discovered by the sync dependence analysis for each |
||
| 283 | /// divergent branch. |
||
| 284 | struct DivergenceDescriptor { |
||
| 285 | // Join points of diverged paths. |
||
| 286 | ConstBlockSet JoinDivBlocks; |
||
| 287 | // Divergent cycle exits |
||
| 288 | ConstBlockSet CycleDivBlocks; |
||
| 289 | // Labels assigned to blocks on diverged paths. |
||
| 290 | BlockLabelMap BlockLabels; |
||
| 291 | }; |
||
| 292 | |||
| 293 | using DivergencePropagatorT = DivergencePropagator<ContextT>; |
||
| 294 | |||
| 295 | GenericSyncDependenceAnalysis(const ContextT &Context, |
||
| 296 | const DominatorTreeT &DT, const CycleInfoT &CI); |
||
| 297 | |||
| 298 | /// \brief Computes divergent join points and cycle exits caused by branch |
||
| 299 | /// divergence in \p Term. |
||
| 300 | /// |
||
| 301 | /// This returns a pair of sets: |
||
| 302 | /// * The set of blocks which are reachable by disjoint paths from |
||
| 303 | /// \p Term. |
||
| 304 | /// * The set also contains cycle exits if there two disjoint paths: |
||
| 305 | /// one from \p Term to the cycle exit and another from \p Term to |
||
| 306 | /// the cycle header. |
||
| 307 | const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock); |
||
| 308 | |||
| 309 | private: |
||
| 310 | static DivergenceDescriptor EmptyDivergenceDesc; |
||
| 311 | |||
| 312 | ModifiedPO CyclePO; |
||
| 313 | |||
| 314 | const DominatorTreeT &DT; |
||
| 315 | const CycleInfoT &CI; |
||
| 316 | |||
| 317 | DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>> |
||
| 318 | CachedControlDivDescs; |
||
| 319 | }; |
||
| 320 | |||
| 321 | /// \brief Analysis that identifies uniform values in a data-parallel |
||
| 322 | /// execution. |
||
| 323 | /// |
||
| 324 | /// This analysis propagates divergence in a data-parallel context |
||
| 325 | /// from sources of divergence to all users. It can be instantiated |
||
| 326 | /// for an IR that provides a suitable SSAContext. |
||
| 327 | template <typename ContextT> class GenericUniformityAnalysisImpl { |
||
| 328 | public: |
||
| 329 | using BlockT = typename ContextT::BlockT; |
||
| 330 | using FunctionT = typename ContextT::FunctionT; |
||
| 331 | using ValueRefT = typename ContextT::ValueRefT; |
||
| 332 | using ConstValueRefT = typename ContextT::ConstValueRefT; |
||
| 333 | using InstructionT = typename ContextT::InstructionT; |
||
| 334 | using DominatorTreeT = typename ContextT::DominatorTreeT; |
||
| 335 | |||
| 336 | using CycleInfoT = GenericCycleInfo<ContextT>; |
||
| 337 | using CycleT = typename CycleInfoT::CycleT; |
||
| 338 | |||
| 339 | using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>; |
||
| 340 | using DivergenceDescriptorT = |
||
| 341 | typename SyncDependenceAnalysisT::DivergenceDescriptor; |
||
| 342 | using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap; |
||
| 343 | |||
| 344 | GenericUniformityAnalysisImpl(const FunctionT &F, const DominatorTreeT &DT, |
||
| 345 | const CycleInfoT &CI, |
||
| 346 | const TargetTransformInfo *TTI) |
||
| 347 | : Context(CI.getSSAContext()), F(F), CI(CI), TTI(TTI), DT(DT), |
||
| 348 | SDA(Context, DT, CI) {} |
||
| 349 | |||
| 350 | void initialize(); |
||
| 351 | |||
| 352 | const FunctionT &getFunction() const { return F; } |
||
| 353 | |||
| 354 | /// \brief Mark \p UniVal as a value that is always uniform. |
||
| 355 | void addUniformOverride(const InstructionT &Instr); |
||
| 356 | |||
| 357 | /// \brief Mark \p DivVal as a value that is always divergent. |
||
| 358 | /// \returns Whether the tracked divergence state of \p DivVal changed. |
||
| 359 | bool markDivergent(const InstructionT &I); |
||
| 360 | bool markDivergent(ConstValueRefT DivVal); |
||
| 361 | bool markDefsDivergent(const InstructionT &Instr, |
||
| 362 | bool AllDefsDivergent = true); |
||
| 363 | |||
| 364 | /// \brief Propagate divergence to all instructions in the region. |
||
| 365 | /// Divergence is seeded by calls to \p markDivergent. |
||
| 366 | void compute(); |
||
| 367 | |||
| 368 | /// \brief Whether any value was marked or analyzed to be divergent. |
||
| 369 | bool hasDivergence() const { return !DivergentValues.empty(); } |
||
| 370 | |||
| 371 | /// \brief Whether \p Val will always return a uniform value regardless of its |
||
| 372 | /// operands |
||
| 373 | bool isAlwaysUniform(const InstructionT &Instr) const; |
||
| 374 | |||
| 375 | bool hasDivergentDefs(const InstructionT &I) const; |
||
| 376 | |||
| 377 | bool isDivergent(const InstructionT &I) const { |
||
| 378 | if (I.isTerminator()) { |
||
| 379 | return DivergentTermBlocks.contains(I.getParent()); |
||
| 380 | } |
||
| 381 | return hasDivergentDefs(I); |
||
| 382 | }; |
||
| 383 | |||
| 384 | /// \brief Whether \p Val is divergent at its definition. |
||
| 385 | bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); } |
||
| 386 | |||
| 387 | bool hasDivergentTerminator(const BlockT &B) const { |
||
| 388 | return DivergentTermBlocks.contains(&B); |
||
| 389 | } |
||
| 390 | |||
| 391 | void print(raw_ostream &out) const; |
||
| 392 | |||
| 393 | protected: |
||
| 394 | /// \brief Value/block pair representing a single phi input. |
||
| 395 | struct PhiInput { |
||
| 396 | ConstValueRefT value; |
||
| 397 | BlockT *predBlock; |
||
| 398 | |||
| 399 | PhiInput(ConstValueRefT value, BlockT *predBlock) |
||
| 400 | : value(value), predBlock(predBlock) {} |
||
| 401 | }; |
||
| 402 | |||
| 403 | const ContextT &Context; |
||
| 404 | const FunctionT &F; |
||
| 405 | const CycleInfoT &CI; |
||
| 406 | const TargetTransformInfo *TTI = nullptr; |
||
| 407 | |||
| 408 | // Detected/marked divergent values. |
||
| 409 | std::set<ConstValueRefT> DivergentValues; |
||
| 410 | SmallPtrSet<const BlockT *, 32> DivergentTermBlocks; |
||
| 411 | |||
| 412 | // Internal worklist for divergence propagation. |
||
| 413 | std::vector<const InstructionT *> Worklist; |
||
| 414 | |||
| 415 | /// \brief Mark \p Term as divergent and push all Instructions that become |
||
| 416 | /// divergent as a result on the worklist. |
||
| 417 | void analyzeControlDivergence(const InstructionT &Term); |
||
| 418 | |||
| 419 | private: |
||
| 420 | const DominatorTreeT &DT; |
||
| 421 | |||
| 422 | // Recognized cycles with divergent exits. |
||
| 423 | SmallPtrSet<const CycleT *, 16> DivergentExitCycles; |
||
| 424 | |||
| 425 | // Cycles assumed to be divergent. |
||
| 426 | // |
||
| 427 | // We don't use a set here because every insertion needs an explicit |
||
| 428 | // traversal of all existing members. |
||
| 429 | SmallVector<const CycleT *> AssumedDivergent; |
||
| 430 | |||
| 431 | // The SDA links divergent branches to divergent control-flow joins. |
||
| 432 | SyncDependenceAnalysisT SDA; |
||
| 433 | |||
| 434 | // Set of known-uniform values. |
||
| 435 | SmallPtrSet<const InstructionT *, 32> UniformOverrides; |
||
| 436 | |||
| 437 | /// \brief Mark all nodes in \p JoinBlock as divergent and push them on |
||
| 438 | /// the worklist. |
||
| 439 | void taintAndPushAllDefs(const BlockT &JoinBlock); |
||
| 440 | |||
| 441 | /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on |
||
| 442 | /// the worklist. |
||
| 443 | void taintAndPushPhiNodes(const BlockT &JoinBlock); |
||
| 444 | |||
| 445 | /// \brief Identify all Instructions that become divergent because \p DivExit |
||
| 446 | /// is a divergent cycle exit of \p DivCycle. Mark those instructions as |
||
| 447 | /// divergent and push them on the worklist. |
||
| 448 | void propagateCycleExitDivergence(const BlockT &DivExit, |
||
| 449 | const CycleT &DivCycle); |
||
| 450 | |||
| 451 | /// \brief Internal implementation function for propagateCycleExitDivergence. |
||
| 452 | void analyzeCycleExitDivergence(const CycleT &OuterDivCycle); |
||
| 453 | |||
| 454 | /// \brief Mark all instruction as divergent that use a value defined in \p |
||
| 455 | /// OuterDivCycle. Push their users on the worklist. |
||
| 456 | void analyzeTemporalDivergence(const InstructionT &I, |
||
| 457 | const CycleT &OuterDivCycle); |
||
| 458 | |||
| 459 | /// \brief Push all users of \p Val (in the region) to the worklist. |
||
| 460 | void pushUsers(const InstructionT &I); |
||
| 461 | void pushUsers(ConstValueRefT V); |
||
| 462 | |||
| 463 | bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const; |
||
| 464 | |||
| 465 | /// \brief Whether \p Val is divergent when read in \p ObservingBlock. |
||
| 466 | bool isTemporalDivergent(const BlockT &ObservingBlock, |
||
| 467 | ConstValueRefT Val) const; |
||
| 468 | }; |
||
| 469 | |||
| 470 | template <typename ImplT> |
||
| 471 | void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) { |
||
| 472 | delete Impl; |
||
| 473 | } |
||
| 474 | |||
| 475 | /// Compute divergence starting with a divergent branch. |
||
| 476 | template <typename ContextT> class DivergencePropagator { |
||
| 477 | public: |
||
| 478 | using BlockT = typename ContextT::BlockT; |
||
| 479 | using DominatorTreeT = typename ContextT::DominatorTreeT; |
||
| 480 | using FunctionT = typename ContextT::FunctionT; |
||
| 481 | using ValueRefT = typename ContextT::ValueRefT; |
||
| 482 | |||
| 483 | using CycleInfoT = GenericCycleInfo<ContextT>; |
||
| 484 | using CycleT = typename CycleInfoT::CycleT; |
||
| 485 | |||
| 486 | using ModifiedPO = ModifiedPostOrder<ContextT>; |
||
| 487 | using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>; |
||
| 488 | using DivergenceDescriptorT = |
||
| 489 | typename SyncDependenceAnalysisT::DivergenceDescriptor; |
||
| 490 | using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap; |
||
| 491 | |||
| 492 | const ModifiedPO &CyclePOT; |
||
| 493 | const DominatorTreeT &DT; |
||
| 494 | const CycleInfoT &CI; |
||
| 495 | const BlockT &DivTermBlock; |
||
| 496 | const ContextT &Context; |
||
| 497 | |||
| 498 | // Track blocks that receive a new label. Every time we relabel a |
||
| 499 | // cycle header, we another pass over the modified post-order in |
||
| 500 | // order to propagate the header label. The bit vector also allows |
||
| 501 | // us to skip labels that have not changed. |
||
| 502 | SparseBitVector<> FreshLabels; |
||
| 503 | |||
| 504 | // divergent join and cycle exit descriptor. |
||
| 505 | std::unique_ptr<DivergenceDescriptorT> DivDesc; |
||
| 506 | BlockLabelMapT &BlockLabels; |
||
| 507 | |||
| 508 | DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, |
||
| 509 | const CycleInfoT &CI, const BlockT &DivTermBlock) |
||
| 510 | : CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock), |
||
| 511 | Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT), |
||
| 512 | BlockLabels(DivDesc->BlockLabels) {} |
||
| 513 | |||
| 514 | void printDefs(raw_ostream &Out) { |
||
| 515 | Out << "Propagator::BlockLabels {\n"; |
||
| 516 | for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) { |
||
| 517 | const auto *Block = CyclePOT[BlockIdx]; |
||
| 518 | const auto *Label = BlockLabels[Block]; |
||
| 519 | Out << Context.print(Block) << "(" << BlockIdx << ") : "; |
||
| 520 | if (!Label) { |
||
| 521 | Out << "<null>\n"; |
||
| 522 | } else { |
||
| 523 | Out << Context.print(Label) << "\n"; |
||
| 524 | } |
||
| 525 | } |
||
| 526 | Out << "}\n"; |
||
| 527 | } |
||
| 528 | |||
| 529 | // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this |
||
| 530 | // causes a divergent join. |
||
| 531 | bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) { |
||
| 532 | const auto *OldLabel = BlockLabels[&SuccBlock]; |
||
| 533 | |||
| 534 | LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n" |
||
| 535 | << "\tpushed label: " << Context.print(&PushedLabel) |
||
| 536 | << "\n" |
||
| 537 | << "\told label: " << Context.print(OldLabel) << "\n"); |
||
| 538 | |||
| 539 | // Early exit if there is no change in the label. |
||
| 540 | if (OldLabel == &PushedLabel) |
||
| 541 | return false; |
||
| 542 | |||
| 543 | if (OldLabel != &SuccBlock) { |
||
| 544 | auto SuccIdx = CyclePOT.getIndex(&SuccBlock); |
||
| 545 | // Assigning a new label, mark this in FreshLabels. |
||
| 546 | LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n"); |
||
| 547 | FreshLabels.set(SuccIdx); |
||
| 548 | } |
||
| 549 | |||
| 550 | // This is not a join if the succ was previously unlabeled. |
||
| 551 | if (!OldLabel) { |
||
| 552 | LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel) |
||
| 553 | << "\n"); |
||
| 554 | BlockLabels[&SuccBlock] = &PushedLabel; |
||
| 555 | return false; |
||
| 556 | } |
||
| 557 | |||
| 558 | // This is a new join. Label the join block as itself, and not as |
||
| 559 | // the pushed label. |
||
| 560 | LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n"); |
||
| 561 | BlockLabels[&SuccBlock] = &SuccBlock; |
||
| 562 | |||
| 563 | return true; |
||
| 564 | } |
||
| 565 | |||
| 566 | // visiting a virtual cycle exit edge from the cycle header --> temporal |
||
| 567 | // divergence on join |
||
| 568 | bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) { |
||
| 569 | if (!computeJoin(ExitBlock, Label)) |
||
| 570 | return false; |
||
| 571 | |||
| 572 | // Identified a divergent cycle exit |
||
| 573 | DivDesc->CycleDivBlocks.insert(&ExitBlock); |
||
| 574 | LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock) |
||
| 575 | << "\n"); |
||
| 576 | return true; |
||
| 577 | } |
||
| 578 | |||
| 579 | // process \p SuccBlock with reaching definition \p Label |
||
| 580 | bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) { |
||
| 581 | if (!computeJoin(SuccBlock, Label)) |
||
| 582 | return false; |
||
| 583 | |||
| 584 | // Divergent, disjoint paths join. |
||
| 585 | DivDesc->JoinDivBlocks.insert(&SuccBlock); |
||
| 586 | LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock) |
||
| 587 | << "\n"); |
||
| 588 | return true; |
||
| 589 | } |
||
| 590 | |||
| 591 | std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() { |
||
| 592 | assert(DivDesc); |
||
| 593 | |||
| 594 | LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " |
||
| 595 | << Context.print(&DivTermBlock) << "\n"); |
||
| 596 | |||
| 597 | // Early stopping criterion |
||
| 598 | int FloorIdx = CyclePOT.size() - 1; |
||
| 599 | const BlockT *FloorLabel = nullptr; |
||
| 600 | int DivTermIdx = CyclePOT.getIndex(&DivTermBlock); |
||
| 601 | |||
| 602 | // Bootstrap with branch targets |
||
| 603 | auto const *DivTermCycle = CI.getCycle(&DivTermBlock); |
||
| 604 | for (const auto *SuccBlock : successors(&DivTermBlock)) { |
||
| 605 | if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) { |
||
| 606 | // If DivTerm exits the cycle immediately, computeJoin() might |
||
| 607 | // not reach SuccBlock with a different label. We need to |
||
| 608 | // check for this exit now. |
||
| 609 | DivDesc->CycleDivBlocks.insert(SuccBlock); |
||
| 610 | LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: " |
||
| 611 | << Context.print(SuccBlock) << "\n"); |
||
| 612 | } |
||
| 613 | auto SuccIdx = CyclePOT.getIndex(SuccBlock); |
||
| 614 | visitEdge(*SuccBlock, *SuccBlock); |
||
| 615 | FloorIdx = std::min<int>(FloorIdx, SuccIdx); |
||
| 616 | } |
||
| 617 | |||
| 618 | while (true) { |
||
| 619 | auto BlockIdx = FreshLabels.find_last(); |
||
| 620 | if (BlockIdx == -1 || BlockIdx < FloorIdx) |
||
| 621 | break; |
||
| 622 | |||
| 623 | LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs())); |
||
| 624 | |||
| 625 | FreshLabels.reset(BlockIdx); |
||
| 626 | if (BlockIdx == DivTermIdx) { |
||
| 627 | LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n"); |
||
| 628 | continue; |
||
| 629 | } |
||
| 630 | |||
| 631 | const auto *Block = CyclePOT[BlockIdx]; |
||
| 632 | LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index " |
||
| 633 | << BlockIdx << "\n"); |
||
| 634 | |||
| 635 | const auto *Label = BlockLabels[Block]; |
||
| 636 | assert(Label); |
||
| 637 | |||
| 638 | bool CausedJoin = false; |
||
| 639 | int LoweredFloorIdx = FloorIdx; |
||
| 640 | |||
| 641 | // If the current block is the header of a reducible cycle that |
||
| 642 | // contains the divergent branch, then the label should be |
||
| 643 | // propagated to the cycle exits. Such a header is the "last |
||
| 644 | // possible join" of any disjoint paths within this cycle. This |
||
| 645 | // prevents detection of spurious joins at the entries of any |
||
| 646 | // irreducible child cycles. |
||
| 647 | // |
||
| 648 | // This conclusion about the header is true for any choice of DFS: |
||
| 649 | // |
||
| 650 | // If some DFS has a reducible cycle C with header H, then for |
||
| 651 | // any other DFS, H is the header of a cycle C' that is a |
||
| 652 | // superset of C. For a divergent branch inside the subgraph |
||
| 653 | // C, any join node inside C is either H, or some node |
||
| 654 | // encountered without passing through H. |
||
| 655 | // |
||
| 656 | auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * { |
||
| 657 | if (!CyclePOT.isReducibleCycleHeader(Block)) |
||
| 658 | return nullptr; |
||
| 659 | const auto *BlockCycle = CI.getCycle(Block); |
||
| 660 | if (BlockCycle->contains(&DivTermBlock)) |
||
| 661 | return BlockCycle; |
||
| 662 | return nullptr; |
||
| 663 | }; |
||
| 664 | |||
| 665 | if (const auto *BlockCycle = getReducibleParent(Block)) { |
||
| 666 | SmallVector<BlockT *, 4> BlockCycleExits; |
||
| 667 | BlockCycle->getExitBlocks(BlockCycleExits); |
||
| 668 | for (auto *BlockCycleExit : BlockCycleExits) { |
||
| 669 | CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label); |
||
| 670 | LoweredFloorIdx = |
||
| 671 | std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit)); |
||
| 672 | } |
||
| 673 | } else { |
||
| 674 | for (const auto *SuccBlock : successors(Block)) { |
||
| 675 | CausedJoin |= visitEdge(*SuccBlock, *Label); |
||
| 676 | LoweredFloorIdx = |
||
| 677 | std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock)); |
||
| 678 | } |
||
| 679 | } |
||
| 680 | |||
| 681 | // Floor update |
||
| 682 | if (CausedJoin) { |
||
| 683 | // 1. Different labels pushed to successors |
||
| 684 | FloorIdx = LoweredFloorIdx; |
||
| 685 | } else if (FloorLabel != Label) { |
||
| 686 | // 2. No join caused BUT we pushed a label that is different than the |
||
| 687 | // last pushed label |
||
| 688 | FloorIdx = LoweredFloorIdx; |
||
| 689 | FloorLabel = Label; |
||
| 690 | } |
||
| 691 | } |
||
| 692 | |||
| 693 | LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs())); |
||
| 694 | |||
| 695 | // Check every cycle containing DivTermBlock for exit divergence. |
||
| 696 | // A cycle has exit divergence if the label of an exit block does |
||
| 697 | // not match the label of its header. |
||
| 698 | for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle; |
||
| 699 | Cycle = Cycle->getParentCycle()) { |
||
| 700 | if (Cycle->isReducible()) { |
||
| 701 | // The exit divergence of a reducible cycle is recorded while |
||
| 702 | // propagating labels. |
||
| 703 | continue; |
||
| 704 | } |
||
| 705 | SmallVector<BlockT *> Exits; |
||
| 706 | Cycle->getExitBlocks(Exits); |
||
| 707 | auto *Header = Cycle->getHeader(); |
||
| 708 | auto *HeaderLabel = BlockLabels[Header]; |
||
| 709 | for (const auto *Exit : Exits) { |
||
| 710 | if (BlockLabels[Exit] != HeaderLabel) { |
||
| 711 | // Identified a divergent cycle exit |
||
| 712 | DivDesc->CycleDivBlocks.insert(Exit); |
||
| 713 | LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit) |
||
| 714 | << "\n"); |
||
| 715 | } |
||
| 716 | } |
||
| 717 | } |
||
| 718 | |||
| 719 | return std::move(DivDesc); |
||
| 720 | } |
||
| 721 | }; |
||
| 722 | |||
| 723 | template <typename ContextT> |
||
| 724 | typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor |
||
| 725 | llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc; |
||
| 726 | |||
| 727 | template <typename ContextT> |
||
| 728 | llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis( |
||
| 729 | const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI) |
||
| 730 | : CyclePO(Context), DT(DT), CI(CI) { |
||
| 731 | CyclePO.compute(CI); |
||
| 732 | } |
||
| 733 | |||
| 734 | template <typename ContextT> |
||
| 735 | auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks( |
||
| 736 | const BlockT *DivTermBlock) -> const DivergenceDescriptor & { |
||
| 737 | // trivial case |
||
| 738 | if (succ_size(DivTermBlock) <= 1) { |
||
| 739 | return EmptyDivergenceDesc; |
||
| 740 | } |
||
| 741 | |||
| 742 | // already available in cache? |
||
| 743 | auto ItCached = CachedControlDivDescs.find(DivTermBlock); |
||
| 744 | if (ItCached != CachedControlDivDescs.end()) |
||
| 745 | return *ItCached->second; |
||
| 746 | |||
| 747 | // compute all join points |
||
| 748 | DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock); |
||
| 749 | auto DivDesc = Propagator.computeJoinPoints(); |
||
| 750 | |||
| 751 | auto printBlockSet = [&](ConstBlockSet &Blocks) { |
||
| 752 | return Printable([&](raw_ostream &Out) { |
||
| 753 | Out << "["; |
||
| 754 | ListSeparator LS; |
||
| 755 | for (const auto *BB : Blocks) { |
||
| 756 | Out << LS << CI.getSSAContext().print(BB); |
||
| 757 | } |
||
| 758 | Out << "]\n"; |
||
| 759 | }); |
||
| 760 | }; |
||
| 761 | |||
| 762 | LLVM_DEBUG( |
||
| 763 | dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock) |
||
| 764 | << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks) |
||
| 765 | << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks) |
||
| 766 | << "\n"); |
||
| 767 | (void)printBlockSet; |
||
| 768 | |||
| 769 | auto ItInserted = |
||
| 770 | CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc)); |
||
| 771 | assert(ItInserted.second); |
||
| 772 | return *ItInserted.first->second; |
||
| 773 | } |
||
| 774 | |||
| 775 | template <typename ContextT> |
||
| 776 | bool GenericUniformityAnalysisImpl<ContextT>::markDivergent( |
||
| 777 | const InstructionT &I) { |
||
| 778 | if (I.isTerminator()) { |
||
| 779 | if (DivergentTermBlocks.insert(I.getParent()).second) { |
||
| 780 | LLVM_DEBUG(dbgs() << "marked divergent term block: " |
||
| 781 | << Context.print(I.getParent()) << "\n"); |
||
| 782 | return true; |
||
| 783 | } |
||
| 784 | return false; |
||
| 785 | } |
||
| 786 | |||
| 787 | return markDefsDivergent(I); |
||
| 788 | } |
||
| 789 | |||
| 790 | template <typename ContextT> |
||
| 791 | bool GenericUniformityAnalysisImpl<ContextT>::markDivergent( |
||
| 792 | ConstValueRefT Val) { |
||
| 793 | if (DivergentValues.insert(Val).second) { |
||
| 794 | LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n"); |
||
| 795 | return true; |
||
| 796 | } |
||
| 797 | return false; |
||
| 798 | } |
||
| 799 | |||
| 800 | template <typename ContextT> |
||
| 801 | void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride( |
||
| 802 | const InstructionT &Instr) { |
||
| 803 | UniformOverrides.insert(&Instr); |
||
| 804 | } |
||
| 805 | |||
| 806 | template <typename ContextT> |
||
| 807 | void GenericUniformityAnalysisImpl<ContextT>::analyzeTemporalDivergence( |
||
| 808 | const InstructionT &I, const CycleT &OuterDivCycle) { |
||
| 809 | if (isDivergent(I)) |
||
| 810 | return; |
||
| 811 | |||
| 812 | LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << Context.print(&I) |
||
| 813 | << "\n"); |
||
| 814 | if (!usesValueFromCycle(I, OuterDivCycle)) |
||
| 815 | return; |
||
| 816 | |||
| 817 | if (isAlwaysUniform(I)) |
||
| 818 | return; |
||
| 819 | |||
| 820 | if (markDivergent(I)) |
||
| 821 | Worklist.push_back(&I); |
||
| 822 | } |
||
| 823 | |||
| 824 | // Mark all external users of values defined inside \param |
||
| 825 | // OuterDivCycle as divergent. |
||
| 826 | // |
||
| 827 | // This follows all live out edges wherever they may lead. Potential |
||
| 828 | // users of values defined inside DivCycle could be anywhere in the |
||
| 829 | // dominance region of DivCycle (including its fringes for phi nodes). |
||
| 830 | // A cycle C dominates a block B iff every path from the entry block |
||
| 831 | // to B must pass through a block contained in C. If C is a reducible |
||
| 832 | // cycle (or natural loop), C dominates B iff the header of C |
||
| 833 | // dominates B. But in general, we iteratively examine cycle cycle |
||
| 834 | // exits and their successors. |
||
| 835 | template <typename ContextT> |
||
| 836 | void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence( |
||
| 837 | const CycleT &OuterDivCycle) { |
||
| 838 | // Set of blocks that are dominated by the cycle, i.e., each is only |
||
| 839 | // reachable from paths that pass through the cycle. |
||
| 840 | SmallPtrSet<BlockT *, 16> DomRegion; |
||
| 841 | |||
| 842 | // The boundary of DomRegion, formed by blocks that are not |
||
| 843 | // dominated by the cycle. |
||
| 844 | SmallVector<BlockT *> DomFrontier; |
||
| 845 | OuterDivCycle.getExitBlocks(DomFrontier); |
||
| 846 | |||
| 847 | // Returns true if BB is dominated by the cycle. |
||
| 848 | auto isInDomRegion = [&](BlockT *BB) { |
||
| 849 | for (auto *P : predecessors(BB)) { |
||
| 850 | if (OuterDivCycle.contains(P)) |
||
| 851 | continue; |
||
| 852 | if (DomRegion.count(P)) |
||
| 853 | continue; |
||
| 854 | return false; |
||
| 855 | } |
||
| 856 | return true; |
||
| 857 | }; |
||
| 858 | |||
| 859 | // Keep advancing the frontier along successor edges, while |
||
| 860 | // promoting blocks to DomRegion. |
||
| 861 | while (true) { |
||
| 862 | bool Promoted = false; |
||
| 863 | SmallVector<BlockT *> Temp; |
||
| 864 | for (auto *W : DomFrontier) { |
||
| 865 | if (!isInDomRegion(W)) { |
||
| 866 | Temp.push_back(W); |
||
| 867 | continue; |
||
| 868 | } |
||
| 869 | DomRegion.insert(W); |
||
| 870 | Promoted = true; |
||
| 871 | for (auto *Succ : successors(W)) { |
||
| 872 | if (DomRegion.contains(Succ)) |
||
| 873 | continue; |
||
| 874 | Temp.push_back(Succ); |
||
| 875 | } |
||
| 876 | } |
||
| 877 | if (!Promoted) |
||
| 878 | break; |
||
| 879 | DomFrontier = Temp; |
||
| 880 | } |
||
| 881 | |||
| 882 | // At DomFrontier, only the PHI nodes are affected by temporal |
||
| 883 | // divergence. |
||
| 884 | for (const auto *UserBlock : DomFrontier) { |
||
| 885 | LLVM_DEBUG(dbgs() << "Analyze phis after cycle exit: " |
||
| 886 | << Context.print(UserBlock) << "\n"); |
||
| 887 | for (const auto &Phi : UserBlock->phis()) { |
||
| 888 | LLVM_DEBUG(dbgs() << " " << Context.print(&Phi) << "\n"); |
||
| 889 | analyzeTemporalDivergence(Phi, OuterDivCycle); |
||
| 890 | } |
||
| 891 | } |
||
| 892 | |||
| 893 | // All instructions inside the dominance region are affected by |
||
| 894 | // temporal divergence. |
||
| 895 | for (const auto *UserBlock : DomRegion) { |
||
| 896 | LLVM_DEBUG(dbgs() << "Analyze non-phi users after cycle exit: " |
||
| 897 | << Context.print(UserBlock) << "\n"); |
||
| 898 | for (const auto &I : *UserBlock) { |
||
| 899 | LLVM_DEBUG(dbgs() << " " << Context.print(&I) << "\n"); |
||
| 900 | analyzeTemporalDivergence(I, OuterDivCycle); |
||
| 901 | } |
||
| 902 | } |
||
| 903 | } |
||
| 904 | |||
| 905 | template <typename ContextT> |
||
| 906 | void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence( |
||
| 907 | const BlockT &DivExit, const CycleT &InnerDivCycle) { |
||
| 908 | LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit) |
||
| 909 | << "\n"); |
||
| 910 | auto *DivCycle = &InnerDivCycle; |
||
| 911 | auto *OuterDivCycle = DivCycle; |
||
| 912 | auto *ExitLevelCycle = CI.getCycle(&DivExit); |
||
| 913 | const unsigned CycleExitDepth = |
||
| 914 | ExitLevelCycle ? ExitLevelCycle->getDepth() : 0; |
||
| 915 | |||
| 916 | // Find outer-most cycle that does not contain \p DivExit |
||
| 917 | while (DivCycle && DivCycle->getDepth() > CycleExitDepth) { |
||
| 918 | LLVM_DEBUG(dbgs() << " Found exiting cycle: " |
||
| 919 | << Context.print(DivCycle->getHeader()) << "\n"); |
||
| 920 | OuterDivCycle = DivCycle; |
||
| 921 | DivCycle = DivCycle->getParentCycle(); |
||
| 922 | } |
||
| 923 | LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: " |
||
| 924 | << Context.print(OuterDivCycle->getHeader()) << "\n"); |
||
| 925 | |||
| 926 | if (!DivergentExitCycles.insert(OuterDivCycle).second) |
||
| 927 | return; |
||
| 928 | |||
| 929 | // Exit divergence does not matter if the cycle itself is assumed to |
||
| 930 | // be divergent. |
||
| 931 | for (const auto *C : AssumedDivergent) { |
||
| 932 | if (C->contains(OuterDivCycle)) |
||
| 933 | return; |
||
| 934 | } |
||
| 935 | |||
| 936 | analyzeCycleExitDivergence(*OuterDivCycle); |
||
| 937 | } |
||
| 938 | |||
| 939 | template <typename ContextT> |
||
| 940 | void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs( |
||
| 941 | const BlockT &BB) { |
||
| 942 | LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n"); |
||
| 943 | for (const auto &I : instrs(BB)) { |
||
| 944 | // Terminators do not produce values; they are divergent only if |
||
| 945 | // the condition is divergent. That is handled when the divergent |
||
| 946 | // condition is placed in the worklist. |
||
| 947 | if (I.isTerminator()) |
||
| 948 | break; |
||
| 949 | |||
| 950 | // Mark this as divergent. We don't check if the instruction is |
||
| 951 | // always uniform. In a cycle where the thread convergence is not |
||
| 952 | // statically known, the instruction is not statically converged, |
||
| 953 | // and its outputs cannot be statically uniform. |
||
| 954 | if (markDivergent(I)) |
||
| 955 | Worklist.push_back(&I); |
||
| 956 | } |
||
| 957 | } |
||
| 958 | |||
| 959 | /// Mark divergent phi nodes in a join block |
||
| 960 | template <typename ContextT> |
||
| 961 | void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes( |
||
| 962 | const BlockT &JoinBlock) { |
||
| 963 | LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock) |
||
| 964 | << "\n"); |
||
| 965 | for (const auto &Phi : JoinBlock.phis()) { |
||
| 966 | if (ContextT::isConstantValuePhi(Phi)) |
||
| 967 | continue; |
||
| 968 | if (markDivergent(Phi)) |
||
| 969 | Worklist.push_back(&Phi); |
||
| 970 | } |
||
| 971 | } |
||
| 972 | |||
| 973 | /// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles. |
||
| 974 | /// |
||
| 975 | /// \return true iff \p Candidate was added to \p Cycles. |
||
| 976 | template <typename CycleT> |
||
| 977 | static bool insertIfNotContained(SmallVector<CycleT *> &Cycles, |
||
| 978 | CycleT *Candidate) { |
||
| 979 | if (llvm::any_of(Cycles, |
||
| 980 | [Candidate](CycleT *C) { return C->contains(Candidate); })) |
||
| 981 | return false; |
||
| 982 | Cycles.push_back(Candidate); |
||
| 983 | return true; |
||
| 984 | } |
||
| 985 | |||
| 986 | /// Return the outermost cycle made divergent by branch outside it. |
||
| 987 | /// |
||
| 988 | /// If two paths that diverged outside an irreducible cycle join |
||
| 989 | /// inside that cycle, then that whole cycle is assumed to be |
||
| 990 | /// divergent. This does not apply if the cycle is reducible. |
||
| 991 | template <typename CycleT, typename BlockT> |
||
| 992 | static const CycleT *getExtDivCycle(const CycleT *Cycle, |
||
| 993 | const BlockT *DivTermBlock, |
||
| 994 | const BlockT *JoinBlock) { |
||
| 995 | assert(Cycle); |
||
| 996 | assert(Cycle->contains(JoinBlock)); |
||
| 997 | |||
| 998 | if (Cycle->contains(DivTermBlock)) |
||
| 999 | return nullptr; |
||
| 1000 | |||
| 1001 | if (Cycle->isReducible()) { |
||
| 1002 | assert(Cycle->getHeader() == JoinBlock); |
||
| 1003 | return nullptr; |
||
| 1004 | } |
||
| 1005 | |||
| 1006 | const auto *Parent = Cycle->getParentCycle(); |
||
| 1007 | while (Parent && !Parent->contains(DivTermBlock)) { |
||
| 1008 | // If the join is inside a child, then the parent must be |
||
| 1009 | // irreducible. The only join in a reducible cyle is its own |
||
| 1010 | // header. |
||
| 1011 | assert(!Parent->isReducible()); |
||
| 1012 | Cycle = Parent; |
||
| 1013 | Parent = Cycle->getParentCycle(); |
||
| 1014 | } |
||
| 1015 | |||
| 1016 | LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n"); |
||
| 1017 | return Cycle; |
||
| 1018 | } |
||
| 1019 | |||
| 1020 | /// Return the outermost cycle made divergent by branch inside it. |
||
| 1021 | /// |
||
| 1022 | /// This checks the "diverged entry" criterion defined in the |
||
| 1023 | /// docs/ConvergenceAnalysis.html. |
||
| 1024 | template <typename ContextT, typename CycleT, typename BlockT, |
||
| 1025 | typename DominatorTreeT> |
||
| 1026 | static const CycleT * |
||
| 1027 | getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, |
||
| 1028 | const BlockT *JoinBlock, const DominatorTreeT &DT, |
||
| 1029 | ContextT &Context) { |
||
| 1030 | LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock) |
||
| 1031 | << "for internal branch " << Context.print(DivTermBlock) |
||
| 1032 | << "\n"); |
||
| 1033 | if (DT.properlyDominates(DivTermBlock, JoinBlock)) |
||
| 1034 | return nullptr; |
||
| 1035 | |||
| 1036 | // Find the smallest common cycle, if one exists. |
||
| 1037 | assert(Cycle && Cycle->contains(JoinBlock)); |
||
| 1038 | while (Cycle && !Cycle->contains(DivTermBlock)) { |
||
| 1039 | Cycle = Cycle->getParentCycle(); |
||
| 1040 | } |
||
| 1041 | if (!Cycle || Cycle->isReducible()) |
||
| 1042 | return nullptr; |
||
| 1043 | |||
| 1044 | if (DT.properlyDominates(Cycle->getHeader(), JoinBlock)) |
||
| 1045 | return nullptr; |
||
| 1046 | |||
| 1047 | LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader()) |
||
| 1048 | << " does not dominate join\n"); |
||
| 1049 | |||
| 1050 | const auto *Parent = Cycle->getParentCycle(); |
||
| 1051 | while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) { |
||
| 1052 | LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader()) |
||
| 1053 | << " does not dominate join\n"); |
||
| 1054 | Cycle = Parent; |
||
| 1055 | Parent = Parent->getParentCycle(); |
||
| 1056 | } |
||
| 1057 | |||
| 1058 | LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n"); |
||
| 1059 | return Cycle; |
||
| 1060 | } |
||
| 1061 | |||
| 1062 | template <typename ContextT, typename CycleT, typename BlockT, |
||
| 1063 | typename DominatorTreeT> |
||
| 1064 | static const CycleT * |
||
| 1065 | getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, |
||
| 1066 | const BlockT *JoinBlock, const DominatorTreeT &DT, |
||
| 1067 | ContextT &Context) { |
||
| 1068 | if (!Cycle) |
||
| 1069 | return nullptr; |
||
| 1070 | |||
| 1071 | // First try to expand Cycle to the largest that contains JoinBlock |
||
| 1072 | // but not DivTermBlock. |
||
| 1073 | const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock); |
||
| 1074 | |||
| 1075 | // Continue expanding to the largest cycle that contains both. |
||
| 1076 | const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context); |
||
| 1077 | |||
| 1078 | if (Int) |
||
| 1079 | return Int; |
||
| 1080 | return Ext; |
||
| 1081 | } |
||
| 1082 | |||
| 1083 | template <typename ContextT> |
||
| 1084 | void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence( |
||
| 1085 | const InstructionT &Term) { |
||
| 1086 | const auto *DivTermBlock = Term.getParent(); |
||
| 1087 | DivergentTermBlocks.insert(DivTermBlock); |
||
| 1088 | LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock) |
||
| 1089 | << "\n"); |
||
| 1090 | |||
| 1091 | // Don't propagate divergence from unreachable blocks. |
||
| 1092 | if (!DT.isReachableFromEntry(DivTermBlock)) |
||
| 1093 | return; |
||
| 1094 | |||
| 1095 | const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock); |
||
| 1096 | SmallVector<const CycleT *> DivCycles; |
||
| 1097 | |||
| 1098 | // Iterate over all blocks now reachable by a disjoint path join |
||
| 1099 | for (const auto *JoinBlock : DivDesc.JoinDivBlocks) { |
||
| 1100 | const auto *Cycle = CI.getCycle(JoinBlock); |
||
| 1101 | LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock) |
||
| 1102 | << "\n"); |
||
| 1103 | if (const auto *Outermost = getOutermostDivergentCycle( |
||
| 1104 | Cycle, DivTermBlock, JoinBlock, DT, Context)) { |
||
| 1105 | LLVM_DEBUG(dbgs() << "found divergent cycle\n"); |
||
| 1106 | DivCycles.push_back(Outermost); |
||
| 1107 | continue; |
||
| 1108 | } |
||
| 1109 | taintAndPushPhiNodes(*JoinBlock); |
||
| 1110 | } |
||
| 1111 | |||
| 1112 | // Sort by order of decreasing depth. This allows later cycles to be skipped |
||
| 1113 | // because they are already contained in earlier ones. |
||
| 1114 | llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) { |
||
| 1115 | return A->getDepth() > B->getDepth(); |
||
| 1116 | }); |
||
| 1117 | |||
| 1118 | // Cycles that are assumed divergent due to the diverged entry |
||
| 1119 | // criterion potentially contain temporal divergence depending on |
||
| 1120 | // the DFS chosen. Conservatively, all values produced in such a |
||
| 1121 | // cycle are assumed divergent. "Cycle invariant" values may be |
||
| 1122 | // assumed uniform, but that requires further analysis. |
||
| 1123 | for (auto *C : DivCycles) { |
||
| 1124 | if (!insertIfNotContained(AssumedDivergent, C)) |
||
| 1125 | continue; |
||
| 1126 | LLVM_DEBUG(dbgs() << "process divergent cycle\n"); |
||
| 1127 | for (const BlockT *BB : C->blocks()) { |
||
| 1128 | taintAndPushAllDefs(*BB); |
||
| 1129 | } |
||
| 1130 | } |
||
| 1131 | |||
| 1132 | const auto *BranchCycle = CI.getCycle(DivTermBlock); |
||
| 1133 | assert(DivDesc.CycleDivBlocks.empty() || BranchCycle); |
||
| 1134 | for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) { |
||
| 1135 | propagateCycleExitDivergence(*DivExitBlock, *BranchCycle); |
||
| 1136 | } |
||
| 1137 | } |
||
| 1138 | |||
| 1139 | template <typename ContextT> |
||
| 1140 | void GenericUniformityAnalysisImpl<ContextT>::compute() { |
||
| 1141 | // Initialize worklist. |
||
| 1142 | auto DivValuesCopy = DivergentValues; |
||
| 1143 | for (const auto DivVal : DivValuesCopy) { |
||
| 1144 | assert(isDivergent(DivVal) && "Worklist invariant violated!"); |
||
| 1145 | pushUsers(DivVal); |
||
| 1146 | } |
||
| 1147 | |||
| 1148 | // All values on the Worklist are divergent. |
||
| 1149 | // Their users may not have been updated yet. |
||
| 1150 | while (!Worklist.empty()) { |
||
| 1151 | const InstructionT *I = Worklist.back(); |
||
| 1152 | Worklist.pop_back(); |
||
| 1153 | |||
| 1154 | LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n"); |
||
| 1155 | |||
| 1156 | if (I->isTerminator()) { |
||
| 1157 | analyzeControlDivergence(*I); |
||
| 1158 | continue; |
||
| 1159 | } |
||
| 1160 | |||
| 1161 | // propagate value divergence to users |
||
| 1162 | assert(isDivergent(*I) && "Worklist invariant violated!"); |
||
| 1163 | pushUsers(*I); |
||
| 1164 | } |
||
| 1165 | } |
||
| 1166 | |||
| 1167 | template <typename ContextT> |
||
| 1168 | bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform( |
||
| 1169 | const InstructionT &Instr) const { |
||
| 1170 | return UniformOverrides.contains(&Instr); |
||
| 1171 | } |
||
| 1172 | |||
| 1173 | template <typename ContextT> |
||
| 1174 | GenericUniformityInfo<ContextT>::GenericUniformityInfo( |
||
| 1175 | FunctionT &Func, const DominatorTreeT &DT, const CycleInfoT &CI, |
||
| 1176 | const TargetTransformInfo *TTI) |
||
| 1177 | : F(&Func) { |
||
| 1178 | DA.reset(new ImplT{Func, DT, CI, TTI}); |
||
| 1179 | DA->initialize(); |
||
| 1180 | DA->compute(); |
||
| 1181 | } |
||
| 1182 | |||
| 1183 | template <typename ContextT> |
||
| 1184 | void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const { |
||
| 1185 | bool haveDivergentArgs = false; |
||
| 1186 | |||
| 1187 | if (DivergentValues.empty()) { |
||
| 1188 | assert(DivergentTermBlocks.empty()); |
||
| 1189 | assert(DivergentExitCycles.empty()); |
||
| 1190 | OS << "ALL VALUES UNIFORM\n"; |
||
| 1191 | return; |
||
| 1192 | } |
||
| 1193 | |||
| 1194 | for (const auto &entry : DivergentValues) { |
||
| 1195 | const BlockT *parent = Context.getDefBlock(entry); |
||
| 1196 | if (!parent) { |
||
| 1197 | if (!haveDivergentArgs) { |
||
| 1198 | OS << "DIVERGENT ARGUMENTS:\n"; |
||
| 1199 | haveDivergentArgs = true; |
||
| 1200 | } |
||
| 1201 | OS << " DIVERGENT: " << Context.print(entry) << '\n'; |
||
| 1202 | } |
||
| 1203 | } |
||
| 1204 | |||
| 1205 | if (!AssumedDivergent.empty()) { |
||
| 1206 | OS << "CYCLES ASSSUMED DIVERGENT:\n"; |
||
| 1207 | for (const CycleT *cycle : AssumedDivergent) { |
||
| 1208 | OS << " " << cycle->print(Context) << '\n'; |
||
| 1209 | } |
||
| 1210 | } |
||
| 1211 | |||
| 1212 | if (!DivergentExitCycles.empty()) { |
||
| 1213 | OS << "CYCLES WITH DIVERGENT EXIT:\n"; |
||
| 1214 | for (const CycleT *cycle : DivergentExitCycles) { |
||
| 1215 | OS << " " << cycle->print(Context) << '\n'; |
||
| 1216 | } |
||
| 1217 | } |
||
| 1218 | |||
| 1219 | for (auto &block : F) { |
||
| 1220 | OS << "\nBLOCK " << Context.print(&block) << '\n'; |
||
| 1221 | |||
| 1222 | OS << "DEFINITIONS\n"; |
||
| 1223 | SmallVector<ConstValueRefT, 16> defs; |
||
| 1224 | Context.appendBlockDefs(defs, block); |
||
| 1225 | for (auto value : defs) { |
||
| 1226 | if (isDivergent(value)) |
||
| 1227 | OS << " DIVERGENT: "; |
||
| 1228 | else |
||
| 1229 | OS << " "; |
||
| 1230 | OS << Context.print(value) << '\n'; |
||
| 1231 | } |
||
| 1232 | |||
| 1233 | OS << "TERMINATORS\n"; |
||
| 1234 | SmallVector<const InstructionT *, 8> terms; |
||
| 1235 | Context.appendBlockTerms(terms, block); |
||
| 1236 | bool divergentTerminators = hasDivergentTerminator(block); |
||
| 1237 | for (auto *T : terms) { |
||
| 1238 | if (divergentTerminators) |
||
| 1239 | OS << " DIVERGENT: "; |
||
| 1240 | else |
||
| 1241 | OS << " "; |
||
| 1242 | OS << Context.print(T) << '\n'; |
||
| 1243 | } |
||
| 1244 | |||
| 1245 | OS << "END BLOCK\n"; |
||
| 1246 | } |
||
| 1247 | } |
||
| 1248 | |||
| 1249 | template <typename ContextT> |
||
| 1250 | bool GenericUniformityInfo<ContextT>::hasDivergence() const { |
||
| 1251 | return DA->hasDivergence(); |
||
| 1252 | } |
||
| 1253 | |||
| 1254 | /// Whether \p V is divergent at its definition. |
||
| 1255 | template <typename ContextT> |
||
| 1256 | bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const { |
||
| 1257 | return DA->isDivergent(V); |
||
| 1258 | } |
||
| 1259 | |||
| 1260 | template <typename ContextT> |
||
| 1261 | bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) { |
||
| 1262 | return DA->hasDivergentTerminator(B); |
||
| 1263 | } |
||
| 1264 | |||
| 1265 | /// \brief T helper function for printing. |
||
| 1266 | template <typename ContextT> |
||
| 1267 | void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const { |
||
| 1268 | DA->print(out); |
||
| 1269 | } |
||
| 1270 | |||
| 1271 | template <typename ContextT> |
||
| 1272 | void llvm::ModifiedPostOrder<ContextT>::computeStackPO( |
||
| 1273 | SmallVectorImpl<BlockT *> &Stack, const CycleInfoT &CI, const CycleT *Cycle, |
||
| 1274 | SmallPtrSetImpl<BlockT *> &Finalized) { |
||
| 1275 | LLVM_DEBUG(dbgs() << "inside computeStackPO\n"); |
||
| 1276 | while (!Stack.empty()) { |
||
| 1277 | auto *NextBB = Stack.back(); |
||
| 1278 | if (Finalized.count(NextBB)) { |
||
| 1279 | Stack.pop_back(); |
||
| 1280 | continue; |
||
| 1281 | } |
||
| 1282 | LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB) |
||
| 1283 | << "\n"); |
||
| 1284 | auto *NestedCycle = CI.getCycle(NextBB); |
||
| 1285 | if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) { |
||
| 1286 | LLVM_DEBUG(dbgs() << " found a cycle\n"); |
||
| 1287 | while (NestedCycle->getParentCycle() != Cycle) |
||
| 1288 | NestedCycle = NestedCycle->getParentCycle(); |
||
| 1289 | |||
| 1290 | SmallVector<BlockT *, 3> NestedExits; |
||
| 1291 | NestedCycle->getExitBlocks(NestedExits); |
||
| 1292 | bool PushedNodes = false; |
||
| 1293 | for (auto *NestedExitBB : NestedExits) { |
||
| 1294 | LLVM_DEBUG(dbgs() << " examine exit: " |
||
| 1295 | << CI.getSSAContext().print(NestedExitBB) << "\n"); |
||
| 1296 | if (Cycle && !Cycle->contains(NestedExitBB)) |
||
| 1297 | continue; |
||
| 1298 | if (Finalized.count(NestedExitBB)) |
||
| 1299 | continue; |
||
| 1300 | PushedNodes = true; |
||
| 1301 | Stack.push_back(NestedExitBB); |
||
| 1302 | LLVM_DEBUG(dbgs() << " pushed exit: " |
||
| 1303 | << CI.getSSAContext().print(NestedExitBB) << "\n"); |
||
| 1304 | } |
||
| 1305 | if (!PushedNodes) { |
||
| 1306 | // All loop exits finalized -> finish this node |
||
| 1307 | Stack.pop_back(); |
||
| 1308 | computeCyclePO(CI, NestedCycle, Finalized); |
||
| 1309 | } |
||
| 1310 | continue; |
||
| 1311 | } |
||
| 1312 | |||
| 1313 | LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n"); |
||
| 1314 | // DAG-style |
||
| 1315 | bool PushedNodes = false; |
||
| 1316 | for (auto *SuccBB : successors(NextBB)) { |
||
| 1317 | LLVM_DEBUG(dbgs() << " examine succ: " |
||
| 1318 | << CI.getSSAContext().print(SuccBB) << "\n"); |
||
| 1319 | if (Cycle && !Cycle->contains(SuccBB)) |
||
| 1320 | continue; |
||
| 1321 | if (Finalized.count(SuccBB)) |
||
| 1322 | continue; |
||
| 1323 | PushedNodes = true; |
||
| 1324 | Stack.push_back(SuccBB); |
||
| 1325 | LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB) |
||
| 1326 | << "\n"); |
||
| 1327 | } |
||
| 1328 | if (!PushedNodes) { |
||
| 1329 | // Never push nodes twice |
||
| 1330 | LLVM_DEBUG(dbgs() << " finishing node: " |
||
| 1331 | << CI.getSSAContext().print(NextBB) << "\n"); |
||
| 1332 | Stack.pop_back(); |
||
| 1333 | Finalized.insert(NextBB); |
||
| 1334 | appendBlock(*NextBB); |
||
| 1335 | } |
||
| 1336 | } |
||
| 1337 | LLVM_DEBUG(dbgs() << "exited computeStackPO\n"); |
||
| 1338 | } |
||
| 1339 | |||
| 1340 | template <typename ContextT> |
||
| 1341 | void ModifiedPostOrder<ContextT>::computeCyclePO( |
||
| 1342 | const CycleInfoT &CI, const CycleT *Cycle, |
||
| 1343 | SmallPtrSetImpl<BlockT *> &Finalized) { |
||
| 1344 | LLVM_DEBUG(dbgs() << "inside computeCyclePO\n"); |
||
| 1345 | SmallVector<BlockT *> Stack; |
||
| 1346 | auto *CycleHeader = Cycle->getHeader(); |
||
| 1347 | |||
| 1348 | LLVM_DEBUG(dbgs() << " noted header: " |
||
| 1349 | << CI.getSSAContext().print(CycleHeader) << "\n"); |
||
| 1350 | assert(!Finalized.count(CycleHeader)); |
||
| 1351 | Finalized.insert(CycleHeader); |
||
| 1352 | |||
| 1353 | // Visit the header last |
||
| 1354 | LLVM_DEBUG(dbgs() << " finishing header: " |
||
| 1355 | << CI.getSSAContext().print(CycleHeader) << "\n"); |
||
| 1356 | appendBlock(*CycleHeader, Cycle->isReducible()); |
||
| 1357 | |||
| 1358 | // Initialize with immediate successors |
||
| 1359 | for (auto *BB : successors(CycleHeader)) { |
||
| 1360 | LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB) |
||
| 1361 | << "\n"); |
||
| 1362 | if (!Cycle->contains(BB)) |
||
| 1363 | continue; |
||
| 1364 | if (BB == CycleHeader) |
||
| 1365 | continue; |
||
| 1366 | if (!Finalized.count(BB)) { |
||
| 1367 | LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB) |
||
| 1368 | << "\n"); |
||
| 1369 | Stack.push_back(BB); |
||
| 1370 | } |
||
| 1371 | } |
||
| 1372 | |||
| 1373 | // Compute PO inside region |
||
| 1374 | computeStackPO(Stack, CI, Cycle, Finalized); |
||
| 1375 | |||
| 1376 | LLVM_DEBUG(dbgs() << "exited computeCyclePO\n"); |
||
| 1377 | } |
||
| 1378 | |||
| 1379 | /// \brief Generically compute the modified post order. |
||
| 1380 | template <typename ContextT> |
||
| 1381 | void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) { |
||
| 1382 | SmallPtrSet<BlockT *, 32> Finalized; |
||
| 1383 | SmallVector<BlockT *> Stack; |
||
| 1384 | auto *F = CI.getFunction(); |
||
| 1385 | Stack.reserve(24); // FIXME made-up number |
||
| 1386 | Stack.push_back(GraphTraits<FunctionT *>::getEntryNode(F)); |
||
| 1387 | computeStackPO(Stack, CI, nullptr, Finalized); |
||
| 1388 | } |
||
| 1389 | |||
| 1390 | } // namespace llvm |
||
| 1391 | |||
| 1392 | #undef DEBUG_TYPE |
||
| 1393 | |||
| 1394 | #endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H |