Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This file defines some loop transformation utilities. |
||
| 10 | // |
||
| 11 | //===----------------------------------------------------------------------===// |
||
| 12 | |||
| 13 | #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
||
| 14 | #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
||
| 15 | |||
| 16 | #include "llvm/Analysis/IVDescriptors.h" |
||
| 17 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
||
| 18 | #include "llvm/Transforms/Utils/ValueMapper.h" |
||
| 19 | |||
| 20 | namespace llvm { |
||
| 21 | |||
| 22 | template <typename T> class DomTreeNodeBase; |
||
| 23 | using DomTreeNode = DomTreeNodeBase<BasicBlock>; |
||
| 24 | class AssumptionCache; |
||
| 25 | class StringRef; |
||
| 26 | class AnalysisUsage; |
||
| 27 | class TargetTransformInfo; |
||
| 28 | class AAResults; |
||
| 29 | class BasicBlock; |
||
| 30 | class ICFLoopSafetyInfo; |
||
| 31 | class IRBuilderBase; |
||
| 32 | class Loop; |
||
| 33 | class LoopInfo; |
||
| 34 | class MemoryAccess; |
||
| 35 | class MemorySSA; |
||
| 36 | class MemorySSAUpdater; |
||
| 37 | class OptimizationRemarkEmitter; |
||
| 38 | class PredIteratorCache; |
||
| 39 | class ScalarEvolution; |
||
| 40 | class SCEV; |
||
| 41 | class SCEVExpander; |
||
| 42 | class TargetLibraryInfo; |
||
| 43 | class LPPassManager; |
||
| 44 | class Instruction; |
||
| 45 | struct RuntimeCheckingPtrGroup; |
||
| 46 | typedef std::pair<const RuntimeCheckingPtrGroup *, |
||
| 47 | const RuntimeCheckingPtrGroup *> |
||
| 48 | RuntimePointerCheck; |
||
| 49 | |||
| 50 | template <typename T, unsigned N> class SmallSetVector; |
||
| 51 | template <typename T, unsigned N> class SmallPriorityWorklist; |
||
| 52 | |||
| 53 | BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, |
||
| 54 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); |
||
| 55 | |||
| 56 | /// Ensure that all exit blocks of the loop are dedicated exits. |
||
| 57 | /// |
||
| 58 | /// For any loop exit block with non-loop predecessors, we split the loop |
||
| 59 | /// predecessors to use a dedicated loop exit block. We update the dominator |
||
| 60 | /// tree and loop info if provided, and will preserve LCSSA if requested. |
||
| 61 | bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, |
||
| 62 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); |
||
| 63 | |||
| 64 | /// Ensures LCSSA form for every instruction from the Worklist in the scope of |
||
| 65 | /// innermost containing loop. |
||
| 66 | /// |
||
| 67 | /// For the given instruction which have uses outside of the loop, an LCSSA PHI |
||
| 68 | /// node is inserted and the uses outside the loop are rewritten to use this |
||
| 69 | /// node. |
||
| 70 | /// |
||
| 71 | /// LoopInfo and DominatorTree are required and, since the routine makes no |
||
| 72 | /// changes to CFG, preserved. |
||
| 73 | /// |
||
| 74 | /// Returns true if any modifications are made. |
||
| 75 | /// |
||
| 76 | /// This function may introduce unused PHI nodes. If \p PHIsToRemove is not |
||
| 77 | /// nullptr, those are added to it (before removing, the caller has to check if |
||
| 78 | /// they still do not have any uses). Otherwise the PHIs are directly removed. |
||
| 79 | bool formLCSSAForInstructions( |
||
| 80 | SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, |
||
| 81 | const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, |
||
| 82 | SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr); |
||
| 83 | |||
| 84 | /// Put loop into LCSSA form. |
||
| 85 | /// |
||
| 86 | /// Looks at all instructions in the loop which have uses outside of the |
||
| 87 | /// current loop. For each, an LCSSA PHI node is inserted and the uses outside |
||
| 88 | /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form |
||
| 89 | /// already. |
||
| 90 | /// |
||
| 91 | /// LoopInfo and DominatorTree are required and preserved. |
||
| 92 | /// |
||
| 93 | /// If ScalarEvolution is passed in, it will be preserved. |
||
| 94 | /// |
||
| 95 | /// Returns true if any modifications are made to the loop. |
||
| 96 | bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, |
||
| 97 | ScalarEvolution *SE); |
||
| 98 | |||
| 99 | /// Put a loop nest into LCSSA form. |
||
| 100 | /// |
||
| 101 | /// This recursively forms LCSSA for a loop nest. |
||
| 102 | /// |
||
| 103 | /// LoopInfo and DominatorTree are required and preserved. |
||
| 104 | /// |
||
| 105 | /// If ScalarEvolution is passed in, it will be preserved. |
||
| 106 | /// |
||
| 107 | /// Returns true if any modifications are made to the loop. |
||
| 108 | bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, |
||
| 109 | ScalarEvolution *SE); |
||
| 110 | |||
| 111 | /// Flags controlling how much is checked when sinking or hoisting |
||
| 112 | /// instructions. The number of memory access in the loop (and whether there |
||
| 113 | /// are too many) is determined in the constructors when using MemorySSA. |
||
| 114 | class SinkAndHoistLICMFlags { |
||
| 115 | public: |
||
| 116 | // Explicitly set limits. |
||
| 117 | SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, |
||
| 118 | unsigned LicmMssaNoAccForPromotionCap, bool IsSink, |
||
| 119 | Loop *L = nullptr, MemorySSA *MSSA = nullptr); |
||
| 120 | // Use default limits. |
||
| 121 | SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, |
||
| 122 | MemorySSA *MSSA = nullptr); |
||
| 123 | |||
| 124 | void setIsSink(bool B) { IsSink = B; } |
||
| 125 | bool getIsSink() { return IsSink; } |
||
| 126 | bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } |
||
| 127 | bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } |
||
| 128 | void incrementClobberingCalls() { ++LicmMssaOptCounter; } |
||
| 129 | |||
| 130 | protected: |
||
| 131 | bool NoOfMemAccTooLarge = false; |
||
| 132 | unsigned LicmMssaOptCounter = 0; |
||
| 133 | unsigned LicmMssaOptCap; |
||
| 134 | unsigned LicmMssaNoAccForPromotionCap; |
||
| 135 | bool IsSink; |
||
| 136 | }; |
||
| 137 | |||
| 138 | /// Walk the specified region of the CFG (defined by all blocks |
||
| 139 | /// dominated by the specified block, and that are in the current loop) in |
||
| 140 | /// reverse depth first order w.r.t the DominatorTree. This allows us to visit |
||
| 141 | /// uses before definitions, allowing us to sink a loop body in one pass without |
||
| 142 | /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, |
||
| 143 | /// TargetLibraryInfo, Loop, AliasSet information for all |
||
| 144 | /// instructions of the loop and loop safety information as |
||
| 145 | /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. |
||
| 146 | /// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when |
||
| 147 | /// this function is called by \p sinkRegionForLoopNest. |
||
| 148 | bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, |
||
| 149 | TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, |
||
| 150 | MemorySSAUpdater &, ICFLoopSafetyInfo *, |
||
| 151 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, |
||
| 152 | Loop *OutermostLoop = nullptr); |
||
| 153 | |||
| 154 | /// Call sinkRegion on loops contained within the specified loop |
||
| 155 | /// in order from innermost to outermost. |
||
| 156 | bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, |
||
| 157 | DominatorTree *, TargetLibraryInfo *, |
||
| 158 | TargetTransformInfo *, Loop *, MemorySSAUpdater &, |
||
| 159 | ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, |
||
| 160 | OptimizationRemarkEmitter *); |
||
| 161 | |||
| 162 | /// Walk the specified region of the CFG (defined by all blocks |
||
| 163 | /// dominated by the specified block, and that are in the current loop) in depth |
||
| 164 | /// first order w.r.t the DominatorTree. This allows us to visit definitions |
||
| 165 | /// before uses, allowing us to hoist a loop body in one pass without iteration. |
||
| 166 | /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, |
||
| 167 | /// TargetLibraryInfo, Loop, AliasSet information for all |
||
| 168 | /// instructions of the loop and loop safety information as arguments. |
||
| 169 | /// Diagnostics is emitted via \p ORE. It returns changed status. |
||
| 170 | /// \p AllowSpeculation is whether values should be hoisted even if they are not |
||
| 171 | /// guaranteed to execute in the loop, but are safe to speculatively execute. |
||
| 172 | bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, |
||
| 173 | AssumptionCache *, TargetLibraryInfo *, Loop *, |
||
| 174 | MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, |
||
| 175 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, |
||
| 176 | bool AllowSpeculation); |
||
| 177 | |||
| 178 | /// This function deletes dead loops. The caller of this function needs to |
||
| 179 | /// guarantee that the loop is infact dead. |
||
| 180 | /// The function requires a bunch or prerequisites to be present: |
||
| 181 | /// - The loop needs to be in LCSSA form |
||
| 182 | /// - The loop needs to have a Preheader |
||
| 183 | /// - A unique dedicated exit block must exist |
||
| 184 | /// |
||
| 185 | /// This also updates the relevant analysis information in \p DT, \p SE, \p LI |
||
| 186 | /// and \p MSSA if pointers to those are provided. |
||
| 187 | /// It also updates the loop PM if an updater struct is provided. |
||
| 188 | |||
| 189 | void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, |
||
| 190 | LoopInfo *LI, MemorySSA *MSSA = nullptr); |
||
| 191 | |||
| 192 | /// Remove the backedge of the specified loop. Handles loop nests and general |
||
| 193 | /// loop structures subject to the precondition that the loop has no parent |
||
| 194 | /// loop and has a single latch block. Preserves all listed analyses. |
||
| 195 | void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, |
||
| 196 | LoopInfo &LI, MemorySSA *MSSA); |
||
| 197 | |||
| 198 | /// Try to promote memory values to scalars by sinking stores out of |
||
| 199 | /// the loop and moving loads to before the loop. We do this by looping over |
||
| 200 | /// the stores in the loop, looking for stores to Must pointers which are |
||
| 201 | /// loop invariant. It takes a set of must-alias values, Loop exit blocks |
||
| 202 | /// vector, loop exit blocks insertion point vector, PredIteratorCache, |
||
| 203 | /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions |
||
| 204 | /// of the loop and loop safety information as arguments. |
||
| 205 | /// Diagnostics is emitted via \p ORE. It returns changed status. |
||
| 206 | /// \p AllowSpeculation is whether values should be hoisted even if they are not |
||
| 207 | /// guaranteed to execute in the loop, but are safe to speculatively execute. |
||
| 208 | bool promoteLoopAccessesToScalars( |
||
| 209 | const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &, |
||
| 210 | SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &, |
||
| 211 | PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, |
||
| 212 | const TargetLibraryInfo *, TargetTransformInfo *, Loop *, |
||
| 213 | MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, |
||
| 214 | bool AllowSpeculation, bool HasReadsOutsideSet); |
||
| 215 | |||
| 216 | /// Does a BFS from a given node to all of its children inside a given loop. |
||
| 217 | /// The returned vector of nodes includes the starting point. |
||
| 218 | SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N, |
||
| 219 | const Loop *CurLoop); |
||
| 220 | |||
| 221 | /// Returns the instructions that use values defined in the loop. |
||
| 222 | SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L); |
||
| 223 | |||
| 224 | /// Find a combination of metadata ("llvm.loop.vectorize.width" and |
||
| 225 | /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a |
||
| 226 | /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found |
||
| 227 | /// then std::nullopt is returned. |
||
| 228 | std::optional<ElementCount> |
||
| 229 | getOptionalElementCountLoopAttribute(const Loop *TheLoop); |
||
| 230 | |||
| 231 | /// Create a new loop identifier for a loop created from a loop transformation. |
||
| 232 | /// |
||
| 233 | /// @param OrigLoopID The loop ID of the loop before the transformation. |
||
| 234 | /// @param FollowupAttrs List of attribute names that contain attributes to be |
||
| 235 | /// added to the new loop ID. |
||
| 236 | /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited |
||
| 237 | /// from the original loop. The following values |
||
| 238 | /// are considered: |
||
| 239 | /// nullptr : Inherit all attributes from @p OrigLoopID. |
||
| 240 | /// "" : Do not inherit any attribute from @p OrigLoopID; only use |
||
| 241 | /// those specified by a followup attribute. |
||
| 242 | /// "<prefix>": Inherit all attributes except those which start with |
||
| 243 | /// <prefix>; commonly used to remove metadata for the |
||
| 244 | /// applied transformation. |
||
| 245 | /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return |
||
| 246 | /// std::nullopt. |
||
| 247 | /// |
||
| 248 | /// @return The loop ID for the after-transformation loop. The following values |
||
| 249 | /// can be returned: |
||
| 250 | /// std::nullopt : No followup attribute was found; it is up to the |
||
| 251 | /// transformation to choose attributes that make sense. |
||
| 252 | /// @p OrigLoopID: The original identifier can be reused. |
||
| 253 | /// nullptr : The new loop has no attributes. |
||
| 254 | /// MDNode* : A new unique loop identifier. |
||
| 255 | std::optional<MDNode *> |
||
| 256 | makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs, |
||
| 257 | const char *InheritOptionsAttrsPrefix = "", |
||
| 258 | bool AlwaysNew = false); |
||
| 259 | |||
| 260 | /// Look for the loop attribute that disables all transformation heuristic. |
||
| 261 | bool hasDisableAllTransformsHint(const Loop *L); |
||
| 262 | |||
| 263 | /// Look for the loop attribute that disables the LICM transformation heuristics. |
||
| 264 | bool hasDisableLICMTransformsHint(const Loop *L); |
||
| 265 | |||
| 266 | /// The mode sets how eager a transformation should be applied. |
||
| 267 | enum TransformationMode { |
||
| 268 | /// The pass can use heuristics to determine whether a transformation should |
||
| 269 | /// be applied. |
||
| 270 | TM_Unspecified, |
||
| 271 | |||
| 272 | /// The transformation should be applied without considering a cost model. |
||
| 273 | TM_Enable, |
||
| 274 | |||
| 275 | /// The transformation should not be applied. |
||
| 276 | TM_Disable, |
||
| 277 | |||
| 278 | /// Force is a flag and should not be used alone. |
||
| 279 | TM_Force = 0x04, |
||
| 280 | |||
| 281 | /// The transformation was directed by the user, e.g. by a #pragma in |
||
| 282 | /// the source code. If the transformation could not be applied, a |
||
| 283 | /// warning should be emitted. |
||
| 284 | TM_ForcedByUser = TM_Enable | TM_Force, |
||
| 285 | |||
| 286 | /// The transformation must not be applied. For instance, `#pragma clang loop |
||
| 287 | /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike |
||
| 288 | /// general loop metadata, it must not be dropped. Most passes should not |
||
| 289 | /// behave differently under TM_Disable and TM_SuppressedByUser. |
||
| 290 | TM_SuppressedByUser = TM_Disable | TM_Force |
||
| 291 | }; |
||
| 292 | |||
| 293 | /// @{ |
||
| 294 | /// Get the mode for LLVM's supported loop transformations. |
||
| 295 | TransformationMode hasUnrollTransformation(const Loop *L); |
||
| 296 | TransformationMode hasUnrollAndJamTransformation(const Loop *L); |
||
| 297 | TransformationMode hasVectorizeTransformation(const Loop *L); |
||
| 298 | TransformationMode hasDistributeTransformation(const Loop *L); |
||
| 299 | TransformationMode hasLICMVersioningTransformation(const Loop *L); |
||
| 300 | /// @} |
||
| 301 | |||
| 302 | /// Set input string into loop metadata by keeping other values intact. |
||
| 303 | /// If the string is already in loop metadata update value if it is |
||
| 304 | /// different. |
||
| 305 | void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, |
||
| 306 | unsigned V = 0); |
||
| 307 | |||
| 308 | /// Returns a loop's estimated trip count based on branch weight metadata. |
||
| 309 | /// In addition if \p EstimatedLoopInvocationWeight is not null it is |
||
| 310 | /// initialized with weight of loop's latch leading to the exit. |
||
| 311 | /// Returns 0 when the count is estimated to be 0, or std::nullopt when a |
||
| 312 | /// meaningful estimate can not be made. |
||
| 313 | std::optional<unsigned> |
||
| 314 | getLoopEstimatedTripCount(Loop *L, |
||
| 315 | unsigned *EstimatedLoopInvocationWeight = nullptr); |
||
| 316 | |||
| 317 | /// Set a loop's branch weight metadata to reflect that loop has \p |
||
| 318 | /// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits |
||
| 319 | /// through latch. Returns true if metadata is successfully updated, false |
||
| 320 | /// otherwise. Note that loop must have a latch block which controls loop exit |
||
| 321 | /// in order to succeed. |
||
| 322 | bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, |
||
| 323 | unsigned EstimatedLoopInvocationWeight); |
||
| 324 | |||
| 325 | /// Check inner loop (L) backedge count is known to be invariant on all |
||
| 326 | /// iterations of its outer loop. If the loop has no parent, this is trivially |
||
| 327 | /// true. |
||
| 328 | bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE); |
||
| 329 | |||
| 330 | /// Helper to consistently add the set of standard passes to a loop pass's \c |
||
| 331 | /// AnalysisUsage. |
||
| 332 | /// |
||
| 333 | /// All loop passes should call this as part of implementing their \c |
||
| 334 | /// getAnalysisUsage. |
||
| 335 | void getLoopAnalysisUsage(AnalysisUsage &AU); |
||
| 336 | |||
| 337 | /// Returns true if is legal to hoist or sink this instruction disregarding the |
||
| 338 | /// possible introduction of faults. Reasoning about potential faulting |
||
| 339 | /// instructions is the responsibility of the caller since it is challenging to |
||
| 340 | /// do efficiently from within this routine. |
||
| 341 | /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the |
||
| 342 | /// target executes at most once per execution of the loop body. This is used |
||
| 343 | /// to assess the legality of duplicating atomic loads. Generally, this is |
||
| 344 | /// true when moving out of loop and not true when moving into loops. |
||
| 345 | /// If \p ORE is set use it to emit optimization remarks. |
||
| 346 | bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, |
||
| 347 | Loop *CurLoop, MemorySSAUpdater &MSSAU, |
||
| 348 | bool TargetExecutesOncePerLoop, |
||
| 349 | SinkAndHoistLICMFlags &LICMFlags, |
||
| 350 | OptimizationRemarkEmitter *ORE = nullptr); |
||
| 351 | |||
| 352 | /// Returns the comparison predicate used when expanding a min/max reduction. |
||
| 353 | CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); |
||
| 354 | |||
| 355 | /// See RecurrenceDescriptor::isSelectCmpPattern for a description of the |
||
| 356 | /// pattern we are trying to match. In this pattern we are only ever selecting |
||
| 357 | /// between two values: 1) an initial PHI start value, and 2) a loop invariant |
||
| 358 | /// value. This function uses \p LoopExitInst to determine 2), which we then use |
||
| 359 | /// to select between \p Left and \p Right. Any lane value in \p Left that |
||
| 360 | /// matches 2) will be merged into \p Right. |
||
| 361 | Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, |
||
| 362 | Value *Left, Value *Right); |
||
| 363 | |||
| 364 | /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. |
||
| 365 | /// The Builder's fast-math-flags must be set to propagate the expected values. |
||
| 366 | Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, |
||
| 367 | Value *Right); |
||
| 368 | |||
| 369 | /// Generates an ordered vector reduction using extracts to reduce the value. |
||
| 370 | Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, |
||
| 371 | unsigned Op, RecurKind MinMaxKind = RecurKind::None); |
||
| 372 | |||
| 373 | /// Generates a vector reduction using shufflevectors to reduce the value. |
||
| 374 | /// Fast-math-flags are propagated using the IRBuilder's setting. |
||
| 375 | Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, |
||
| 376 | RecurKind MinMaxKind = RecurKind::None); |
||
| 377 | |||
| 378 | /// Create a target reduction of the given vector. The reduction operation |
||
| 379 | /// is described by the \p Opcode parameter. min/max reductions require |
||
| 380 | /// additional information supplied in \p RdxKind. |
||
| 381 | /// The target is queried to determine if intrinsics or shuffle sequences are |
||
| 382 | /// required to implement the reduction. |
||
| 383 | /// Fast-math-flags are propagated using the IRBuilder's setting. |
||
| 384 | Value *createSimpleTargetReduction(IRBuilderBase &B, |
||
| 385 | const TargetTransformInfo *TTI, Value *Src, |
||
| 386 | RecurKind RdxKind); |
||
| 387 | |||
| 388 | /// Create a target reduction of the given vector \p Src for a reduction of the |
||
| 389 | /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation |
||
| 390 | /// is described by \p Desc. |
||
| 391 | Value *createSelectCmpTargetReduction(IRBuilderBase &B, |
||
| 392 | const TargetTransformInfo *TTI, |
||
| 393 | Value *Src, |
||
| 394 | const RecurrenceDescriptor &Desc, |
||
| 395 | PHINode *OrigPhi); |
||
| 396 | |||
| 397 | /// Create a generic target reduction using a recurrence descriptor \p Desc |
||
| 398 | /// The target is queried to determine if intrinsics or shuffle sequences are |
||
| 399 | /// required to implement the reduction. |
||
| 400 | /// Fast-math-flags are propagated using the RecurrenceDescriptor. |
||
| 401 | Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, |
||
| 402 | const RecurrenceDescriptor &Desc, Value *Src, |
||
| 403 | PHINode *OrigPhi = nullptr); |
||
| 404 | |||
| 405 | /// Create an ordered reduction intrinsic using the given recurrence |
||
| 406 | /// descriptor \p Desc. |
||
| 407 | Value *createOrderedReduction(IRBuilderBase &B, |
||
| 408 | const RecurrenceDescriptor &Desc, Value *Src, |
||
| 409 | Value *Start); |
||
| 410 | |||
| 411 | /// Get the intersection (logical and) of all of the potential IR flags |
||
| 412 | /// of each scalar operation (VL) that will be converted into a vector (I). |
||
| 413 | /// If OpValue is non-null, we only consider operations similar to OpValue |
||
| 414 | /// when intersecting. |
||
| 415 | /// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of |
||
| 416 | /// fast-math. |
||
| 417 | void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr, |
||
| 418 | bool IncludeWrapFlags = true); |
||
| 419 | |||
| 420 | /// Returns true if we can prove that \p S is defined and always negative in |
||
| 421 | /// loop \p L. |
||
| 422 | bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE); |
||
| 423 | |||
| 424 | /// Returns true if we can prove that \p S is defined and always non-negative in |
||
| 425 | /// loop \p L. |
||
| 426 | bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, |
||
| 427 | ScalarEvolution &SE); |
||
| 428 | |||
| 429 | /// Returns true if \p S is defined and never is equal to signed/unsigned max. |
||
| 430 | bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
||
| 431 | bool Signed); |
||
| 432 | |||
| 433 | /// Returns true if \p S is defined and never is equal to signed/unsigned min. |
||
| 434 | bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
||
| 435 | bool Signed); |
||
| 436 | |||
| 437 | enum ReplaceExitVal { |
||
| 438 | NeverRepl, |
||
| 439 | OnlyCheapRepl, |
||
| 440 | NoHardUse, |
||
| 441 | UnusedIndVarInLoop, |
||
| 442 | AlwaysRepl |
||
| 443 | }; |
||
| 444 | |||
| 445 | /// If the final value of any expressions that are recurrent in the loop can |
||
| 446 | /// be computed, substitute the exit values from the loop into any instructions |
||
| 447 | /// outside of the loop that use the final values of the current expressions. |
||
| 448 | /// Return the number of loop exit values that have been replaced, and the |
||
| 449 | /// corresponding phi node will be added to DeadInsts. |
||
| 450 | int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, |
||
| 451 | ScalarEvolution *SE, const TargetTransformInfo *TTI, |
||
| 452 | SCEVExpander &Rewriter, DominatorTree *DT, |
||
| 453 | ReplaceExitVal ReplaceExitValue, |
||
| 454 | SmallVector<WeakTrackingVH, 16> &DeadInsts); |
||
| 455 | |||
| 456 | /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for |
||
| 457 | /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p |
||
| 458 | /// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that |
||
| 459 | /// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect |
||
| 460 | /// the remaining TC%UF iterations. |
||
| 461 | /// |
||
| 462 | /// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p |
||
| 463 | /// RemainderLoop in which case weights for \p OrigLoop are updated accordingly. |
||
| 464 | /// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are |
||
| 465 | /// equal. \p UF must be greater than zero. |
||
| 466 | /// If \p OrigLoop has no profile info associated nothing happens. |
||
| 467 | /// |
||
| 468 | /// This utility may be useful for such optimizations as unroller and |
||
| 469 | /// vectorizer as it's typical transformation for them. |
||
| 470 | void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, |
||
| 471 | Loop *RemainderLoop, uint64_t UF); |
||
| 472 | |||
| 473 | /// Utility that implements appending of loops onto a worklist given a range. |
||
| 474 | /// We want to process loops in postorder, but the worklist is a LIFO data |
||
| 475 | /// structure, so we append to it in *reverse* postorder. |
||
| 476 | /// For trees, a preorder traversal is a viable reverse postorder, so we |
||
| 477 | /// actually append using a preorder walk algorithm. |
||
| 478 | template <typename RangeT> |
||
| 479 | void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &); |
||
| 480 | /// Utility that implements appending of loops onto a worklist given a range. |
||
| 481 | /// It has the same behavior as appendLoopsToWorklist, but assumes the range of |
||
| 482 | /// loops has already been reversed, so it processes loops in the given order. |
||
| 483 | template <typename RangeT> |
||
| 484 | void appendReversedLoopsToWorklist(RangeT &&, |
||
| 485 | SmallPriorityWorklist<Loop *, 4> &); |
||
| 486 | |||
| 487 | /// Utility that implements appending of loops onto a worklist given LoopInfo. |
||
| 488 | /// Calls the templated utility taking a Range of loops, handing it the Loops |
||
| 489 | /// in LoopInfo, iterated in reverse. This is because the loops are stored in |
||
| 490 | /// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling, |
||
| 491 | /// loop deletion, and LICM, we largely want to work forward across the CFG so |
||
| 492 | /// that we visit defs before uses and can propagate simplifications from one |
||
| 493 | /// loop nest into the next. Calls appendReversedLoopsToWorklist with the |
||
| 494 | /// already reversed loops in LI. |
||
| 495 | /// FIXME: Consider changing the order in LoopInfo. |
||
| 496 | void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &); |
||
| 497 | |||
| 498 | /// Recursively clone the specified loop and all of its children, |
||
| 499 | /// mapping the blocks with the specified map. |
||
| 500 | Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, |
||
| 501 | LoopInfo *LI, LPPassManager *LPM); |
||
| 502 | |||
| 503 | /// Add code that checks at runtime if the accessed arrays in \p PointerChecks |
||
| 504 | /// overlap. Returns the final comparator value or NULL if no check is needed. |
||
| 505 | Value * |
||
| 506 | addRuntimeChecks(Instruction *Loc, Loop *TheLoop, |
||
| 507 | const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, |
||
| 508 | SCEVExpander &Expander); |
||
| 509 | |||
| 510 | Value *addDiffRuntimeChecks( |
||
| 511 | Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander, |
||
| 512 | function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC); |
||
| 513 | |||
| 514 | /// Struct to hold information about a partially invariant condition. |
||
| 515 | struct IVConditionInfo { |
||
| 516 | /// Instructions that need to be duplicated and checked for the unswitching |
||
| 517 | /// condition. |
||
| 518 | SmallVector<Instruction *> InstToDuplicate; |
||
| 519 | |||
| 520 | /// Constant to indicate for which value the condition is invariant. |
||
| 521 | Constant *KnownValue = nullptr; |
||
| 522 | |||
| 523 | /// True if the partially invariant path is no-op (=does not have any |
||
| 524 | /// side-effects and no loop value is used outside the loop). |
||
| 525 | bool PathIsNoop = true; |
||
| 526 | |||
| 527 | /// If the partially invariant path reaches a single exit block, ExitForPath |
||
| 528 | /// is set to that block. Otherwise it is nullptr. |
||
| 529 | BasicBlock *ExitForPath = nullptr; |
||
| 530 | }; |
||
| 531 | |||
| 532 | /// Check if the loop header has a conditional branch that is not |
||
| 533 | /// loop-invariant, because it involves load instructions. If all paths from |
||
| 534 | /// either the true or false successor to the header or loop exists do not |
||
| 535 | /// modify the memory feeding the condition, perform 'partial unswitching'. That |
||
| 536 | /// is, duplicate the instructions feeding the condition in the pre-header. Then |
||
| 537 | /// unswitch on the duplicated condition. The condition is now known in the |
||
| 538 | /// unswitched version for the 'invariant' path through the original loop. |
||
| 539 | /// |
||
| 540 | /// If the branch condition of the header is partially invariant, return a pair |
||
| 541 | /// containing the instructions to duplicate and a boolean Constant to update |
||
| 542 | /// the condition in the loops created for the true or false successors. |
||
| 543 | std::optional<IVConditionInfo> hasPartialIVCondition(const Loop &L, |
||
| 544 | unsigned MSSAThreshold, |
||
| 545 | const MemorySSA &MSSA, |
||
| 546 | AAResults &AA); |
||
| 547 | |||
| 548 | } // end namespace llvm |
||
| 549 | |||
| 550 | #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |