Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 14 | pmbaty | 1 | //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// | 
| 2 | // | ||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | // | ||
| 7 | //===----------------------------------------------------------------------===// | ||
| 8 | // | ||
| 9 | // This file defines some loop transformation utilities. | ||
| 10 | // | ||
| 11 | //===----------------------------------------------------------------------===// | ||
| 12 | |||
| 13 | #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H | ||
| 14 | #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H | ||
| 15 | |||
| 16 | #include "llvm/Analysis/IVDescriptors.h" | ||
| 17 | #include "llvm/Analysis/LoopAccessAnalysis.h" | ||
| 18 | #include "llvm/Transforms/Utils/ValueMapper.h" | ||
| 19 | |||
| 20 | namespace llvm { | ||
| 21 | |||
| 22 | template <typename T> class DomTreeNodeBase; | ||
| 23 | using DomTreeNode = DomTreeNodeBase<BasicBlock>; | ||
| 24 | class AssumptionCache; | ||
| 25 | class StringRef; | ||
| 26 | class AnalysisUsage; | ||
| 27 | class TargetTransformInfo; | ||
| 28 | class AAResults; | ||
| 29 | class BasicBlock; | ||
| 30 | class ICFLoopSafetyInfo; | ||
| 31 | class IRBuilderBase; | ||
| 32 | class Loop; | ||
| 33 | class LoopInfo; | ||
| 34 | class MemoryAccess; | ||
| 35 | class MemorySSA; | ||
| 36 | class MemorySSAUpdater; | ||
| 37 | class OptimizationRemarkEmitter; | ||
| 38 | class PredIteratorCache; | ||
| 39 | class ScalarEvolution; | ||
| 40 | class SCEV; | ||
| 41 | class SCEVExpander; | ||
| 42 | class TargetLibraryInfo; | ||
| 43 | class LPPassManager; | ||
| 44 | class Instruction; | ||
| 45 | struct RuntimeCheckingPtrGroup; | ||
| 46 | typedef std::pair<const RuntimeCheckingPtrGroup *, | ||
| 47 | const RuntimeCheckingPtrGroup *> | ||
| 48 |     RuntimePointerCheck; | ||
| 49 | |||
| 50 | template <typename T, unsigned N> class SmallSetVector; | ||
| 51 | template <typename T, unsigned N> class SmallPriorityWorklist; | ||
| 52 | |||
| 53 | BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, | ||
| 54 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); | ||
| 55 | |||
| 56 | /// Ensure that all exit blocks of the loop are dedicated exits. | ||
| 57 | /// | ||
| 58 | /// For any loop exit block with non-loop predecessors, we split the loop | ||
| 59 | /// predecessors to use a dedicated loop exit block. We update the dominator | ||
| 60 | /// tree and loop info if provided, and will preserve LCSSA if requested. | ||
| 61 | bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, | ||
| 62 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); | ||
| 63 | |||
| 64 | /// Ensures LCSSA form for every instruction from the Worklist in the scope of | ||
| 65 | /// innermost containing loop. | ||
| 66 | /// | ||
| 67 | /// For the given instruction which have uses outside of the loop, an LCSSA PHI | ||
| 68 | /// node is inserted and the uses outside the loop are rewritten to use this | ||
| 69 | /// node. | ||
| 70 | /// | ||
| 71 | /// LoopInfo and DominatorTree are required and, since the routine makes no | ||
| 72 | /// changes to CFG, preserved. | ||
| 73 | /// | ||
| 74 | /// Returns true if any modifications are made. | ||
| 75 | /// | ||
| 76 | /// This function may introduce unused PHI nodes. If \p PHIsToRemove is not | ||
| 77 | /// nullptr, those are added to it (before removing, the caller has to check if | ||
| 78 | /// they still do not have any uses). Otherwise the PHIs are directly removed. | ||
| 79 | bool formLCSSAForInstructions( | ||
| 80 | SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, | ||
| 81 | const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, | ||
| 82 | SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr); | ||
| 83 | |||
| 84 | /// Put loop into LCSSA form. | ||
| 85 | /// | ||
| 86 | /// Looks at all instructions in the loop which have uses outside of the | ||
| 87 | /// current loop. For each, an LCSSA PHI node is inserted and the uses outside | ||
| 88 | /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form | ||
| 89 | /// already. | ||
| 90 | /// | ||
| 91 | /// LoopInfo and DominatorTree are required and preserved. | ||
| 92 | /// | ||
| 93 | /// If ScalarEvolution is passed in, it will be preserved. | ||
| 94 | /// | ||
| 95 | /// Returns true if any modifications are made to the loop. | ||
| 96 | bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, | ||
| 97 | ScalarEvolution *SE); | ||
| 98 | |||
| 99 | /// Put a loop nest into LCSSA form. | ||
| 100 | /// | ||
| 101 | /// This recursively forms LCSSA for a loop nest. | ||
| 102 | /// | ||
| 103 | /// LoopInfo and DominatorTree are required and preserved. | ||
| 104 | /// | ||
| 105 | /// If ScalarEvolution is passed in, it will be preserved. | ||
| 106 | /// | ||
| 107 | /// Returns true if any modifications are made to the loop. | ||
| 108 | bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, | ||
| 109 | ScalarEvolution *SE); | ||
| 110 | |||
| 111 | /// Flags controlling how much is checked when sinking or hoisting | ||
| 112 | /// instructions.  The number of memory access in the loop (and whether there | ||
| 113 | /// are too many) is determined in the constructors when using MemorySSA. | ||
| 114 | class SinkAndHoistLICMFlags { | ||
| 115 | public: | ||
| 116 |   // Explicitly set limits. | ||
| 117 | SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, | ||
| 118 | unsigned LicmMssaNoAccForPromotionCap, bool IsSink, | ||
| 119 | Loop *L = nullptr, MemorySSA *MSSA = nullptr); | ||
| 120 |   // Use default limits. | ||
| 121 | SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, | ||
| 122 | MemorySSA *MSSA = nullptr); | ||
| 123 | |||
| 124 | void setIsSink(bool B) { IsSink = B; } | ||
| 125 | bool getIsSink() { return IsSink; } | ||
| 126 | bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } | ||
| 127 | bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } | ||
| 128 | void incrementClobberingCalls() { ++LicmMssaOptCounter; } | ||
| 129 | |||
| 130 | protected: | ||
| 131 | bool NoOfMemAccTooLarge = false; | ||
| 132 | unsigned LicmMssaOptCounter = 0; | ||
| 133 | unsigned LicmMssaOptCap; | ||
| 134 | unsigned LicmMssaNoAccForPromotionCap; | ||
| 135 | bool IsSink; | ||
| 136 | }; | ||
| 137 | |||
| 138 | /// Walk the specified region of the CFG (defined by all blocks | ||
| 139 | /// dominated by the specified block, and that are in the current loop) in | ||
| 140 | /// reverse depth first order w.r.t the DominatorTree. This allows us to visit | ||
| 141 | /// uses before definitions, allowing us to sink a loop body in one pass without | ||
| 142 | /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, | ||
| 143 | /// TargetLibraryInfo, Loop, AliasSet information for all | ||
| 144 | /// instructions of the loop and loop safety information as | ||
| 145 | /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. | ||
| 146 | /// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when | ||
| 147 | /// this function is called by \p sinkRegionForLoopNest. | ||
| 148 | bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, | ||
| 149 | TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, | ||
| 150 | MemorySSAUpdater &, ICFLoopSafetyInfo *, | ||
| 151 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, | ||
| 152 | Loop *OutermostLoop = nullptr); | ||
| 153 | |||
| 154 | /// Call sinkRegion on loops contained within the specified loop | ||
| 155 | /// in order from innermost to outermost. | ||
| 156 | bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, | ||
| 157 | DominatorTree *, TargetLibraryInfo *, | ||
| 158 | TargetTransformInfo *, Loop *, MemorySSAUpdater &, | ||
| 159 | ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, | ||
| 160 | OptimizationRemarkEmitter *); | ||
| 161 | |||
| 162 | /// Walk the specified region of the CFG (defined by all blocks | ||
| 163 | /// dominated by the specified block, and that are in the current loop) in depth | ||
| 164 | /// first order w.r.t the DominatorTree.  This allows us to visit definitions | ||
| 165 | /// before uses, allowing us to hoist a loop body in one pass without iteration. | ||
| 166 | /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, | ||
| 167 | /// TargetLibraryInfo, Loop, AliasSet information for all | ||
| 168 | /// instructions of the loop and loop safety information as arguments. | ||
| 169 | /// Diagnostics is emitted via \p ORE. It returns changed status. | ||
| 170 | /// \p AllowSpeculation is whether values should be hoisted even if they are not | ||
| 171 | /// guaranteed to execute in the loop, but are safe to speculatively execute. | ||
| 172 | bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, | ||
| 173 | AssumptionCache *, TargetLibraryInfo *, Loop *, | ||
| 174 | MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, | ||
| 175 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, | ||
| 176 | bool AllowSpeculation); | ||
| 177 | |||
| 178 | /// This function deletes dead loops. The caller of this function needs to | ||
| 179 | /// guarantee that the loop is infact dead. | ||
| 180 | /// The function requires a bunch or prerequisites to be present: | ||
| 181 | ///   - The loop needs to be in LCSSA form | ||
| 182 | ///   - The loop needs to have a Preheader | ||
| 183 | ///   - A unique dedicated exit block must exist | ||
| 184 | /// | ||
| 185 | /// This also updates the relevant analysis information in \p DT, \p SE, \p LI | ||
| 186 | /// and \p MSSA if pointers to those are provided. | ||
| 187 | /// It also updates the loop PM if an updater struct is provided. | ||
| 188 | |||
| 189 | void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, | ||
| 190 | LoopInfo *LI, MemorySSA *MSSA = nullptr); | ||
| 191 | |||
| 192 | /// Remove the backedge of the specified loop.  Handles loop nests and general | ||
| 193 | /// loop structures subject to the precondition that the loop has no parent | ||
| 194 | /// loop and has a single latch block.  Preserves all listed analyses. | ||
| 195 | void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, | ||
| 196 | LoopInfo &LI, MemorySSA *MSSA); | ||
| 197 | |||
| 198 | /// Try to promote memory values to scalars by sinking stores out of | ||
| 199 | /// the loop and moving loads to before the loop.  We do this by looping over | ||
| 200 | /// the stores in the loop, looking for stores to Must pointers which are | ||
| 201 | /// loop invariant. It takes a set of must-alias values, Loop exit blocks | ||
| 202 | /// vector, loop exit blocks insertion point vector, PredIteratorCache, | ||
| 203 | /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions | ||
| 204 | /// of the loop and loop safety information as arguments. | ||
| 205 | /// Diagnostics is emitted via \p ORE. It returns changed status. | ||
| 206 | /// \p AllowSpeculation is whether values should be hoisted even if they are not | ||
| 207 | /// guaranteed to execute in the loop, but are safe to speculatively execute. | ||
| 208 | bool promoteLoopAccessesToScalars( | ||
| 209 | const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &, | ||
| 210 | SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &, | ||
| 211 | PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, | ||
| 212 | const TargetLibraryInfo *, TargetTransformInfo *, Loop *, | ||
| 213 | MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, | ||
| 214 | bool AllowSpeculation, bool HasReadsOutsideSet); | ||
| 215 | |||
| 216 | /// Does a BFS from a given node to all of its children inside a given loop. | ||
| 217 | /// The returned vector of nodes includes the starting point. | ||
| 218 | SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N, | ||
| 219 | const Loop *CurLoop); | ||
| 220 | |||
| 221 | /// Returns the instructions that use values defined in the loop. | ||
| 222 | SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L); | ||
| 223 | |||
| 224 | /// Find a combination of metadata ("llvm.loop.vectorize.width" and | ||
| 225 | /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a | ||
| 226 | /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found | ||
| 227 | /// then std::nullopt is returned. | ||
| 228 | std::optional<ElementCount> | ||
| 229 | getOptionalElementCountLoopAttribute(const Loop *TheLoop); | ||
| 230 | |||
| 231 | /// Create a new loop identifier for a loop created from a loop transformation. | ||
| 232 | /// | ||
| 233 | /// @param OrigLoopID The loop ID of the loop before the transformation. | ||
| 234 | /// @param FollowupAttrs List of attribute names that contain attributes to be | ||
| 235 | ///                      added to the new loop ID. | ||
| 236 | /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited | ||
| 237 | ///                                  from the original loop. The following values | ||
| 238 | ///                                  are considered: | ||
| 239 | ///        nullptr   : Inherit all attributes from @p OrigLoopID. | ||
| 240 | ///        ""        : Do not inherit any attribute from @p OrigLoopID; only use | ||
| 241 | ///                    those specified by a followup attribute. | ||
| 242 | ///        "<prefix>": Inherit all attributes except those which start with | ||
| 243 | ///                    <prefix>; commonly used to remove metadata for the | ||
| 244 | ///                    applied transformation. | ||
| 245 | /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return | ||
| 246 | ///                  std::nullopt. | ||
| 247 | /// | ||
| 248 | /// @return The loop ID for the after-transformation loop. The following values | ||
| 249 | ///         can be returned: | ||
| 250 | ///         std::nullopt : No followup attribute was found; it is up to the | ||
| 251 | ///                        transformation to choose attributes that make sense. | ||
| 252 | ///         @p OrigLoopID: The original identifier can be reused. | ||
| 253 | ///         nullptr      : The new loop has no attributes. | ||
| 254 | ///         MDNode*      : A new unique loop identifier. | ||
| 255 | std::optional<MDNode *> | ||
| 256 | makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs, | ||
| 257 | const char *InheritOptionsAttrsPrefix = "", | ||
| 258 | bool AlwaysNew = false); | ||
| 259 | |||
| 260 | /// Look for the loop attribute that disables all transformation heuristic. | ||
| 261 | bool hasDisableAllTransformsHint(const Loop *L); | ||
| 262 | |||
| 263 | /// Look for the loop attribute that disables the LICM transformation heuristics. | ||
| 264 | bool hasDisableLICMTransformsHint(const Loop *L); | ||
| 265 | |||
| 266 | /// The mode sets how eager a transformation should be applied. | ||
| 267 | enum TransformationMode { | ||
| 268 |   /// The pass can use heuristics to determine whether a transformation should | ||
| 269 |   /// be applied. | ||
| 270 | TM_Unspecified, | ||
| 271 | |||
| 272 |   /// The transformation should be applied without considering a cost model. | ||
| 273 | TM_Enable, | ||
| 274 | |||
| 275 |   /// The transformation should not be applied. | ||
| 276 | TM_Disable, | ||
| 277 | |||
| 278 |   /// Force is a flag and should not be used alone. | ||
| 279 | TM_Force = 0x04, | ||
| 280 | |||
| 281 |   /// The transformation was directed by the user, e.g. by a #pragma in | ||
| 282 |   /// the source code. If the transformation could not be applied, a | ||
| 283 |   /// warning should be emitted. | ||
| 284 | TM_ForcedByUser = TM_Enable | TM_Force, | ||
| 285 | |||
| 286 |   /// The transformation must not be applied. For instance, `#pragma clang loop | ||
| 287 |   /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike | ||
| 288 |   /// general loop metadata, it must not be dropped. Most passes should not | ||
| 289 |   /// behave differently under TM_Disable and TM_SuppressedByUser. | ||
| 290 | TM_SuppressedByUser = TM_Disable | TM_Force | ||
| 291 | }; | ||
| 292 | |||
| 293 | /// @{ | ||
| 294 | /// Get the mode for LLVM's supported loop transformations. | ||
| 295 | TransformationMode hasUnrollTransformation(const Loop *L); | ||
| 296 | TransformationMode hasUnrollAndJamTransformation(const Loop *L); | ||
| 297 | TransformationMode hasVectorizeTransformation(const Loop *L); | ||
| 298 | TransformationMode hasDistributeTransformation(const Loop *L); | ||
| 299 | TransformationMode hasLICMVersioningTransformation(const Loop *L); | ||
| 300 | /// @} | ||
| 301 | |||
| 302 | /// Set input string into loop metadata by keeping other values intact. | ||
| 303 | /// If the string is already in loop metadata update value if it is | ||
| 304 | /// different. | ||
| 305 | void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, | ||
| 306 | unsigned V = 0); | ||
| 307 | |||
| 308 | /// Returns a loop's estimated trip count based on branch weight metadata. | ||
| 309 | /// In addition if \p EstimatedLoopInvocationWeight is not null it is | ||
| 310 | /// initialized with weight of loop's latch leading to the exit. | ||
| 311 | /// Returns 0 when the count is estimated to be 0, or std::nullopt when a | ||
| 312 | /// meaningful estimate can not be made. | ||
| 313 | std::optional<unsigned> | ||
| 314 | getLoopEstimatedTripCount(Loop *L, | ||
| 315 | unsigned *EstimatedLoopInvocationWeight = nullptr); | ||
| 316 | |||
| 317 | /// Set a loop's branch weight metadata to reflect that loop has \p | ||
| 318 | /// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits | ||
| 319 | /// through latch. Returns true if metadata is successfully updated, false | ||
| 320 | /// otherwise. Note that loop must have a latch block which controls loop exit | ||
| 321 | /// in order to succeed. | ||
| 322 | bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, | ||
| 323 | unsigned EstimatedLoopInvocationWeight); | ||
| 324 | |||
| 325 | /// Check inner loop (L) backedge count is known to be invariant on all | ||
| 326 | /// iterations of its outer loop. If the loop has no parent, this is trivially | ||
| 327 | /// true. | ||
| 328 | bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE); | ||
| 329 | |||
| 330 | /// Helper to consistently add the set of standard passes to a loop pass's \c | ||
| 331 | /// AnalysisUsage. | ||
| 332 | /// | ||
| 333 | /// All loop passes should call this as part of implementing their \c | ||
| 334 | /// getAnalysisUsage. | ||
| 335 | void getLoopAnalysisUsage(AnalysisUsage &AU); | ||
| 336 | |||
| 337 | /// Returns true if is legal to hoist or sink this instruction disregarding the | ||
| 338 | /// possible introduction of faults.  Reasoning about potential faulting | ||
| 339 | /// instructions is the responsibility of the caller since it is challenging to | ||
| 340 | /// do efficiently from within this routine. | ||
| 341 | /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the | ||
| 342 | /// target executes at most once per execution of the loop body.  This is used | ||
| 343 | /// to assess the legality of duplicating atomic loads.  Generally, this is | ||
| 344 | /// true when moving out of loop and not true when moving into loops. | ||
| 345 | /// If \p ORE is set use it to emit optimization remarks. | ||
| 346 | bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, | ||
| 347 | Loop *CurLoop, MemorySSAUpdater &MSSAU, | ||
| 348 |                         bool TargetExecutesOncePerLoop, | ||
| 349 |                         SinkAndHoistLICMFlags &LICMFlags, | ||
| 350 | OptimizationRemarkEmitter *ORE = nullptr); | ||
| 351 | |||
| 352 | /// Returns the comparison predicate used when expanding a min/max reduction. | ||
| 353 | CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); | ||
| 354 | |||
| 355 | /// See RecurrenceDescriptor::isSelectCmpPattern for a description of the | ||
| 356 | /// pattern we are trying to match. In this pattern we are only ever selecting | ||
| 357 | /// between two values: 1) an initial PHI start value, and 2) a loop invariant | ||
| 358 | /// value. This function uses \p LoopExitInst to determine 2), which we then use | ||
| 359 | /// to select between \p Left and \p Right. Any lane value in \p Left that | ||
| 360 | /// matches 2) will be merged into \p Right. | ||
| 361 | Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, | ||
| 362 | Value *Left, Value *Right); | ||
| 363 | |||
| 364 | /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. | ||
| 365 | /// The Builder's fast-math-flags must be set to propagate the expected values. | ||
| 366 | Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, | ||
| 367 | Value *Right); | ||
| 368 | |||
| 369 | /// Generates an ordered vector reduction using extracts to reduce the value. | ||
| 370 | Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, | ||
| 371 | unsigned Op, RecurKind MinMaxKind = RecurKind::None); | ||
| 372 | |||
| 373 | /// Generates a vector reduction using shufflevectors to reduce the value. | ||
| 374 | /// Fast-math-flags are propagated using the IRBuilder's setting. | ||
| 375 | Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, | ||
| 376 | RecurKind MinMaxKind = RecurKind::None); | ||
| 377 | |||
| 378 | /// Create a target reduction of the given vector. The reduction operation | ||
| 379 | /// is described by the \p Opcode parameter. min/max reductions require | ||
| 380 | /// additional information supplied in \p RdxKind. | ||
| 381 | /// The target is queried to determine if intrinsics or shuffle sequences are | ||
| 382 | /// required to implement the reduction. | ||
| 383 | /// Fast-math-flags are propagated using the IRBuilder's setting. | ||
| 384 | Value *createSimpleTargetReduction(IRBuilderBase &B, | ||
| 385 | const TargetTransformInfo *TTI, Value *Src, | ||
| 386 | RecurKind RdxKind); | ||
| 387 | |||
| 388 | /// Create a target reduction of the given vector \p Src for a reduction of the | ||
| 389 | /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation | ||
| 390 | /// is described by \p Desc. | ||
| 391 | Value *createSelectCmpTargetReduction(IRBuilderBase &B, | ||
| 392 | const TargetTransformInfo *TTI, | ||
| 393 |                                       Value *Src, | ||
| 394 | const RecurrenceDescriptor &Desc, | ||
| 395 | PHINode *OrigPhi); | ||
| 396 | |||
| 397 | /// Create a generic target reduction using a recurrence descriptor \p Desc | ||
| 398 | /// The target is queried to determine if intrinsics or shuffle sequences are | ||
| 399 | /// required to implement the reduction. | ||
| 400 | /// Fast-math-flags are propagated using the RecurrenceDescriptor. | ||
| 401 | Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, | ||
| 402 | const RecurrenceDescriptor &Desc, Value *Src, | ||
| 403 | PHINode *OrigPhi = nullptr); | ||
| 404 | |||
| 405 | /// Create an ordered reduction intrinsic using the given recurrence | ||
| 406 | /// descriptor \p Desc. | ||
| 407 | Value *createOrderedReduction(IRBuilderBase &B, | ||
| 408 | const RecurrenceDescriptor &Desc, Value *Src, | ||
| 409 | Value *Start); | ||
| 410 | |||
| 411 | /// Get the intersection (logical and) of all of the potential IR flags | ||
| 412 | /// of each scalar operation (VL) that will be converted into a vector (I). | ||
| 413 | /// If OpValue is non-null, we only consider operations similar to OpValue | ||
| 414 | /// when intersecting. | ||
| 415 | /// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of | ||
| 416 | /// fast-math. | ||
| 417 | void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr, | ||
| 418 | bool IncludeWrapFlags = true); | ||
| 419 | |||
| 420 | /// Returns true if we can prove that \p S is defined and always negative in | ||
| 421 | /// loop \p L. | ||
| 422 | bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE); | ||
| 423 | |||
| 424 | /// Returns true if we can prove that \p S is defined and always non-negative in | ||
| 425 | /// loop \p L. | ||
| 426 | bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, | ||
| 427 | ScalarEvolution &SE); | ||
| 428 | |||
| 429 | /// Returns true if \p S is defined and never is equal to signed/unsigned max. | ||
| 430 | bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, | ||
| 431 | bool Signed); | ||
| 432 | |||
| 433 | /// Returns true if \p S is defined and never is equal to signed/unsigned min. | ||
| 434 | bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, | ||
| 435 | bool Signed); | ||
| 436 | |||
| 437 | enum ReplaceExitVal { | ||
| 438 | NeverRepl, | ||
| 439 | OnlyCheapRepl, | ||
| 440 | NoHardUse, | ||
| 441 | UnusedIndVarInLoop, | ||
| 442 | AlwaysRepl | ||
| 443 | }; | ||
| 444 | |||
| 445 | /// If the final value of any expressions that are recurrent in the loop can | ||
| 446 | /// be computed, substitute the exit values from the loop into any instructions | ||
| 447 | /// outside of the loop that use the final values of the current expressions. | ||
| 448 | /// Return the number of loop exit values that have been replaced, and the | ||
| 449 | /// corresponding phi node will be added to DeadInsts. | ||
| 450 | int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, | ||
| 451 | ScalarEvolution *SE, const TargetTransformInfo *TTI, | ||
| 452 | SCEVExpander &Rewriter, DominatorTree *DT, | ||
| 453 | ReplaceExitVal ReplaceExitValue, | ||
| 454 | SmallVector<WeakTrackingVH, 16> &DeadInsts); | ||
| 455 | |||
| 456 | /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for | ||
| 457 | /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p | ||
| 458 | /// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that | ||
| 459 | /// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect | ||
| 460 | /// the remaining TC%UF iterations. | ||
| 461 | /// | ||
| 462 | /// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p | ||
| 463 | /// RemainderLoop in which case weights for \p OrigLoop are updated accordingly. | ||
| 464 | /// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are | ||
| 465 | /// equal. \p UF must be greater than zero. | ||
| 466 | /// If \p OrigLoop has no profile info associated nothing happens. | ||
| 467 | /// | ||
| 468 | /// This utility may be useful for such optimizations as unroller and | ||
| 469 | /// vectorizer as it's typical transformation for them. | ||
| 470 | void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, | ||
| 471 | Loop *RemainderLoop, uint64_t UF); | ||
| 472 | |||
| 473 | /// Utility that implements appending of loops onto a worklist given a range. | ||
| 474 | /// We want to process loops in postorder, but the worklist is a LIFO data | ||
| 475 | /// structure, so we append to it in *reverse* postorder. | ||
| 476 | /// For trees, a preorder traversal is a viable reverse postorder, so we | ||
| 477 | /// actually append using a preorder walk algorithm. | ||
| 478 | template <typename RangeT> | ||
| 479 | void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &); | ||
| 480 | /// Utility that implements appending of loops onto a worklist given a range. | ||
| 481 | /// It has the same behavior as appendLoopsToWorklist, but assumes the range of | ||
| 482 | /// loops has already been reversed, so it processes loops in the given order. | ||
| 483 | template <typename RangeT> | ||
| 484 | void appendReversedLoopsToWorklist(RangeT &&, | ||
| 485 | SmallPriorityWorklist<Loop *, 4> &); | ||
| 486 | |||
| 487 | /// Utility that implements appending of loops onto a worklist given LoopInfo. | ||
| 488 | /// Calls the templated utility taking a Range of loops, handing it the Loops | ||
| 489 | /// in LoopInfo, iterated in reverse. This is because the loops are stored in | ||
| 490 | /// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling, | ||
| 491 | /// loop deletion, and LICM, we largely want to work forward across the CFG so | ||
| 492 | /// that we visit defs before uses and can propagate simplifications from one | ||
| 493 | /// loop nest into the next. Calls appendReversedLoopsToWorklist with the | ||
| 494 | /// already reversed loops in LI. | ||
| 495 | /// FIXME: Consider changing the order in LoopInfo. | ||
| 496 | void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &); | ||
| 497 | |||
| 498 | /// Recursively clone the specified loop and all of its children, | ||
| 499 | /// mapping the blocks with the specified map. | ||
| 500 | Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, | ||
| 501 | LoopInfo *LI, LPPassManager *LPM); | ||
| 502 | |||
| 503 | /// Add code that checks at runtime if the accessed arrays in \p PointerChecks | ||
| 504 | /// overlap. Returns the final comparator value or NULL if no check is needed. | ||
| 505 | Value * | ||
| 506 | addRuntimeChecks(Instruction *Loc, Loop *TheLoop, | ||
| 507 | const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, | ||
| 508 | SCEVExpander &Expander); | ||
| 509 | |||
| 510 | Value *addDiffRuntimeChecks( | ||
| 511 | Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander, | ||
| 512 | function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC); | ||
| 513 | |||
| 514 | /// Struct to hold information about a partially invariant condition. | ||
| 515 | struct IVConditionInfo { | ||
| 516 |   /// Instructions that need to be duplicated and checked for the unswitching | ||
| 517 |   /// condition. | ||
| 518 | SmallVector<Instruction *> InstToDuplicate; | ||
| 519 | |||
| 520 |   /// Constant to indicate for which value the condition is invariant. | ||
| 521 | Constant *KnownValue = nullptr; | ||
| 522 | |||
| 523 |   /// True if the partially invariant path is no-op (=does not have any | ||
| 524 |   /// side-effects and no loop value is used outside the loop). | ||
| 525 | bool PathIsNoop = true; | ||
| 526 | |||
| 527 |   /// If the partially invariant path reaches a single exit block, ExitForPath | ||
| 528 |   /// is set to that block. Otherwise it is nullptr. | ||
| 529 | BasicBlock *ExitForPath = nullptr; | ||
| 530 | }; | ||
| 531 | |||
| 532 | /// Check if the loop header has a conditional branch that is not | ||
| 533 | /// loop-invariant, because it involves load instructions. If all paths from | ||
| 534 | /// either the true or false successor to the header or loop exists do not | ||
| 535 | /// modify the memory feeding the condition, perform 'partial unswitching'. That | ||
| 536 | /// is, duplicate the instructions feeding the condition in the pre-header. Then | ||
| 537 | /// unswitch on the duplicated condition. The condition is now known in the | ||
| 538 | /// unswitched version for the 'invariant' path through the original loop. | ||
| 539 | /// | ||
| 540 | /// If the branch condition of the header is partially invariant, return a pair | ||
| 541 | /// containing the instructions to duplicate and a boolean Constant to update | ||
| 542 | /// the condition in the loops created for the true or false successors. | ||
| 543 | std::optional<IVConditionInfo> hasPartialIVCondition(const Loop &L, | ||
| 544 |                                                      unsigned MSSAThreshold, | ||
| 545 | const MemorySSA &MSSA, | ||
| 546 | AAResults &AA); | ||
| 547 | |||
| 548 | } // end namespace llvm | ||
| 549 | |||
| 550 | #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |