Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file defines some loop transformation utilities. |
||
10 | // |
||
11 | //===----------------------------------------------------------------------===// |
||
12 | |||
13 | #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
||
14 | #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
||
15 | |||
16 | #include "llvm/Analysis/IVDescriptors.h" |
||
17 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
||
18 | #include "llvm/Transforms/Utils/ValueMapper.h" |
||
19 | |||
20 | namespace llvm { |
||
21 | |||
22 | template <typename T> class DomTreeNodeBase; |
||
23 | using DomTreeNode = DomTreeNodeBase<BasicBlock>; |
||
24 | class AssumptionCache; |
||
25 | class StringRef; |
||
26 | class AnalysisUsage; |
||
27 | class TargetTransformInfo; |
||
28 | class AAResults; |
||
29 | class BasicBlock; |
||
30 | class ICFLoopSafetyInfo; |
||
31 | class IRBuilderBase; |
||
32 | class Loop; |
||
33 | class LoopInfo; |
||
34 | class MemoryAccess; |
||
35 | class MemorySSA; |
||
36 | class MemorySSAUpdater; |
||
37 | class OptimizationRemarkEmitter; |
||
38 | class PredIteratorCache; |
||
39 | class ScalarEvolution; |
||
40 | class SCEV; |
||
41 | class SCEVExpander; |
||
42 | class TargetLibraryInfo; |
||
43 | class LPPassManager; |
||
44 | class Instruction; |
||
45 | struct RuntimeCheckingPtrGroup; |
||
46 | typedef std::pair<const RuntimeCheckingPtrGroup *, |
||
47 | const RuntimeCheckingPtrGroup *> |
||
48 | RuntimePointerCheck; |
||
49 | |||
50 | template <typename T, unsigned N> class SmallSetVector; |
||
51 | template <typename T, unsigned N> class SmallPriorityWorklist; |
||
52 | |||
53 | BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, |
||
54 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); |
||
55 | |||
56 | /// Ensure that all exit blocks of the loop are dedicated exits. |
||
57 | /// |
||
58 | /// For any loop exit block with non-loop predecessors, we split the loop |
||
59 | /// predecessors to use a dedicated loop exit block. We update the dominator |
||
60 | /// tree and loop info if provided, and will preserve LCSSA if requested. |
||
61 | bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, |
||
62 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); |
||
63 | |||
64 | /// Ensures LCSSA form for every instruction from the Worklist in the scope of |
||
65 | /// innermost containing loop. |
||
66 | /// |
||
67 | /// For the given instruction which have uses outside of the loop, an LCSSA PHI |
||
68 | /// node is inserted and the uses outside the loop are rewritten to use this |
||
69 | /// node. |
||
70 | /// |
||
71 | /// LoopInfo and DominatorTree are required and, since the routine makes no |
||
72 | /// changes to CFG, preserved. |
||
73 | /// |
||
74 | /// Returns true if any modifications are made. |
||
75 | /// |
||
76 | /// This function may introduce unused PHI nodes. If \p PHIsToRemove is not |
||
77 | /// nullptr, those are added to it (before removing, the caller has to check if |
||
78 | /// they still do not have any uses). Otherwise the PHIs are directly removed. |
||
79 | bool formLCSSAForInstructions( |
||
80 | SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, |
||
81 | const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, |
||
82 | SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr); |
||
83 | |||
84 | /// Put loop into LCSSA form. |
||
85 | /// |
||
86 | /// Looks at all instructions in the loop which have uses outside of the |
||
87 | /// current loop. For each, an LCSSA PHI node is inserted and the uses outside |
||
88 | /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form |
||
89 | /// already. |
||
90 | /// |
||
91 | /// LoopInfo and DominatorTree are required and preserved. |
||
92 | /// |
||
93 | /// If ScalarEvolution is passed in, it will be preserved. |
||
94 | /// |
||
95 | /// Returns true if any modifications are made to the loop. |
||
96 | bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, |
||
97 | ScalarEvolution *SE); |
||
98 | |||
99 | /// Put a loop nest into LCSSA form. |
||
100 | /// |
||
101 | /// This recursively forms LCSSA for a loop nest. |
||
102 | /// |
||
103 | /// LoopInfo and DominatorTree are required and preserved. |
||
104 | /// |
||
105 | /// If ScalarEvolution is passed in, it will be preserved. |
||
106 | /// |
||
107 | /// Returns true if any modifications are made to the loop. |
||
108 | bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, |
||
109 | ScalarEvolution *SE); |
||
110 | |||
111 | /// Flags controlling how much is checked when sinking or hoisting |
||
112 | /// instructions. The number of memory access in the loop (and whether there |
||
113 | /// are too many) is determined in the constructors when using MemorySSA. |
||
114 | class SinkAndHoistLICMFlags { |
||
115 | public: |
||
116 | // Explicitly set limits. |
||
117 | SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, |
||
118 | unsigned LicmMssaNoAccForPromotionCap, bool IsSink, |
||
119 | Loop *L = nullptr, MemorySSA *MSSA = nullptr); |
||
120 | // Use default limits. |
||
121 | SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, |
||
122 | MemorySSA *MSSA = nullptr); |
||
123 | |||
124 | void setIsSink(bool B) { IsSink = B; } |
||
125 | bool getIsSink() { return IsSink; } |
||
126 | bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } |
||
127 | bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } |
||
128 | void incrementClobberingCalls() { ++LicmMssaOptCounter; } |
||
129 | |||
130 | protected: |
||
131 | bool NoOfMemAccTooLarge = false; |
||
132 | unsigned LicmMssaOptCounter = 0; |
||
133 | unsigned LicmMssaOptCap; |
||
134 | unsigned LicmMssaNoAccForPromotionCap; |
||
135 | bool IsSink; |
||
136 | }; |
||
137 | |||
138 | /// Walk the specified region of the CFG (defined by all blocks |
||
139 | /// dominated by the specified block, and that are in the current loop) in |
||
140 | /// reverse depth first order w.r.t the DominatorTree. This allows us to visit |
||
141 | /// uses before definitions, allowing us to sink a loop body in one pass without |
||
142 | /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, |
||
143 | /// TargetLibraryInfo, Loop, AliasSet information for all |
||
144 | /// instructions of the loop and loop safety information as |
||
145 | /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. |
||
146 | /// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when |
||
147 | /// this function is called by \p sinkRegionForLoopNest. |
||
148 | bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, |
||
149 | TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, |
||
150 | MemorySSAUpdater &, ICFLoopSafetyInfo *, |
||
151 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, |
||
152 | Loop *OutermostLoop = nullptr); |
||
153 | |||
154 | /// Call sinkRegion on loops contained within the specified loop |
||
155 | /// in order from innermost to outermost. |
||
156 | bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, |
||
157 | DominatorTree *, TargetLibraryInfo *, |
||
158 | TargetTransformInfo *, Loop *, MemorySSAUpdater &, |
||
159 | ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, |
||
160 | OptimizationRemarkEmitter *); |
||
161 | |||
162 | /// Walk the specified region of the CFG (defined by all blocks |
||
163 | /// dominated by the specified block, and that are in the current loop) in depth |
||
164 | /// first order w.r.t the DominatorTree. This allows us to visit definitions |
||
165 | /// before uses, allowing us to hoist a loop body in one pass without iteration. |
||
166 | /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, |
||
167 | /// TargetLibraryInfo, Loop, AliasSet information for all |
||
168 | /// instructions of the loop and loop safety information as arguments. |
||
169 | /// Diagnostics is emitted via \p ORE. It returns changed status. |
||
170 | /// \p AllowSpeculation is whether values should be hoisted even if they are not |
||
171 | /// guaranteed to execute in the loop, but are safe to speculatively execute. |
||
172 | bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, |
||
173 | AssumptionCache *, TargetLibraryInfo *, Loop *, |
||
174 | MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, |
||
175 | SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, |
||
176 | bool AllowSpeculation); |
||
177 | |||
178 | /// This function deletes dead loops. The caller of this function needs to |
||
179 | /// guarantee that the loop is infact dead. |
||
180 | /// The function requires a bunch or prerequisites to be present: |
||
181 | /// - The loop needs to be in LCSSA form |
||
182 | /// - The loop needs to have a Preheader |
||
183 | /// - A unique dedicated exit block must exist |
||
184 | /// |
||
185 | /// This also updates the relevant analysis information in \p DT, \p SE, \p LI |
||
186 | /// and \p MSSA if pointers to those are provided. |
||
187 | /// It also updates the loop PM if an updater struct is provided. |
||
188 | |||
189 | void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, |
||
190 | LoopInfo *LI, MemorySSA *MSSA = nullptr); |
||
191 | |||
192 | /// Remove the backedge of the specified loop. Handles loop nests and general |
||
193 | /// loop structures subject to the precondition that the loop has no parent |
||
194 | /// loop and has a single latch block. Preserves all listed analyses. |
||
195 | void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, |
||
196 | LoopInfo &LI, MemorySSA *MSSA); |
||
197 | |||
198 | /// Try to promote memory values to scalars by sinking stores out of |
||
199 | /// the loop and moving loads to before the loop. We do this by looping over |
||
200 | /// the stores in the loop, looking for stores to Must pointers which are |
||
201 | /// loop invariant. It takes a set of must-alias values, Loop exit blocks |
||
202 | /// vector, loop exit blocks insertion point vector, PredIteratorCache, |
||
203 | /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions |
||
204 | /// of the loop and loop safety information as arguments. |
||
205 | /// Diagnostics is emitted via \p ORE. It returns changed status. |
||
206 | /// \p AllowSpeculation is whether values should be hoisted even if they are not |
||
207 | /// guaranteed to execute in the loop, but are safe to speculatively execute. |
||
208 | bool promoteLoopAccessesToScalars( |
||
209 | const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &, |
||
210 | SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &, |
||
211 | PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, |
||
212 | const TargetLibraryInfo *, TargetTransformInfo *, Loop *, |
||
213 | MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, |
||
214 | bool AllowSpeculation, bool HasReadsOutsideSet); |
||
215 | |||
216 | /// Does a BFS from a given node to all of its children inside a given loop. |
||
217 | /// The returned vector of nodes includes the starting point. |
||
218 | SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N, |
||
219 | const Loop *CurLoop); |
||
220 | |||
221 | /// Returns the instructions that use values defined in the loop. |
||
222 | SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L); |
||
223 | |||
224 | /// Find a combination of metadata ("llvm.loop.vectorize.width" and |
||
225 | /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a |
||
226 | /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found |
||
227 | /// then std::nullopt is returned. |
||
228 | std::optional<ElementCount> |
||
229 | getOptionalElementCountLoopAttribute(const Loop *TheLoop); |
||
230 | |||
231 | /// Create a new loop identifier for a loop created from a loop transformation. |
||
232 | /// |
||
233 | /// @param OrigLoopID The loop ID of the loop before the transformation. |
||
234 | /// @param FollowupAttrs List of attribute names that contain attributes to be |
||
235 | /// added to the new loop ID. |
||
236 | /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited |
||
237 | /// from the original loop. The following values |
||
238 | /// are considered: |
||
239 | /// nullptr : Inherit all attributes from @p OrigLoopID. |
||
240 | /// "" : Do not inherit any attribute from @p OrigLoopID; only use |
||
241 | /// those specified by a followup attribute. |
||
242 | /// "<prefix>": Inherit all attributes except those which start with |
||
243 | /// <prefix>; commonly used to remove metadata for the |
||
244 | /// applied transformation. |
||
245 | /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return |
||
246 | /// std::nullopt. |
||
247 | /// |
||
248 | /// @return The loop ID for the after-transformation loop. The following values |
||
249 | /// can be returned: |
||
250 | /// std::nullopt : No followup attribute was found; it is up to the |
||
251 | /// transformation to choose attributes that make sense. |
||
252 | /// @p OrigLoopID: The original identifier can be reused. |
||
253 | /// nullptr : The new loop has no attributes. |
||
254 | /// MDNode* : A new unique loop identifier. |
||
255 | std::optional<MDNode *> |
||
256 | makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs, |
||
257 | const char *InheritOptionsAttrsPrefix = "", |
||
258 | bool AlwaysNew = false); |
||
259 | |||
260 | /// Look for the loop attribute that disables all transformation heuristic. |
||
261 | bool hasDisableAllTransformsHint(const Loop *L); |
||
262 | |||
263 | /// Look for the loop attribute that disables the LICM transformation heuristics. |
||
264 | bool hasDisableLICMTransformsHint(const Loop *L); |
||
265 | |||
266 | /// The mode sets how eager a transformation should be applied. |
||
267 | enum TransformationMode { |
||
268 | /// The pass can use heuristics to determine whether a transformation should |
||
269 | /// be applied. |
||
270 | TM_Unspecified, |
||
271 | |||
272 | /// The transformation should be applied without considering a cost model. |
||
273 | TM_Enable, |
||
274 | |||
275 | /// The transformation should not be applied. |
||
276 | TM_Disable, |
||
277 | |||
278 | /// Force is a flag and should not be used alone. |
||
279 | TM_Force = 0x04, |
||
280 | |||
281 | /// The transformation was directed by the user, e.g. by a #pragma in |
||
282 | /// the source code. If the transformation could not be applied, a |
||
283 | /// warning should be emitted. |
||
284 | TM_ForcedByUser = TM_Enable | TM_Force, |
||
285 | |||
286 | /// The transformation must not be applied. For instance, `#pragma clang loop |
||
287 | /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike |
||
288 | /// general loop metadata, it must not be dropped. Most passes should not |
||
289 | /// behave differently under TM_Disable and TM_SuppressedByUser. |
||
290 | TM_SuppressedByUser = TM_Disable | TM_Force |
||
291 | }; |
||
292 | |||
293 | /// @{ |
||
294 | /// Get the mode for LLVM's supported loop transformations. |
||
295 | TransformationMode hasUnrollTransformation(const Loop *L); |
||
296 | TransformationMode hasUnrollAndJamTransformation(const Loop *L); |
||
297 | TransformationMode hasVectorizeTransformation(const Loop *L); |
||
298 | TransformationMode hasDistributeTransformation(const Loop *L); |
||
299 | TransformationMode hasLICMVersioningTransformation(const Loop *L); |
||
300 | /// @} |
||
301 | |||
302 | /// Set input string into loop metadata by keeping other values intact. |
||
303 | /// If the string is already in loop metadata update value if it is |
||
304 | /// different. |
||
305 | void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, |
||
306 | unsigned V = 0); |
||
307 | |||
308 | /// Returns a loop's estimated trip count based on branch weight metadata. |
||
309 | /// In addition if \p EstimatedLoopInvocationWeight is not null it is |
||
310 | /// initialized with weight of loop's latch leading to the exit. |
||
311 | /// Returns 0 when the count is estimated to be 0, or std::nullopt when a |
||
312 | /// meaningful estimate can not be made. |
||
313 | std::optional<unsigned> |
||
314 | getLoopEstimatedTripCount(Loop *L, |
||
315 | unsigned *EstimatedLoopInvocationWeight = nullptr); |
||
316 | |||
317 | /// Set a loop's branch weight metadata to reflect that loop has \p |
||
318 | /// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits |
||
319 | /// through latch. Returns true if metadata is successfully updated, false |
||
320 | /// otherwise. Note that loop must have a latch block which controls loop exit |
||
321 | /// in order to succeed. |
||
322 | bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, |
||
323 | unsigned EstimatedLoopInvocationWeight); |
||
324 | |||
325 | /// Check inner loop (L) backedge count is known to be invariant on all |
||
326 | /// iterations of its outer loop. If the loop has no parent, this is trivially |
||
327 | /// true. |
||
328 | bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE); |
||
329 | |||
330 | /// Helper to consistently add the set of standard passes to a loop pass's \c |
||
331 | /// AnalysisUsage. |
||
332 | /// |
||
333 | /// All loop passes should call this as part of implementing their \c |
||
334 | /// getAnalysisUsage. |
||
335 | void getLoopAnalysisUsage(AnalysisUsage &AU); |
||
336 | |||
337 | /// Returns true if is legal to hoist or sink this instruction disregarding the |
||
338 | /// possible introduction of faults. Reasoning about potential faulting |
||
339 | /// instructions is the responsibility of the caller since it is challenging to |
||
340 | /// do efficiently from within this routine. |
||
341 | /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the |
||
342 | /// target executes at most once per execution of the loop body. This is used |
||
343 | /// to assess the legality of duplicating atomic loads. Generally, this is |
||
344 | /// true when moving out of loop and not true when moving into loops. |
||
345 | /// If \p ORE is set use it to emit optimization remarks. |
||
346 | bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, |
||
347 | Loop *CurLoop, MemorySSAUpdater &MSSAU, |
||
348 | bool TargetExecutesOncePerLoop, |
||
349 | SinkAndHoistLICMFlags &LICMFlags, |
||
350 | OptimizationRemarkEmitter *ORE = nullptr); |
||
351 | |||
352 | /// Returns the comparison predicate used when expanding a min/max reduction. |
||
353 | CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); |
||
354 | |||
355 | /// See RecurrenceDescriptor::isSelectCmpPattern for a description of the |
||
356 | /// pattern we are trying to match. In this pattern we are only ever selecting |
||
357 | /// between two values: 1) an initial PHI start value, and 2) a loop invariant |
||
358 | /// value. This function uses \p LoopExitInst to determine 2), which we then use |
||
359 | /// to select between \p Left and \p Right. Any lane value in \p Left that |
||
360 | /// matches 2) will be merged into \p Right. |
||
361 | Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, |
||
362 | Value *Left, Value *Right); |
||
363 | |||
364 | /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. |
||
365 | /// The Builder's fast-math-flags must be set to propagate the expected values. |
||
366 | Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, |
||
367 | Value *Right); |
||
368 | |||
369 | /// Generates an ordered vector reduction using extracts to reduce the value. |
||
370 | Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, |
||
371 | unsigned Op, RecurKind MinMaxKind = RecurKind::None); |
||
372 | |||
373 | /// Generates a vector reduction using shufflevectors to reduce the value. |
||
374 | /// Fast-math-flags are propagated using the IRBuilder's setting. |
||
375 | Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, |
||
376 | RecurKind MinMaxKind = RecurKind::None); |
||
377 | |||
378 | /// Create a target reduction of the given vector. The reduction operation |
||
379 | /// is described by the \p Opcode parameter. min/max reductions require |
||
380 | /// additional information supplied in \p RdxKind. |
||
381 | /// The target is queried to determine if intrinsics or shuffle sequences are |
||
382 | /// required to implement the reduction. |
||
383 | /// Fast-math-flags are propagated using the IRBuilder's setting. |
||
384 | Value *createSimpleTargetReduction(IRBuilderBase &B, |
||
385 | const TargetTransformInfo *TTI, Value *Src, |
||
386 | RecurKind RdxKind); |
||
387 | |||
388 | /// Create a target reduction of the given vector \p Src for a reduction of the |
||
389 | /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation |
||
390 | /// is described by \p Desc. |
||
391 | Value *createSelectCmpTargetReduction(IRBuilderBase &B, |
||
392 | const TargetTransformInfo *TTI, |
||
393 | Value *Src, |
||
394 | const RecurrenceDescriptor &Desc, |
||
395 | PHINode *OrigPhi); |
||
396 | |||
397 | /// Create a generic target reduction using a recurrence descriptor \p Desc |
||
398 | /// The target is queried to determine if intrinsics or shuffle sequences are |
||
399 | /// required to implement the reduction. |
||
400 | /// Fast-math-flags are propagated using the RecurrenceDescriptor. |
||
401 | Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, |
||
402 | const RecurrenceDescriptor &Desc, Value *Src, |
||
403 | PHINode *OrigPhi = nullptr); |
||
404 | |||
405 | /// Create an ordered reduction intrinsic using the given recurrence |
||
406 | /// descriptor \p Desc. |
||
407 | Value *createOrderedReduction(IRBuilderBase &B, |
||
408 | const RecurrenceDescriptor &Desc, Value *Src, |
||
409 | Value *Start); |
||
410 | |||
411 | /// Get the intersection (logical and) of all of the potential IR flags |
||
412 | /// of each scalar operation (VL) that will be converted into a vector (I). |
||
413 | /// If OpValue is non-null, we only consider operations similar to OpValue |
||
414 | /// when intersecting. |
||
415 | /// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of |
||
416 | /// fast-math. |
||
417 | void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr, |
||
418 | bool IncludeWrapFlags = true); |
||
419 | |||
420 | /// Returns true if we can prove that \p S is defined and always negative in |
||
421 | /// loop \p L. |
||
422 | bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE); |
||
423 | |||
424 | /// Returns true if we can prove that \p S is defined and always non-negative in |
||
425 | /// loop \p L. |
||
426 | bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, |
||
427 | ScalarEvolution &SE); |
||
428 | |||
429 | /// Returns true if \p S is defined and never is equal to signed/unsigned max. |
||
430 | bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
||
431 | bool Signed); |
||
432 | |||
433 | /// Returns true if \p S is defined and never is equal to signed/unsigned min. |
||
434 | bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, |
||
435 | bool Signed); |
||
436 | |||
437 | enum ReplaceExitVal { |
||
438 | NeverRepl, |
||
439 | OnlyCheapRepl, |
||
440 | NoHardUse, |
||
441 | UnusedIndVarInLoop, |
||
442 | AlwaysRepl |
||
443 | }; |
||
444 | |||
445 | /// If the final value of any expressions that are recurrent in the loop can |
||
446 | /// be computed, substitute the exit values from the loop into any instructions |
||
447 | /// outside of the loop that use the final values of the current expressions. |
||
448 | /// Return the number of loop exit values that have been replaced, and the |
||
449 | /// corresponding phi node will be added to DeadInsts. |
||
450 | int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, |
||
451 | ScalarEvolution *SE, const TargetTransformInfo *TTI, |
||
452 | SCEVExpander &Rewriter, DominatorTree *DT, |
||
453 | ReplaceExitVal ReplaceExitValue, |
||
454 | SmallVector<WeakTrackingVH, 16> &DeadInsts); |
||
455 | |||
456 | /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for |
||
457 | /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p |
||
458 | /// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that |
||
459 | /// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect |
||
460 | /// the remaining TC%UF iterations. |
||
461 | /// |
||
462 | /// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p |
||
463 | /// RemainderLoop in which case weights for \p OrigLoop are updated accordingly. |
||
464 | /// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are |
||
465 | /// equal. \p UF must be greater than zero. |
||
466 | /// If \p OrigLoop has no profile info associated nothing happens. |
||
467 | /// |
||
468 | /// This utility may be useful for such optimizations as unroller and |
||
469 | /// vectorizer as it's typical transformation for them. |
||
470 | void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, |
||
471 | Loop *RemainderLoop, uint64_t UF); |
||
472 | |||
473 | /// Utility that implements appending of loops onto a worklist given a range. |
||
474 | /// We want to process loops in postorder, but the worklist is a LIFO data |
||
475 | /// structure, so we append to it in *reverse* postorder. |
||
476 | /// For trees, a preorder traversal is a viable reverse postorder, so we |
||
477 | /// actually append using a preorder walk algorithm. |
||
478 | template <typename RangeT> |
||
479 | void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &); |
||
480 | /// Utility that implements appending of loops onto a worklist given a range. |
||
481 | /// It has the same behavior as appendLoopsToWorklist, but assumes the range of |
||
482 | /// loops has already been reversed, so it processes loops in the given order. |
||
483 | template <typename RangeT> |
||
484 | void appendReversedLoopsToWorklist(RangeT &&, |
||
485 | SmallPriorityWorklist<Loop *, 4> &); |
||
486 | |||
487 | /// Utility that implements appending of loops onto a worklist given LoopInfo. |
||
488 | /// Calls the templated utility taking a Range of loops, handing it the Loops |
||
489 | /// in LoopInfo, iterated in reverse. This is because the loops are stored in |
||
490 | /// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling, |
||
491 | /// loop deletion, and LICM, we largely want to work forward across the CFG so |
||
492 | /// that we visit defs before uses and can propagate simplifications from one |
||
493 | /// loop nest into the next. Calls appendReversedLoopsToWorklist with the |
||
494 | /// already reversed loops in LI. |
||
495 | /// FIXME: Consider changing the order in LoopInfo. |
||
496 | void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &); |
||
497 | |||
498 | /// Recursively clone the specified loop and all of its children, |
||
499 | /// mapping the blocks with the specified map. |
||
500 | Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, |
||
501 | LoopInfo *LI, LPPassManager *LPM); |
||
502 | |||
503 | /// Add code that checks at runtime if the accessed arrays in \p PointerChecks |
||
504 | /// overlap. Returns the final comparator value or NULL if no check is needed. |
||
505 | Value * |
||
506 | addRuntimeChecks(Instruction *Loc, Loop *TheLoop, |
||
507 | const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, |
||
508 | SCEVExpander &Expander); |
||
509 | |||
510 | Value *addDiffRuntimeChecks( |
||
511 | Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander, |
||
512 | function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC); |
||
513 | |||
514 | /// Struct to hold information about a partially invariant condition. |
||
515 | struct IVConditionInfo { |
||
516 | /// Instructions that need to be duplicated and checked for the unswitching |
||
517 | /// condition. |
||
518 | SmallVector<Instruction *> InstToDuplicate; |
||
519 | |||
520 | /// Constant to indicate for which value the condition is invariant. |
||
521 | Constant *KnownValue = nullptr; |
||
522 | |||
523 | /// True if the partially invariant path is no-op (=does not have any |
||
524 | /// side-effects and no loop value is used outside the loop). |
||
525 | bool PathIsNoop = true; |
||
526 | |||
527 | /// If the partially invariant path reaches a single exit block, ExitForPath |
||
528 | /// is set to that block. Otherwise it is nullptr. |
||
529 | BasicBlock *ExitForPath = nullptr; |
||
530 | }; |
||
531 | |||
532 | /// Check if the loop header has a conditional branch that is not |
||
533 | /// loop-invariant, because it involves load instructions. If all paths from |
||
534 | /// either the true or false successor to the header or loop exists do not |
||
535 | /// modify the memory feeding the condition, perform 'partial unswitching'. That |
||
536 | /// is, duplicate the instructions feeding the condition in the pre-header. Then |
||
537 | /// unswitch on the duplicated condition. The condition is now known in the |
||
538 | /// unswitched version for the 'invariant' path through the original loop. |
||
539 | /// |
||
540 | /// If the branch condition of the header is partially invariant, return a pair |
||
541 | /// containing the instructions to duplicate and a boolean Constant to update |
||
542 | /// the condition in the loops created for the true or false successors. |
||
543 | std::optional<IVConditionInfo> hasPartialIVCondition(const Loop &L, |
||
544 | unsigned MSSAThreshold, |
||
545 | const MemorySSA &MSSA, |
||
546 | AAResults &AA); |
||
547 | |||
548 | } // end namespace llvm |
||
549 | |||
550 | #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |