Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- LoopVectorize.h ------------------------------------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops |
||
10 | // and generates target-independent LLVM-IR. |
||
11 | // The vectorizer uses the TargetTransformInfo analysis to estimate the costs |
||
12 | // of instructions in order to estimate the profitability of vectorization. |
||
13 | // |
||
14 | // The loop vectorizer combines consecutive loop iterations into a single |
||
15 | // 'wide' iteration. After this transformation the index is incremented |
||
16 | // by the SIMD vector width, and not by one. |
||
17 | // |
||
18 | // This pass has four parts: |
||
19 | // 1. The main loop pass that drives the different parts. |
||
20 | // 2. LoopVectorizationLegality - A unit that checks for the legality |
||
21 | // of the vectorization. |
||
22 | // 3. InnerLoopVectorizer - A unit that performs the actual |
||
23 | // widening of instructions. |
||
24 | // 4. LoopVectorizationCostModel - A unit that checks for the profitability |
||
25 | // of vectorization. It decides on the optimal vector width, which |
||
26 | // can be one, if vectorization is not profitable. |
||
27 | // |
||
28 | // There is a development effort going on to migrate loop vectorizer to the |
||
29 | // VPlan infrastructure and to introduce outer loop vectorization support (see |
||
30 | // docs/Proposal/VectorizationPlan.rst and |
||
31 | // http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this |
||
32 | // purpose, we temporarily introduced the VPlan-native vectorization path: an |
||
33 | // alternative vectorization path that is natively implemented on top of the |
||
34 | // VPlan infrastructure. See EnableVPlanNativePath for enabling. |
||
35 | // |
||
36 | //===----------------------------------------------------------------------===// |
||
37 | // |
||
38 | // The reduction-variable vectorization is based on the paper: |
||
39 | // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. |
||
40 | // |
||
41 | // Variable uniformity checks are inspired by: |
||
42 | // Karrenberg, R. and Hack, S. Whole Function Vectorization. |
||
43 | // |
||
44 | // The interleaved access vectorization is based on the paper: |
||
45 | // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved |
||
46 | // Data for SIMD |
||
47 | // |
||
48 | // Other ideas/concepts are from: |
||
49 | // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. |
||
50 | // |
||
51 | // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of |
||
52 | // Vectorizing Compilers. |
||
53 | // |
||
54 | //===----------------------------------------------------------------------===// |
||
55 | |||
56 | #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
||
57 | #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
||
58 | |||
59 | #include "llvm/IR/PassManager.h" |
||
60 | #include "llvm/Support/CommandLine.h" |
||
61 | #include <functional> |
||
62 | |||
63 | namespace llvm { |
||
64 | |||
65 | class AssumptionCache; |
||
66 | class BlockFrequencyInfo; |
||
67 | class DemandedBits; |
||
68 | class DominatorTree; |
||
69 | class Function; |
||
70 | class Loop; |
||
71 | class LoopAccessInfoManager; |
||
72 | class LoopInfo; |
||
73 | class OptimizationRemarkEmitter; |
||
74 | class ProfileSummaryInfo; |
||
75 | class ScalarEvolution; |
||
76 | class TargetLibraryInfo; |
||
77 | class TargetTransformInfo; |
||
78 | |||
79 | extern cl::opt<bool> EnableLoopInterleaving; |
||
80 | extern cl::opt<bool> EnableLoopVectorization; |
||
81 | |||
82 | /// A marker to determine if extra passes after loop vectorization should be |
||
83 | /// run. |
||
84 | struct ShouldRunExtraVectorPasses |
||
85 | : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> { |
||
86 | static AnalysisKey Key; |
||
87 | struct Result { |
||
88 | bool invalidate(Function &F, const PreservedAnalyses &PA, |
||
89 | FunctionAnalysisManager::Invalidator &) { |
||
90 | // Check whether the analysis has been explicitly invalidated. Otherwise, |
||
91 | // it remains preserved. |
||
92 | auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>(); |
||
93 | return !PAC.preservedWhenStateless(); |
||
94 | } |
||
95 | }; |
||
96 | |||
97 | Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); } |
||
98 | }; |
||
99 | |||
100 | /// A pass manager to run a set of extra function simplification passes after |
||
101 | /// vectorization, if requested. LoopVectorize caches the |
||
102 | /// ShouldRunExtraVectorPasses analysis to request extra simplifications, if |
||
103 | /// they could be beneficial. |
||
104 | struct ExtraVectorPassManager : public FunctionPassManager { |
||
105 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { |
||
106 | auto PA = PreservedAnalyses::all(); |
||
107 | if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F)) |
||
108 | PA.intersect(FunctionPassManager::run(F, AM)); |
||
109 | PA.abandon<ShouldRunExtraVectorPasses>(); |
||
110 | return PA; |
||
111 | } |
||
112 | }; |
||
113 | |||
/// Options controlling which loops the loop vectorizer considers.
struct LoopVectorizeOptions {
  /// If false, consider all loops for interleaving.
  /// If true, only loops that explicitly request interleaving are considered.
  bool InterleaveOnlyWhenForced;

  /// If false, consider all loops for vectorization.
  /// If true, only loops that explicitly request vectorization are considered.
  bool VectorizeOnlyWhenForced;

  /// Default construction leaves both "only when forced" flags cleared.
  ///
  /// The current defaults when creating the pass with no arguments are:
  /// EnableLoopInterleaving = true and EnableLoopVectorization = true. This
  /// means that interleaving default is consistent with the cl::opt flag, while
  /// vectorization is not.
  /// FIXME: The default for EnableLoopVectorization in the cl::opt should be
  /// set to true, and the corresponding change to account for this be made in
  /// opt.cpp. The initializations below will become:
  /// InterleaveOnlyWhenForced(!EnableLoopInterleaving)
  /// VectorizeOnlyWhenForced(!EnableLoopVectorization).
  LoopVectorizeOptions() : LoopVectorizeOptions(false, false) {}

  /// Constructs the options with both flags given explicitly.
  LoopVectorizeOptions(bool InterleaveOnlyWhenForced,
                       bool VectorizeOnlyWhenForced)
      : InterleaveOnlyWhenForced{InterleaveOnlyWhenForced},
        VectorizeOnlyWhenForced{VectorizeOnlyWhenForced} {}

  /// Sets InterleaveOnlyWhenForced to \p Value and returns *this so that
  /// setter calls can be chained.
  LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) {
    this->InterleaveOnlyWhenForced = Value;
    return *this;
  }

  /// Sets VectorizeOnlyWhenForced to \p Value and returns *this so that
  /// setter calls can be chained.
  LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) {
    this->VectorizeOnlyWhenForced = Value;
    return *this;
  }
};
||
149 | |||
/// Storage for information about the changes that were made.
struct LoopVectorizeResult {
  bool MadeAnyChange;
  bool MadeCFGChange;

  /// Stores both flags exactly as passed in.
  LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange)
      : MadeAnyChange{MadeAnyChange}, MadeCFGChange{MadeCFGChange} {}
};
||
158 | |||
159 | /// The LoopVectorize Pass. |
||
160 | struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> { |
||
161 | private: |
||
162 | /// If false, consider all loops for interleaving. |
||
163 | /// If true, only loops that explicitly request interleaving are considered. |
||
164 | bool InterleaveOnlyWhenForced; |
||
165 | |||
166 | /// If false, consider all loops for vectorization. |
||
167 | /// If true, only loops that explicitly request vectorization are considered. |
||
168 | bool VectorizeOnlyWhenForced; |
||
169 | |||
170 | public: |
||
171 | LoopVectorizePass(LoopVectorizeOptions Opts = {}); |
||
172 | |||
173 | ScalarEvolution *SE; |
||
174 | LoopInfo *LI; |
||
175 | TargetTransformInfo *TTI; |
||
176 | DominatorTree *DT; |
||
177 | BlockFrequencyInfo *BFI; |
||
178 | TargetLibraryInfo *TLI; |
||
179 | DemandedBits *DB; |
||
180 | AssumptionCache *AC; |
||
181 | LoopAccessInfoManager *LAIs; |
||
182 | OptimizationRemarkEmitter *ORE; |
||
183 | ProfileSummaryInfo *PSI; |
||
184 | |||
185 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
||
186 | void printPipeline(raw_ostream &OS, |
||
187 | function_ref<StringRef(StringRef)> MapClassName2PassName); |
||
188 | |||
189 | // Shim for old PM. |
||
190 | LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, |
||
191 | TargetTransformInfo &TTI_, DominatorTree &DT_, |
||
192 | BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, |
||
193 | DemandedBits &DB_, AssumptionCache &AC_, |
||
194 | LoopAccessInfoManager &LAIs_, |
||
195 | OptimizationRemarkEmitter &ORE_, |
||
196 | ProfileSummaryInfo *PSI_); |
||
197 | |||
198 | bool processLoop(Loop *L); |
||
199 | }; |
||
200 | |||
201 | /// Reports a vectorization failure: print \p DebugMsg for debugging |
||
202 | /// purposes along with the corresponding optimization remark \p RemarkName. |
||
203 | /// If \p I is passed, it is an instruction that prevents vectorization. |
||
204 | /// Otherwise, the loop \p TheLoop is used for the location of the remark. |
||
205 | void reportVectorizationFailure(const StringRef DebugMsg, |
||
206 | const StringRef OREMsg, const StringRef ORETag, |
||
207 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); |
||
208 | |||
209 | /// Reports an informative message: print \p Msg for debugging purposes as well |
||
210 | /// as an optimization remark. Uses either \p I as location of the remark, or |
||
211 | /// otherwise \p TheLoop. |
||
212 | void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag, |
||
213 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, |
||
214 | Instruction *I = nullptr); |
||
215 | |||
216 | } // end namespace llvm |
||
217 | |||
218 | #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |