Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- LoopVectorize.h ------------------------------------------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops |
||
| 10 | // and generates target-independent LLVM-IR. |
||
| 11 | // The vectorizer uses the TargetTransformInfo analysis to estimate the costs |
||
| 12 | // of instructions in order to estimate the profitability of vectorization. |
||
| 13 | // |
||
| 14 | // The loop vectorizer combines consecutive loop iterations into a single |
||
| 15 | // 'wide' iteration. After this transformation the index is incremented |
||
| 16 | // by the SIMD vector width, and not by one. |
||
| 17 | // |
||
| 18 | // This pass has four parts: |
||
| 19 | // 1. The main loop pass that drives the different parts. |
||
| 20 | // 2. LoopVectorizationLegality - A unit that checks for the legality |
||
| 21 | // of the vectorization. |
||
| 22 | // 3. InnerLoopVectorizer - A unit that performs the actual |
||
| 23 | // widening of instructions. |
||
| 24 | // 4. LoopVectorizationCostModel - A unit that checks for the profitability |
||
| 25 | // of vectorization. It decides on the optimal vector width, which |
||
| 26 | // can be one, if vectorization is not profitable. |
||
| 27 | // |
||
| 28 | // There is a development effort going on to migrate loop vectorizer to the |
||
| 29 | // VPlan infrastructure and to introduce outer loop vectorization support (see |
||
| 30 | // docs/Proposal/VectorizationPlan.rst and |
||
| 31 | // http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html). For this |
||
| 32 | // purpose, we temporarily introduced the VPlan-native vectorization path: an |
||
| 33 | // alternative vectorization path that is natively implemented on top of the |
||
| 34 | // VPlan infrastructure. See EnableVPlanNativePath for enabling. |
||
| 35 | // |
||
| 36 | //===----------------------------------------------------------------------===// |
||
| 37 | // |
||
| 38 | // The reduction-variable vectorization is based on the paper: |
||
| 39 | // D. Nuzman and R. Henderson. Multi-platform Auto-vectorization. |
||
| 40 | // |
||
| 41 | // Variable uniformity checks are inspired by: |
||
| 42 | // Karrenberg, R. and Hack, S. Whole Function Vectorization. |
||
| 43 | // |
||
| 44 | // The interleaved access vectorization is based on the paper: |
||
| 45 | // Dorit Nuzman, Ira Rosen and Ayal Zaks. Auto-Vectorization of Interleaved |
||
| 46 | // Data for SIMD |
||
| 47 | // |
||
| 48 | // Other ideas/concepts are from: |
||
| 49 | // A. Zaks and D. Nuzman. Autovectorization in GCC-two years later. |
||
| 50 | // |
||
| 51 | // S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua. An Evaluation of |
||
| 52 | // Vectorizing Compilers. |
||
| 53 | // |
||
| 54 | //===----------------------------------------------------------------------===// |
||
| 55 | |||
| 56 | #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
||
| 57 | #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |
||
| 58 | |||
| 59 | #include "llvm/IR/PassManager.h" |
||
| 60 | #include "llvm/Support/CommandLine.h" |
||
| 61 | #include <functional> |
||
| 62 | |||
| 63 | namespace llvm { |
||
| 64 | |||
| 65 | class AssumptionCache; |
||
| 66 | class BlockFrequencyInfo; |
||
| 67 | class DemandedBits; |
||
| 68 | class DominatorTree; |
||
| 69 | class Function; |
||
| 70 | class Loop; |
||
| 71 | class LoopAccessInfoManager; |
||
| 72 | class LoopInfo; |
||
| 73 | class OptimizationRemarkEmitter; |
||
| 74 | class ProfileSummaryInfo; |
||
| 75 | class ScalarEvolution; |
||
| 76 | class TargetLibraryInfo; |
||
| 77 | class TargetTransformInfo; |
||
| 78 | |||
| 79 | extern cl::opt<bool> EnableLoopInterleaving; |
||
| 80 | extern cl::opt<bool> EnableLoopVectorization; |
||
| 81 | |||
| 82 | /// A marker analysis to determine if extra passes after loop vectorization |
||
| 83 | /// should be run. Its Result carries no data; only its cached presence matters. |
||
| 84 | struct ShouldRunExtraVectorPasses |
||
| 85 | : public AnalysisInfoMixin<ShouldRunExtraVectorPasses> { |
||
| 86 | static AnalysisKey Key; |
||
| 87 | struct Result { |
||
| 88 | bool invalidate(Function &F, const PreservedAnalyses &PA, |
||
| 89 | FunctionAnalysisManager::Invalidator &) { |
||
| 90 | // Invalidate only if the checker reports the marker as not preserved, i.e. |
||
| 91 | // it has been explicitly invalidated. Otherwise, it remains preserved. |
||
| 92 | auto PAC = PA.getChecker<ShouldRunExtraVectorPasses>(); |
||
| 93 | return !PAC.preservedWhenStateless(); |
||
| 94 | } |
||
| 95 | }; |
||
| 96 | |||
| 97 | Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); } |
||
| 98 | }; |
||
| 99 | |||
| 100 | /// A pass manager to run a set of extra function simplification passes after |
||
| 101 | /// vectorization, if requested. LoopVectorize caches the |
||
| 102 | /// ShouldRunExtraVectorPasses analysis to request them; run() executes the |
||
| 103 | /// nested passes only if that result is cached, and always abandons the marker. |
||
| 104 | struct ExtraVectorPassManager : public FunctionPassManager { |
||
| 105 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { |
||
| 106 | auto PA = PreservedAnalyses::all(); |
||
| 107 | if (AM.getCachedResult<ShouldRunExtraVectorPasses>(F)) |
||
| 108 | PA.intersect(FunctionPassManager::run(F, AM)); |
||
| 109 | PA.abandon<ShouldRunExtraVectorPasses>(); |
||
| 110 | return PA; |
||
| 111 | } |
||
| 112 | }; |
||
| 113 | |||
| 114 | struct LoopVectorizeOptions { |
||
| 115 | /// If false, consider all loops for interleaving. |
||
| 116 | /// If true, only loops that explicitly request interleaving are considered. |
||
| 117 | bool InterleaveOnlyWhenForced; |
||
| 118 | |||
| 119 | /// If false, consider all loops for vectorization. |
||
| 120 | /// If true, only loops that explicitly request vectorization are considered. |
||
| 121 | bool VectorizeOnlyWhenForced; |
||
| 122 | |||
| 123 | /// Default construction sets both *OnlyWhenForced flags to false, i.e. it |
||
| 124 | /// behaves as EnableLoopInterleaving = true and EnableLoopVectorization = true. |
||
| 125 | /// This means that interleaving default is consistent with the cl::opt flag, |
||
| 126 | /// while vectorization is not. |
||
| 127 | /// FIXME: The default for EnableLoopVectorization in the cl::opt should be |
||
| 128 | /// set to true, and the corresponding change to account for this be made in |
||
| 129 | /// opt.cpp. The initializations below will become: |
||
| 130 | /// InterleaveOnlyWhenForced(!EnableLoopInterleaving) |
||
| 131 | /// VectorizeOnlyWhenForced(!EnableLoopVectorization). |
||
| 132 | LoopVectorizeOptions() |
||
| 133 | : InterleaveOnlyWhenForced(false), VectorizeOnlyWhenForced(false) {} |
||
| 134 | LoopVectorizeOptions(bool InterleaveOnlyWhenForced, |
||
| 135 | bool VectorizeOnlyWhenForced) |
||
| 136 | : InterleaveOnlyWhenForced(InterleaveOnlyWhenForced), |
||
| 137 | VectorizeOnlyWhenForced(VectorizeOnlyWhenForced) {} |
||
| 138 | |||
| 139 | LoopVectorizeOptions &setInterleaveOnlyWhenForced(bool Value) { |
||
| 140 | InterleaveOnlyWhenForced = Value; |
||
| 141 | return *this; |
||
| 142 | } |
||
| 143 | |||
| 144 | LoopVectorizeOptions &setVectorizeOnlyWhenForced(bool Value) { |
||
| 145 | VectorizeOnlyWhenForced = Value; |
||
| 146 | return *this; |
||
| 147 | } |
||
| 148 | }; |
||
| 149 | |||
| 150 | /// Flags describing the changes made; returned by LoopVectorizePass::runImpl. |
||
| 151 | struct LoopVectorizeResult { |
||
| 152 | bool MadeAnyChange; |
||
| 153 | bool MadeCFGChange; |
||
| 154 | |||
| 155 | LoopVectorizeResult(bool MadeAnyChange, bool MadeCFGChange) |
||
| 156 | : MadeAnyChange(MadeAnyChange), MadeCFGChange(MadeCFGChange) {} |
||
| 157 | }; |
||
| 158 | |||
| 159 | /// The LoopVectorize Pass: a function pass built on PassInfoMixin. |
||
| 160 | struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> { |
||
| 161 | private: |
||
| 162 | /// If false, consider all loops for interleaving. |
||
| 163 | /// If true, only loops that explicitly request interleaving are considered. |
||
| 164 | bool InterleaveOnlyWhenForced; |
||
| 165 | |||
| 166 | /// If false, consider all loops for vectorization. |
||
| 167 | /// If true, only loops that explicitly request vectorization are considered. |
||
| 168 | bool VectorizeOnlyWhenForced; |
||
| 169 | |||
| 170 | public: |
||
| 171 | LoopVectorizePass(LoopVectorizeOptions Opts = {}); |
||
| 172 | |||
| 173 | ScalarEvolution *SE; |
||
| 174 | LoopInfo *LI; |
||
| 175 | TargetTransformInfo *TTI; |
||
| 176 | DominatorTree *DT; |
||
| 177 | BlockFrequencyInfo *BFI; |
||
| 178 | TargetLibraryInfo *TLI; |
||
| 179 | DemandedBits *DB; |
||
| 180 | AssumptionCache *AC; |
||
| 181 | LoopAccessInfoManager *LAIs; |
||
| 182 | OptimizationRemarkEmitter *ORE; |
||
| 183 | ProfileSummaryInfo *PSI; |
||
| 184 | |||
| 185 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
||
| 186 | void printPipeline(raw_ostream &OS, |
||
| 187 | function_ref<StringRef(StringRef)> MapClassName2PassName); |
||
| 188 | |||
| 189 | // Shim for old PM: the analyses are passed in explicitly instead of an AM. |
||
| 190 | LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_, |
||
| 191 | TargetTransformInfo &TTI_, DominatorTree &DT_, |
||
| 192 | BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, |
||
| 193 | DemandedBits &DB_, AssumptionCache &AC_, |
||
| 194 | LoopAccessInfoManager &LAIs_, |
||
| 195 | OptimizationRemarkEmitter &ORE_, |
||
| 196 | ProfileSummaryInfo *PSI_); |
||
| 197 | |||
| 198 | bool processLoop(Loop *L); |
||
| 199 | }; |
||
| 200 | |||
| 201 | /// Reports a vectorization failure: print \p DebugMsg for debugging purposes |
||
| 202 | /// and emit an optimization remark with message \p OREMsg and tag \p ORETag. |
||
| 203 | /// If \p I is passed, it is an instruction that prevents vectorization. |
||
| 204 | /// Otherwise, the loop \p TheLoop is used for the location of the remark. |
||
| 205 | void reportVectorizationFailure(const StringRef DebugMsg, |
||
| 206 | const StringRef OREMsg, const StringRef ORETag, |
||
| 207 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr); |
||
| 208 | |||
| 209 | /// Reports an informative message: print \p OREMsg for debugging purposes as |
||
| 210 | /// well as an optimization remark tagged \p ORETag. Uses either \p I as |
||
| 211 | /// location of the remark, or otherwise \p TheLoop. |
||
| 212 | void reportVectorizationInfo(const StringRef OREMsg, const StringRef ORETag, |
||
| 213 | OptimizationRemarkEmitter *ORE, Loop *TheLoop, |
||
| 214 | Instruction *I = nullptr); |
||
| 215 | |||
| 216 | } // end namespace llvm |
||
| 217 | |||
| 218 | #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H |