//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_VECTORUTILS_H
#define LLVM_ANALYSIS_VECTORUTILS_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Support/CheckedArithmetic.h"

namespace llvm {
class TargetLibraryInfo;

/// Describes the type of Parameters
enum class VFParamKind {
  Vector,            // No semantic information.
  OMP_Linear,        // declare simd linear(i)
  OMP_LinearRef,     // declare simd linear(ref(i))
  OMP_LinearVal,     // declare simd linear(val(i))
  OMP_LinearUVal,    // declare simd linear(uval(i))
  OMP_LinearPos,     // declare simd linear(i:c) uniform(c)
  OMP_LinearValPos,  // declare simd linear(val(i:c)) uniform(c)
  OMP_LinearRefPos,  // declare simd linear(ref(i:c)) uniform(c)
  OMP_LinearUValPos, // declare simd linear(uval(i:c)) uniform(c)
  OMP_Uniform,       // declare simd uniform(i)
  GlobalPredicate,   // Global logical predicate that acts on all lanes
                     // of the input and output mask concurrently. For
                     // example, it is implied by the `M` token in the
                     // Vector Function ABI mangled name.
  Unknown
};

/// Describes the type of Instruction Set Architecture
enum class VFISAKind {
  AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
  SVE,          // AArch64 Scalable Vector Extension
  SSE,          // x86 SSE
  AVX,          // x86 AVX
  AVX2,         // x86 AVX2
  AVX512,       // x86 AVX512
  LLVM,         // LLVM internal ISA for functions that are not
                // attached to an existing ABI via name mangling.
  Unknown       // Unknown ISA
};

/// Encapsulates information needed to describe a parameter.
///
/// The description of the parameter is not linked directly to
/// OpenMP or any other vector function description. This structure
/// is extendible to handle other paradigms that describe vector
/// functions and their parameters.
struct VFParameter {
  unsigned ParamPos;         // Parameter Position in Scalar Function.
  VFParamKind ParamKind;     // Kind of Parameter.
  int LinearStepOrPos = 0;   // Step or Position of the Parameter.
  Align Alignment = Align(); // Optional alignment in bytes, defaulted to 1.

  // Comparison operator.
  bool operator==(const VFParameter &Other) const {
    return std::tie(ParamPos, ParamKind, LinearStepOrPos, Alignment) ==
           std::tie(Other.ParamPos, Other.ParamKind, Other.LinearStepOrPos,
                    Other.Alignment);
  }
};

/// Contains the information about the kind of vectorization
/// available.
///
/// This object is independent of the paradigm used to
/// represent vector functions. In particular, it is not attached to
/// any target-specific ABI.
struct VFShape {
  ElementCount VF;                        // Vectorization factor.
  SmallVector<VFParameter, 8> Parameters; // List of parameter information.
  // Comparison operator.
  bool operator==(const VFShape &Other) const {
    return std::tie(VF, Parameters) == std::tie(Other.VF, Other.Parameters);
  }

  /// Update the parameter in position P.ParamPos to P.
  void updateParam(VFParameter P) {
    assert(P.ParamPos < Parameters.size() && "Invalid parameter position.");
    Parameters[P.ParamPos] = P;
    assert(hasValidParameterList() && "Invalid parameter list");
  }

  // Retrieve the VFShape that can be used to map a (scalar) function to itself,
  // with VF = 1.
  static VFShape getScalarShape(const CallInst &CI) {
    return VFShape::get(CI, ElementCount::getFixed(1),
                        /*HasGlobalPredicate*/ false);
  }

  // Retrieve the basic vectorization shape of the function, where all
  // parameters are mapped to VFParamKind::Vector with \p EC
  // lanes. Specifies whether the function has a Global Predicate
  // argument via \p HasGlobalPred.
  static VFShape get(const CallInst &CI, ElementCount EC, bool HasGlobalPred) {
    SmallVector<VFParameter, 8> Parameters;
    for (unsigned I = 0; I < CI.arg_size(); ++I)
      Parameters.push_back(VFParameter({I, VFParamKind::Vector}));
    if (HasGlobalPred)
      Parameters.push_back(
          VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate}));

    return {EC, Parameters};
  }
  /// Validation check on the Parameters in the VFShape.
  bool hasValidParameterList() const;
};
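
// Example (illustrative sketch): build the all-vector shape of a call for a
// fixed VF of 4 with a trailing global predicate, then mark parameter 1 as
// uniform. `CI` is assumed to be an existing CallInst whose callee takes at
// least two arguments.
//
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/true);
//   Shape.updateParam(VFParameter({1, VFParamKind::OMP_Uniform}));
//
// updateParam() asserts that the resulting parameter list is still valid, so
// it should only be used on a shape that already describes every parameter.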

/// Holds the VFShape for a specific scalar to vector function mapping.
struct VFInfo {
  VFShape Shape;          /// Classification of the vector function.
  std::string ScalarName; /// Scalar Function Name.
  std::string VectorName; /// Vector Function Name associated to this VFInfo.
  VFISAKind ISA;          /// Instruction Set Architecture.
};

namespace VFABI {
/// LLVM Internal VFABI ISA token for vector functions.
static constexpr char const *_LLVM_ = "_LLVM_";
/// Prefix for internal name redirection for vector function that
/// tells the compiler to scalarize the call using the scalar name
/// of the function. For example, a mangled name like
/// `_ZGV_LLVM_N2v_foo(_LLVM_Scalarize_foo)` would tell the
/// vectorizer to vectorize the scalar call `foo`, and to scalarize
/// it once vectorization is done.
static constexpr char const *_LLVM_Scalarize_ = "_LLVM_Scalarize_";

/// Function to construct a VFInfo out of a mangled name in the
/// following format:
///
///   <VFABI_name>{(<redirection>)}
///
/// where <VFABI_name> is the name of the vector function, mangled according
/// to the rules described in the Vector Function ABI of the target vector
/// extension (or <isa> from now on). The <VFABI_name> is in the following
/// format:
///
///   _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)]
///
/// This method supports demangling rules for the following <isa>:
///
/// * AArch64: https://developer.arm.com/docs/101129/latest
///
/// * x86 (libmvec): https://sourceware.org/glibc/wiki/libmvec and
///   https://sourceware.org/glibc/wiki/libmvec?action=AttachFile&do=view&target=VectorABI.txt
///
/// \param MangledName -> input string in the format
/// _ZGV<isa><mask><vlen><parameters>_<scalarname>[(<redirection>)].
/// \param M -> Module used to retrieve information about the vector
/// function that cannot be retrieved from the mangled
/// name. At the moment, this parameter is needed only to retrieve the
/// Vectorization Factor of scalable vector functions from their
/// respective IR declarations.
std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
                                          const Module &M);
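
// Example (illustrative sketch): demangle a fixed-width AArch64 Advanced SIMD
// mapping. The mangled name "_ZGVnN2v_sin" (isa 'n', unmasked, 2 lanes, one
// vector parameter, scalar name "sin") and the Module `M` are assumed to
// exist; the exact VFInfo contents follow the format documented above.
//
//   if (std::optional<VFInfo> Info =
//           VFABI::tryDemangleForVFABI("_ZGVnN2v_sin", M)) {
//     // Info->ScalarName is "sin" and Info->Shape.VF is a fixed count of 2.
//   }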

/// This routine mangles the given VectorName according to the LangRef
/// specification for the vector-function-abi-variant attribute and is specific
/// to the TLI mappings. It is the responsibility of the caller to make sure
/// that this is only used if all parameters in the vector function are of
/// vector type. The returned string holds a scalar-to-vector mapping:
///
///   _ZGV<isa><mask><vlen><vparams>_<scalarname>(<vectorname>)
///
/// where:
///
/// <isa> = "_LLVM_"
/// <mask> = "N". Note: TLI does not support masked interfaces.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
///          field of the `VecDesc` struct. If the number of lanes is scalable
///          then 'x' is printed instead.
/// <vparams> = "v", one per argument (numArgs in total).
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
                                unsigned numArgs, ElementCount VF);
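
// Example (illustrative sketch): per the format above, mapping the scalar
// function "sin" to a hypothetical 2-lane fixed-width variant named "vsin2"
// yields the string "_ZGV_LLVM_N2v_sin(vsin2)".
//
//   std::string Mapping = VFABI::mangleTLIVectorName(
//       "vsin2", "sin", /*numArgs=*/1, ElementCount::getFixed(2));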

/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);

// Name of the attribute where the variant mappings are stored.
static constexpr char const *MappingsAttrName = "vector-function-abi-variant";

/// Populates a set of strings representing the Vector Function ABI variants
/// associated to the CallInst CI. If the CI does not contain the
/// vector-function-abi-variant attribute, we return without populating
/// VariantMappings, i.e. callers of getVectorVariantNames need not check for
/// the presence of the attribute (see InjectTLIMappings).
void getVectorVariantNames(const CallInst &CI,
                           SmallVectorImpl<std::string> &VariantMappings);
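
// Example (illustrative sketch): collect the raw variant strings attached to a
// call through the "vector-function-abi-variant" attribute. `CI` is assumed to
// be an existing CallInst; each returned string is a mangled name in the
// format accepted by tryDemangleForVFABI.
//
//   SmallVector<std::string, 8> VariantMappings;
//   VFABI::getVectorVariantNames(CI, VariantMappings);
//   for (const std::string &MangledName : VariantMappings) {
//     // Feed MangledName to tryDemangleForVFABI to obtain a VFInfo.
//   }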
} // end namespace VFABI

/// The Vector Function Database.
///
/// Helper class used to find the vector functions associated to a
/// scalar CallInst.
class VFDatabase {
  /// The Module of the CallInst CI.
  const Module *M;
  /// The CallInst instance being queried for scalar to vector mappings.
  const CallInst &CI;
  /// List of vector function descriptors associated to the call
  /// instruction.
  const SmallVector<VFInfo, 8> ScalarToVectorMappings;

  /// Retrieve the scalar-to-vector mappings associated to the rules of
  /// the Vector Function ABI.
  static void getVFABIMappings(const CallInst &CI,
                               SmallVectorImpl<VFInfo> &Mappings) {
    if (!CI.getCalledFunction())
      return;

    const StringRef ScalarName = CI.getCalledFunction()->getName();

    SmallVector<std::string, 8> ListOfStrings;
    // The check for the vector-function-abi-variant attribute is done when
    // retrieving the vector variant names here.
    VFABI::getVectorVariantNames(CI, ListOfStrings);
    if (ListOfStrings.empty())
      return;
    for (const auto &MangledName : ListOfStrings) {
      const std::optional<VFInfo> Shape =
          VFABI::tryDemangleForVFABI(MangledName, *(CI.getModule()));
      // A match is found via scalar and vector names, and also by
      // ensuring that the variant described in the attribute has a
      // corresponding definition or declaration of the vector
      // function in the Module M.
      if (Shape && (Shape->ScalarName == ScalarName)) {
        assert(CI.getModule()->getFunction(Shape->VectorName) &&
               "Vector function is missing.");
        Mappings.push_back(*Shape);
      }
    }
  }

public:
  /// Retrieve all the VFInfo instances associated to the CallInst CI.
  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {
    SmallVector<VFInfo, 8> Ret;

    // Get mappings from the Vector Function ABI variants.
    getVFABIMappings(CI, Ret);

    // Other non-VFABI variants should be retrieved here.

    return Ret;
  }

  /// Constructor, requires a CallInst instance.
  VFDatabase(CallInst &CI)
      : M(CI.getModule()), CI(CI),
        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}
  /// \defgroup VFDatabase query interface.
  ///
  /// @{
  /// Retrieve the Function with VFShape \p Shape.
  Function *getVectorizedFunction(const VFShape &Shape) const {
    if (Shape == VFShape::getScalarShape(CI))
      return CI.getCalledFunction();

    for (const auto &Info : ScalarToVectorMappings)
      if (Info.Shape == Shape)
        return M->getFunction(Info.VectorName);

    return nullptr;
  }
  /// @}
};
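
// Example (illustrative sketch): query the database for a 4-lane, unmasked
// variant of the callee of `CI` (assumed to be an existing CallInst). A null
// result means no such mapping was attached to the call.
//
//   VFDatabase DB(CI);
//   VFShape Shape = VFShape::get(CI, ElementCount::getFixed(4),
//                                /*HasGlobalPred=*/false);
//   if (Function *VecFn = DB.getVectorizedFunction(Shape)) {
//     // VecFn is the vector counterpart declared or defined in the module.
//   }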

template <typename T> class ArrayRef;
class DemandedBits;
class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
class ScalarEvolution;
class TargetTransformInfo;
class Type;
class Value;

namespace Intrinsic {
typedef unsigned ID;
}

/// A helper function for converting Scalar types to vector types. If
/// the incoming type is void, we return void. If the EC represents a
/// scalar, we return the scalar type.
inline Type *ToVectorTy(Type *Scalar, ElementCount EC) {
  if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar())
    return Scalar;
  return VectorType::get(Scalar, EC);
}

inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
  return ToVectorTy(Scalar, ElementCount::getFixed(VF));
}
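
// Example (illustrative sketch): widen an i32 to a 4-element fixed vector and
// to a scalable <vscale x 4> vector. `Ctx` is assumed to be an existing
// LLVMContext.
//
//   Type *I32 = Type::getInt32Ty(Ctx);
//   Type *V4I32 = ToVectorTy(I32, 4);                              // <4 x i32>
//   Type *NxV4I32 = ToVectorTy(I32, ElementCount::getScalable(4)); // <vscale x 4 x i32>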

/// Identify if the intrinsic is trivially vectorizable.
/// This method returns true if the intrinsic's argument types are all scalars
/// for the scalar form of the intrinsic and all vectors (or scalars handled by
/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                        unsigned ScalarOpdIdx);

/// Identifies if the vector form of the intrinsic has an operand that has
/// an overloaded type.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);

/// Returns the intrinsic ID for a call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
                                          const TargetLibraryInfo *TLI);

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// If a value has only one user that is a CastInst, return it.
Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);

/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *findScalarElement(Value *V, unsigned EltNo);

/// If all non-negative \p Mask elements are the same value, return that value.
/// If all elements are negative (undefined) or \p Mask contains different
/// non-negative values, return -1.
int getSplatIndex(ArrayRef<int> Mask);

/// Get splat value if the input is a splat vector or return nullptr.
/// The value may be extracted from a splat constants vector or from
/// a sequence of instructions that broadcast a single value into a vector.
Value *getSplatValue(const Value *V);

/// Return true if each element of the vector value \p V is poisoned or equal to
/// every other non-poisoned element. If an index element is specified, either
/// every element of the vector is poisoned or the element at that index is not
/// poisoned and equal to every other non-poisoned element.
/// This may be more powerful than the related getSplatValue() because it is
/// not limited by finding a scalar source value to a splatted vector.
bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the 2 operands, returns false if the mask isn't valid.
/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].
/// \p AllowUndefElts permits "-1" indices to be treated as undef.
bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                            const APInt &DemandedElts, APInt &DemandedLHS,
                            APInt &DemandedRHS, bool AllowUndefElts = false);

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements. Mask elements that are less than 0
/// (sentinel values) are repeated in the output mask.
///
/// Example with Scale = 4:
///   <4 x i32> <3, 2, 0, -1> -->
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>
///
/// This is the reverse process of widening shuffle mask elements, but it always
/// succeeds because the indexes can always be multiplied (scaled up) to map to
/// narrower vector elements.
void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                           SmallVectorImpl<int> &ScaledMask);

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements. If all mask elements that would
/// map to a wider element of the new mask are the same negative number
/// (sentinel value), that element of the new mask is the same value. If any
/// element in a given slice is negative and some other element in that slice is
/// not the same value, return false (partial matches with sentinel values are
/// not allowed).
///
/// Example with Scale = 4:
///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->
///   <4 x i32> <3, 2, 0, -1>
///
/// This is the reverse process of narrowing shuffle mask elements if it
/// succeeds. This transform is not always possible because indexes may not
/// divide evenly (scale down) to map to wider vector elements.
bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                          SmallVectorImpl<int> &ScaledMask);

/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,
/// to get the shuffle mask with widest possible elements.
void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &ScaledMask);

/// Splits and processes shuffle mask depending on the number of input and
/// output registers. The function does 2 main things: 1) splits the
/// source/destination vectors into real registers; 2) does the mask analysis
/// to identify which real registers are permuted. Then the function processes
/// the resulting register masks using the provided action items. If no input
/// register is defined, \p NoInputAction is used. If only 1 input register is
/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to
/// process > 2 input registers and masks.
/// \param Mask Original shuffle mask.
/// \param NumOfSrcRegs Number of source registers.
/// \param NumOfDestRegs Number of destination registers.
/// \param NumOfUsedRegs Number of actually used destination registers.
void processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction);

/// Compute a map of integer instructions to their minimum legal type
/// size.
///
/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int
/// type (e.g. i32) whenever arithmetic is performed on them.
///
/// For targets with native i8 or i16 operations, usually InstCombine can shrink
/// the arithmetic type down again. However InstCombine refuses to create
/// illegal types, so for targets without i8 or i16 registers, the lengthening
/// and shrinking remains.
///
/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when
/// their scalar equivalents do not, so during vectorization it is important to
/// remove these lengthens and truncates when deciding the profitability of
/// vectorization.
///
/// This function analyzes the given range of instructions and determines the
/// minimum type size each can be converted to. It attempts to remove or
/// minimize type size changes across each def-use chain, so for example in the
/// following code:
///
///   %1 = load i8, i8*
///   %2 = add i8 %1, 2
///   %3 = load i16, i16*
///   %4 = zext i8 %2 to i32
///   %5 = zext i16 %3 to i32
///   %6 = add i32 %4, %5
///   %7 = trunc i32 %6 to i16
///
/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes
/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
                         DemandedBits &DB,
                         const TargetTransformInfo *TTI=nullptr);

/// Compute the union of two access-group lists.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);

/// Compute the access-group list of access groups that @p Inst1 and @p Inst2
/// are both in. If either instruction does not access memory at all, it is
/// considered to be in every list.
///
/// If the list contains just one access group, it is returned directly. If the
/// list is empty, returns nullptr.
MDNode *intersectAccessGroups(const Instruction *Inst1,
                              const Instruction *Inst2);

/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,
/// MD_nontemporal, MD_access_group].
/// For K in Kinds, we get the MDNode for K from each of the
/// elements of VL, compute their "intersection" (i.e., the most generic
/// metadata value that covers all of the individual values), and set I's
/// metadata for M equal to the intersection value.
///
/// This function always sets a (possibly null) value for each K in Kinds.
Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
///
/// For example, the mask for \p Group with interleave-factor 3
/// and \p VF 4, that has only its first member present is:
///
///   <1,0,0,1,0,0,1,0,0,1,0,0>
///
/// Note: The result is a mask of 0's and 1's, as opposed to the other
/// create[*]Mask() utilities which create a shuffle mask (mask that
/// consists of indices).
Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                               const InterleaveGroup<Instruction> &Group);

/// Create a mask with replicated elements.
///
/// This function creates a shuffle mask for replicating each of the \p VF
/// elements in a vector \p ReplicationFactor times. It can be used to
/// transform a mask of \p VF elements into a mask of
/// \p VF * \p ReplicationFactor elements used by a predicated
/// interleaved-group of loads/stores whose Interleaved-factor ==
/// \p ReplicationFactor.
///
/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
///
///   <0,0,0,1,1,1,2,2,2,3,3,3>
llvm::SmallVector<int, 16> createReplicatedMask(unsigned ReplicationFactor,
                                                unsigned VF);

/// Create an interleave shuffle mask.
///
/// This function creates a shuffle mask for interleaving \p NumVecs vectors of
/// vectorization factor \p VF into a single wide vector. The mask is of the
/// form:
///
///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>
///
/// For example, the mask for VF = 4 and NumVecs = 2 is:
///
///   <0, 4, 1, 5, 2, 6, 3, 7>.
llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF, unsigned NumVecs);

/// Create a stride shuffle mask.
///
/// This function creates a shuffle mask whose elements begin at \p Start and
/// are incremented by \p Stride. The mask can be used to deinterleave an
/// interleaved vector into separate vectors of vectorization factor \p VF. The
/// mask is of the form:
///
///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>
///
/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:
///
///   <0, 2, 4, 6>
llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
                                            unsigned VF);

/// Create a sequential shuffle mask.
///
/// This function creates a shuffle mask whose elements are sequential and
/// begin at \p Start. The mask contains \p NumInts integers and is padded with
/// \p NumUndefs undef values. The mask is of the form:
///
///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>
///
/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:
///
///   <0, 1, 2, 3, undef, undef, undef, undef>
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are identical. This assumes that the unary
/// shuffle will use elements from operand 0 (operand 1 will be unused).
llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
                                           unsigned NumElts);

/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into
/// a single large vector. The number of vectors should be greater than one,
/// and their element types should be the same. The number of elements in the
/// vectors should also be the same; however, if the last vector has fewer
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef. That is, return true if all
/// lanes can be assumed inactive.
bool maskIsAllZeroOrUndef(Value *Mask);

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef. That is, return true if all
/// lanes can be assumed active.
bool maskIsAllOneOrUndef(Value *Mask);

/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
/// for each lane which may be active.
APInt possiblyDemandedEltsInMask(Value *Mask);

/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
/// Each member in this group has an index starting from 0, and the largest
/// index should be less than the interleave factor, which is equal to the
/// absolute value of the access's stride.
///
/// E.g. An interleaved load group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          a = A[i];                           // Member of index 0
///          b = A[i+1];                         // Member of index 1
///          d = A[i+3];                         // Member of index 3
///          ...
///        }
///
///      An interleaved store group of factor 4:
///        for (unsigned i = 0; i < 1024; i+=4) {
///          ...
///          A[i]   = a;                         // Member of index 0
///          A[i+1] = b;                         // Member of index 1
///          A[i+2] = c;                         // Member of index 2
///          A[i+3] = d;                         // Member of index 3
///        }
///
/// Note: the interleaved load group could have gaps (missing members), but
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      : Alignment(Alignment), InsertPos(Instr) {
    Factor = std::abs(Stride);
    assert(Factor > 1 && "Invalid interleave factor");

    Reverse = Stride < 0;
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// \returns false if the instruction doesn't belong to the group.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for either the tombstone or empty special values.
    if (DenseMapInfo<int32_t>::getTombstoneKey() == Key ||
        DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.find(Key) != Members.end())
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      if (Index >= static_cast<int32_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {

      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  uint32_t getIndex(const InstTy *Instr) const {
    for (auto I : Members) {
      if (I.second == Instr)
        return I.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMedata. To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last-member
    return true;
  }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;
  Align Alignment;
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32  // Insert Position
  //      %add = add i32 %even  // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32  // Def of %odd
  //      store i32 %odd  // Insert Position
  InstTy *InsertPos;
};
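
// Example (illustrative sketch): describe a two-member load group with stride
// 2, anchored at its leader. `Load0` and `Load1` are assumed to be existing
// Instruction pointers for A[i] and A[i+1] in a stride-2 loop.
//
//   InterleaveGroup<Instruction> Group(Load0, /*Stride=*/2, Align(4));
//   bool Inserted = Group.insertMember(Load1, /*Index=*/1, Align(4));
//   // Inserted is true; Group.getFactor() is 2 and Group.getMember(1) is Load1.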

/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Substitute symbolic strides using \p Strides.
  /// Consider also predicated loads/stores in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in the loop will be predicated
  /// contrary to the original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks. Returns true if any
  /// groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if \p Instr does not belong to any interleave group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    assert(!InterleaveGroupMap.count(Instr) &&
           "Already in an interleaved access group");
    InterleaveGroupMap[Instr] =
        new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(InterleaveGroupMap[Instr]);
    return InterleaveGroupMap[Instr];
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    for (unsigned i = 0; i < Group->getFactor(); i++)
      if (Instruction *Member = Group->getMember(i))
        InterleaveGroupMap.erase(Member);

    InterleaveGroups.erase(Group);
    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const ValueToValueMap &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    auto SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    auto SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    return Dependences.find(Src) == Dependences.end() ||
           !Dependences.lookup(Src).count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    auto *Deps = LAI->getDepChecker().getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(*LAI)].insert(Dep.getDestination(*LAI));
  }
};
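
// Example (illustrative sketch): run the interleaved-access analysis for a
// loop that has already been proven vectorizable. `PSE`, `TheLoop`, `DT`,
// `LI`, and `LAI` are assumed to be analyses the caller already owns.
//
//   InterleavedAccessInfo IAI(PSE, TheLoop, DT, LI, LAI);
//   IAI.analyzeInterleaving(/*EnableMaskedInterleavedGroup=*/false);
//   if (IAI.hasGroups())
//     for (auto *Group : IAI.getInterleaveGroups()) {
//       // Inspect Group->getFactor(), Group->getMember(I), etc.
//     }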

} // llvm namespace

#endif