Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file defines the OpenMPIRBuilder class and helpers used as a convenient |
||
10 | // way to create LLVM instructions for OpenMP directives. |
||
11 | // |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
||
15 | #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |
||
16 | |||
17 | #include "llvm/Analysis/MemorySSAUpdater.h" |
||
18 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
||
19 | #include "llvm/IR/DebugLoc.h" |
||
20 | #include "llvm/IR/IRBuilder.h" |
||
21 | #include "llvm/Support/Allocator.h" |
||
22 | #include <forward_list> |
||
23 | #include <map> |
||
24 | #include <optional> |
||
25 | |||
26 | namespace llvm { |
||
27 | class CanonicalLoopInfo; |
||
28 | struct TargetRegionEntryInfo; |
||
29 | class OffloadEntriesInfoManager; |
||
30 | |||
31 | /// Move the instruction after an InsertPoint to the beginning of another |
||
32 | /// BasicBlock. |
||
33 | /// |
||
34 | /// The instructions after \p IP are moved to the beginning of \p New which must |
||
35 | /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to |
||
36 | /// \p New will be added such that there is no semantic change. Otherwise, the |
||
37 | /// \p IP insert block remains degenerate and it is up to the caller to insert a |
||
38 | /// terminator. |
||
39 | void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, |
||
40 | bool CreateBranch); |
||
41 | |||
42 | /// Splice a BasicBlock at an IRBuilder's current insertion point. Its new |
||
43 | /// insert location will stick to after the instruction before the insertion |
||
44 | /// point (instead of moving with the instruction the InsertPoint stores |
||
45 | /// internally). |
||
46 | void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch); |
||
47 | |||
48 | /// Split a BasicBlock at an InsertPoint, even if the block is degenerate |
||
49 | /// (missing the terminator). |
||
50 | /// |
||
51 | /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed |
||
52 | /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch |
||
53 | /// is true, a branch to the new successor will be created such that |
||
54 | /// semantically there is no change; otherwise the block of the insertion point |
||
55 | /// remains degenerate and it is the caller's responsibility to insert a |
||
56 | /// terminator. Returns the new successor block. |
||
57 | BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, |
||
58 | llvm::Twine Name = {}); |
||
59 | |||
60 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
||
61 | /// degenerate (missing the terminator). Its new insert location will stick to |
||
62 | /// after the instruction before the insertion point (instead of moving with the |
||
63 | /// instruction the InsertPoint stores internally). |
||
64 | BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch, |
||
65 | llvm::Twine Name = {}); |
||
66 | |||
67 | /// Split a BasicBlock at \p Builder's insertion point, even if the block is |
||
68 | /// degenerate (missing the terminator). Its new insert location will stick to |
||
69 | /// after the instruction before the insertion point (instead of moving with the |
||
70 | /// instruction the InsertPoint stores internally). |
||
71 | BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name); |
||
72 | |||
73 | /// Like splitBB, but reuses the current block's name for the new name. |
||
74 | BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, |
||
75 | llvm::Twine Suffix = ".split"); |
||
76 | |||
77 | /// Captures attributes that affect generating LLVM-IR using the |
||
78 | /// OpenMPIRBuilder and related classes. Note that not all attributes are |
||
79 | /// required for all classes or functions. In some use cases the configuration |
||
80 | /// is not necessary at all, because because the only functions that are called |
||
81 | /// are ones that are not dependent on the configuration. |
||
82 | class OpenMPIRBuilderConfig { |
||
83 | public: |
||
84 | /// Flag for specifying if the compilation is done for embedded device code |
||
85 | /// or host code. |
||
86 | std::optional<bool> IsEmbedded; |
||
87 | |||
88 | /// Flag for specifying if the compilation is done for an offloading target, |
||
89 | /// like GPU. |
||
90 | std::optional<bool> IsTargetCodegen; |
||
91 | |||
92 | /// Flag for specifying weather a requires unified_shared_memory |
||
93 | /// directive is present or not. |
||
94 | std::optional<bool> HasRequiresUnifiedSharedMemory; |
||
95 | |||
96 | // Flag for specifying if offloading is mandatory. |
||
97 | std::optional<bool> OpenMPOffloadMandatory; |
||
98 | |||
99 | /// First separator used between the initial two parts of a name. |
||
100 | std::optional<StringRef> FirstSeparator; |
||
101 | /// Separator used between all of the rest consecutive parts of s name |
||
102 | std::optional<StringRef> Separator; |
||
103 | |||
104 | OpenMPIRBuilderConfig() {} |
||
105 | OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen, |
||
106 | bool HasRequiresUnifiedSharedMemory, |
||
107 | bool OpenMPOffloadMandatory) |
||
108 | : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen), |
||
109 | HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory), |
||
110 | OpenMPOffloadMandatory(OpenMPOffloadMandatory) {} |
||
111 | |||
112 | // Getters functions that assert if the required values are not present. |
||
113 | bool isEmbedded() const { |
||
114 | assert(IsEmbedded.has_value() && "IsEmbedded is not set"); |
||
115 | return *IsEmbedded; |
||
116 | } |
||
117 | |||
118 | bool isTargetCodegen() const { |
||
119 | assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set"); |
||
120 | return *IsTargetCodegen; |
||
121 | } |
||
122 | |||
123 | bool hasRequiresUnifiedSharedMemory() const { |
||
124 | assert(HasRequiresUnifiedSharedMemory.has_value() && |
||
125 | "HasUnifiedSharedMemory is not set"); |
||
126 | return *HasRequiresUnifiedSharedMemory; |
||
127 | } |
||
128 | |||
129 | bool openMPOffloadMandatory() const { |
||
130 | assert(OpenMPOffloadMandatory.has_value() && |
||
131 | "OpenMPOffloadMandatory is not set"); |
||
132 | return *OpenMPOffloadMandatory; |
||
133 | } |
||
134 | // Returns the FirstSeparator if set, otherwise use the default |
||
135 | // separator depending on isTargetCodegen |
||
136 | StringRef firstSeparator() const { |
||
137 | if (FirstSeparator.has_value()) |
||
138 | return *FirstSeparator; |
||
139 | if (isTargetCodegen()) |
||
140 | return "_"; |
||
141 | return "."; |
||
142 | } |
||
143 | |||
144 | // Returns the Separator if set, otherwise use the default |
||
145 | // separator depending on isTargetCodegen |
||
146 | StringRef separator() const { |
||
147 | if (Separator.has_value()) |
||
148 | return *Separator; |
||
149 | if (isTargetCodegen()) |
||
150 | return "$"; |
||
151 | return "."; |
||
152 | } |
||
153 | |||
154 | void setIsEmbedded(bool Value) { IsEmbedded = Value; } |
||
155 | void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; } |
||
156 | void setHasRequiresUnifiedSharedMemory(bool Value) { |
||
157 | HasRequiresUnifiedSharedMemory = Value; |
||
158 | } |
||
159 | void setFirstSeparator(StringRef FS) { FirstSeparator = FS; } |
||
160 | void setSeparator(StringRef S) { Separator = S; } |
||
161 | }; |
||
162 | |||
163 | /// An interface to create LLVM-IR for OpenMP directives. |
||
164 | /// |
||
165 | /// Each OpenMP directive has a corresponding public generator method. |
||
166 | class OpenMPIRBuilder { |
||
167 | public: |
||
168 | /// Create a new OpenMPIRBuilder operating on the given module \p M. This will |
||
169 | /// not have an effect on \p M (see initialize) |
||
170 | OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} |
||
171 | ~OpenMPIRBuilder(); |
||
172 | |||
173 | /// Initialize the internal state, this will put structure types and |
||
174 | /// potentially other helpers into the underlying module. Must be called |
||
175 | /// before any other method and only once! |
||
176 | void initialize(); |
||
177 | |||
178 | void setConfig(OpenMPIRBuilderConfig C) { Config = C; } |
||
179 | |||
180 | /// Finalize the underlying module, e.g., by outlining regions. |
||
181 | /// \param Fn The function to be finalized. If not used, |
||
182 | /// all functions are finalized. |
||
183 | void finalize(Function *Fn = nullptr); |
||
184 | |||
185 | /// Add attributes known for \p FnID to \p Fn. |
||
186 | void addAttributes(omp::RuntimeFunction FnID, Function &Fn); |
||
187 | |||
188 | /// Type used throughout for insertion points. |
||
189 | using InsertPointTy = IRBuilder<>::InsertPoint; |
||
190 | |||
191 | /// Create a name using the platform-specific separators. |
||
192 | /// \param Parts parts of the final name that needs separation |
||
193 | /// The created name has a first separator between the first and second part |
||
194 | /// and a second separator between all other parts. |
||
195 | /// E.g. with FirstSeparator "$" and Separator "." and |
||
196 | /// parts: "p1", "p2", "p3", "p4" |
||
197 | /// The resulting name is "p1$p2.p3.p4" |
||
198 | /// The separators are retrieved from the OpenMPIRBuilderConfig. |
||
199 | std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const; |
||
200 | |||
201 | /// Callback type for variable finalization (think destructors). |
||
202 | /// |
||
203 | /// \param CodeGenIP is the insertion point at which the finalization code |
||
204 | /// should be placed. |
||
205 | /// |
||
206 | /// A finalize callback knows about all objects that need finalization, e.g. |
||
207 | /// destruction, when the scope of the currently generated construct is left |
||
208 | /// at the time, and location, the callback is invoked. |
||
209 | using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>; |
||
210 | |||
211 | struct FinalizationInfo { |
||
212 | /// The finalization callback provided by the last in-flight invocation of |
||
213 | /// createXXXX for the directive of kind DK. |
||
214 | FinalizeCallbackTy FiniCB; |
||
215 | |||
216 | /// The directive kind of the innermost directive that has an associated |
||
217 | /// region which might require finalization when it is left. |
||
218 | omp::Directive DK; |
||
219 | |||
220 | /// Flag to indicate if the directive is cancellable. |
||
221 | bool IsCancellable; |
||
222 | }; |
||
223 | |||
224 | /// Push a finalization callback on the finalization stack. |
||
225 | /// |
||
226 | /// NOTE: Temporary solution until Clang CG is gone. |
||
227 | void pushFinalizationCB(const FinalizationInfo &FI) { |
||
228 | FinalizationStack.push_back(FI); |
||
229 | } |
||
230 | |||
231 | /// Pop the last finalization callback from the finalization stack. |
||
232 | /// |
||
233 | /// NOTE: Temporary solution until Clang CG is gone. |
||
234 | void popFinalizationCB() { FinalizationStack.pop_back(); } |
||
235 | |||
236 | /// Callback type for body (=inner region) code generation |
||
237 | /// |
||
238 | /// The callback takes code locations as arguments, each describing a |
||
239 | /// location where additional instructions can be inserted. |
||
240 | /// |
||
241 | /// The CodeGenIP may be in the middle of a basic block or point to the end of |
||
242 | /// it. The basic block may have a terminator or be degenerate. The callback |
||
243 | /// function may just insert instructions at that position, but also split the |
||
244 | /// block (without the Before argument of BasicBlock::splitBasicBlock such |
||
245 | /// that the identify of the split predecessor block is preserved) and insert |
||
246 | /// additional control flow, including branches that do not lead back to what |
||
247 | /// follows the CodeGenIP. Note that since the callback is allowed to split |
||
248 | /// the block, callers must assume that InsertPoints to positions in the |
||
249 | /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If |
||
250 | /// such InsertPoints need to be preserved, it can split the block itself |
||
251 | /// before calling the callback. |
||
252 | /// |
||
253 | /// AllocaIP and CodeGenIP must not point to the same position. |
||
254 | /// |
||
255 | /// \param AllocaIP is the insertion point at which new alloca instructions |
||
256 | /// should be placed. The BasicBlock it is pointing to must |
||
257 | /// not be split. |
||
258 | /// \param CodeGenIP is the insertion point at which the body code should be |
||
259 | /// placed. |
||
260 | using BodyGenCallbackTy = |
||
261 | function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
||
262 | |||
263 | // This is created primarily for sections construct as llvm::function_ref |
||
264 | // (BodyGenCallbackTy) is not storable (as described in the comments of |
||
265 | // function_ref class - function_ref contains non-ownable reference |
||
266 | // to the callable). |
||
267 | using StorableBodyGenCallbackTy = |
||
268 | std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; |
||
269 | |||
270 | /// Callback type for loop body code generation. |
||
271 | /// |
||
272 | /// \param CodeGenIP is the insertion point where the loop's body code must be |
||
273 | /// placed. This will be a dedicated BasicBlock with a |
||
274 | /// conditional branch from the loop condition check and |
||
275 | /// terminated with an unconditional branch to the loop |
||
276 | /// latch. |
||
277 | /// \param IndVar is the induction variable usable at the insertion point. |
||
278 | using LoopBodyGenCallbackTy = |
||
279 | function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; |
||
280 | |||
281 | /// Callback type for variable privatization (think copy & default |
||
282 | /// constructor). |
||
283 | /// |
||
284 | /// \param AllocaIP is the insertion point at which new alloca instructions |
||
285 | /// should be placed. |
||
286 | /// \param CodeGenIP is the insertion point at which the privatization code |
||
287 | /// should be placed. |
||
288 | /// \param Original The value being copied/created, should not be used in the |
||
289 | /// generated IR. |
||
290 | /// \param Inner The equivalent of \p Original that should be used in the |
||
291 | /// generated IR; this is equal to \p Original if the value is |
||
292 | /// a pointer and can thus be passed directly, otherwise it is |
||
293 | /// an equivalent but different value. |
||
294 | /// \param ReplVal The replacement value, thus a copy or new created version |
||
295 | /// of \p Inner. |
||
296 | /// |
||
297 | /// \returns The new insertion point where code generation continues and |
||
298 | /// \p ReplVal the replacement value. |
||
299 | using PrivatizeCallbackTy = function_ref<InsertPointTy( |
||
300 | InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, |
||
301 | Value &Inner, Value *&ReplVal)>; |
||
302 | |||
303 | /// Description of a LLVM-IR insertion point (IP) and a debug/source location |
||
304 | /// (filename, line, column, ...). |
||
305 | struct LocationDescription { |
||
306 | LocationDescription(const IRBuilderBase &IRB) |
||
307 | : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {} |
||
308 | LocationDescription(const InsertPointTy &IP) : IP(IP) {} |
||
309 | LocationDescription(const InsertPointTy &IP, const DebugLoc &DL) |
||
310 | : IP(IP), DL(DL) {} |
||
311 | InsertPointTy IP; |
||
312 | DebugLoc DL; |
||
313 | }; |
||
314 | |||
315 | /// Emitter methods for OpenMP directives. |
||
316 | /// |
||
317 | ///{ |
||
318 | |||
319 | /// Generator for '#omp barrier' |
||
320 | /// |
||
321 | /// \param Loc The location where the barrier directive was encountered. |
||
322 | /// \param DK The kind of directive that caused the barrier. |
||
323 | /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. |
||
324 | /// \param CheckCancelFlag Flag to indicate a cancel barrier return value |
||
325 | /// should be checked and acted upon. |
||
326 | /// |
||
327 | /// \returns The insertion point after the barrier. |
||
328 | InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, |
||
329 | bool ForceSimpleCall = false, |
||
330 | bool CheckCancelFlag = true); |
||
331 | |||
332 | /// Generator for '#omp cancel' |
||
333 | /// |
||
334 | /// \param Loc The location where the directive was encountered. |
||
335 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
||
336 | /// \param CanceledDirective The kind of directive that is canceled. |
||
337 | /// |
||
338 | /// \returns The insertion point after the barrier. |
||
339 | InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, |
||
340 | omp::Directive CanceledDirective); |
||
341 | |||
342 | /// Generator for '#omp parallel' |
||
343 | /// |
||
344 | /// \param Loc The insert and source location description. |
||
345 | /// \param AllocaIP The insertion points to be used for alloca instructions. |
||
346 | /// \param BodyGenCB Callback that will generate the region code. |
||
347 | /// \param PrivCB Callback to copy a given variable (think copy constructor). |
||
348 | /// \param FiniCB Callback to finalize variable copies. |
||
349 | /// \param IfCondition The evaluated 'if' clause expression, if any. |
||
350 | /// \param NumThreads The evaluated 'num_threads' clause expression, if any. |
||
351 | /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind). |
||
352 | /// \param IsCancellable Flag to indicate a cancellable parallel region. |
||
353 | /// |
||
354 | /// \returns The insertion position *after* the parallel. |
||
355 | IRBuilder<>::InsertPoint |
||
356 | createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, |
||
357 | BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, |
||
358 | FinalizeCallbackTy FiniCB, Value *IfCondition, |
||
359 | Value *NumThreads, omp::ProcBindKind ProcBind, |
||
360 | bool IsCancellable); |
||
361 | |||
362 | /// Generator for the control flow structure of an OpenMP canonical loop. |
||
363 | /// |
||
364 | /// This generator operates on the logical iteration space of the loop, i.e. |
||
365 | /// the caller only has to provide a loop trip count of the loop as defined by |
||
366 | /// base language semantics. The trip count is interpreted as an unsigned |
||
367 | /// integer. The induction variable passed to \p BodyGenCB will be of the same |
||
368 | /// type and run from 0 to \p TripCount - 1. It is up to the callback to |
||
369 | /// convert the logical iteration variable to the loop counter variable in the |
||
370 | /// loop body. |
||
371 | /// |
||
372 | /// \param Loc The insert and source location description. The insert |
||
373 | /// location can be between two instructions or the end of a |
||
374 | /// degenerate block (e.g. a BB under construction). |
||
375 | /// \param BodyGenCB Callback that will generate the loop body code. |
||
376 | /// \param TripCount Number of iterations the loop body is executed. |
||
377 | /// \param Name Base name used to derive BB and instruction names. |
||
378 | /// |
||
379 | /// \returns An object representing the created control flow structure which |
||
380 | /// can be used for loop-associated directives. |
||
381 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
||
382 | LoopBodyGenCallbackTy BodyGenCB, |
||
383 | Value *TripCount, |
||
384 | const Twine &Name = "loop"); |
||
385 | |||
386 | /// Generator for the control flow structure of an OpenMP canonical loop. |
||
387 | /// |
||
388 | /// Instead of a logical iteration space, this allows specifying user-defined |
||
389 | /// loop counter values using increment, upper- and lower bounds. To |
||
390 | /// disambiguate the terminology when counting downwards, instead of lower |
||
391 | /// bounds we use \p Start for the loop counter value in the first body |
||
392 | /// iteration. |
||
393 | /// |
||
394 | /// Consider the following limitations: |
||
395 | /// |
||
396 | /// * A loop counter space over all integer values of its bit-width cannot be |
||
397 | /// represented. E.g using uint8_t, its loop trip count of 256 cannot be |
||
398 | /// stored into an 8 bit integer): |
||
399 | /// |
||
400 | /// DO I = 0, 255, 1 |
||
401 | /// |
||
402 | /// * Unsigned wrapping is only supported when wrapping only "once"; E.g. |
||
403 | /// effectively counting downwards: |
||
404 | /// |
||
405 | /// for (uint8_t i = 100u; i > 0; i += 127u) |
||
406 | /// |
||
407 | /// |
||
408 | /// TODO: May need to add additional parameters to represent: |
||
409 | /// |
||
410 | /// * Allow representing downcounting with unsigned integers. |
||
411 | /// |
||
412 | /// * Sign of the step and the comparison operator might disagree: |
||
413 | /// |
||
414 | /// for (int i = 0; i < 42; i -= 1u) |
||
415 | /// |
||
416 | /// |
||
417 | /// \param Loc The insert and source location description. |
||
418 | /// \param BodyGenCB Callback that will generate the loop body code. |
||
419 | /// \param Start Value of the loop counter for the first iteration. |
||
420 | /// \param Stop Loop counter values past this will stop the loop. |
||
421 | /// \param Step Loop counter increment after each iteration; negative |
||
422 | /// means counting down. |
||
423 | /// \param IsSigned Whether Start, Stop and Step are signed integers. |
||
424 | /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop |
||
425 | /// counter. |
||
426 | /// \param ComputeIP Insertion point for instructions computing the trip |
||
427 | /// count. Can be used to ensure the trip count is available |
||
428 | /// at the outermost loop of a loop nest. If not set, |
||
429 | /// defaults to the preheader of the generated loop. |
||
430 | /// \param Name Base name used to derive BB and instruction names. |
||
431 | /// |
||
432 | /// \returns An object representing the created control flow structure which |
||
433 | /// can be used for loop-associated directives. |
||
434 | CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, |
||
435 | LoopBodyGenCallbackTy BodyGenCB, |
||
436 | Value *Start, Value *Stop, Value *Step, |
||
437 | bool IsSigned, bool InclusiveStop, |
||
438 | InsertPointTy ComputeIP = {}, |
||
439 | const Twine &Name = "loop"); |
||
440 | |||
441 | /// Collapse a loop nest into a single loop. |
||
442 | /// |
||
443 | /// Merges loops of a loop nest into a single CanonicalLoopNest representation |
||
444 | /// that has the same number of innermost loop iterations as the origin loop |
||
445 | /// nest. The induction variables of the input loops are derived from the |
||
446 | /// collapsed loop's induction variable. This is intended to be used to |
||
447 | /// implement OpenMP's collapse clause. Before applying a directive, |
||
448 | /// collapseLoops normalizes a loop nest to contain only a single loop and the |
||
449 | /// directive's implementation does not need to handle multiple loops itself. |
||
450 | /// This does not remove the need to handle all loop nest handling by |
||
451 | /// directives, such as the ordered(<n>) clause or the simd schedule-clause |
||
452 | /// modifier of the worksharing-loop directive. |
||
453 | /// |
||
454 | /// Example: |
||
455 | /// \code |
||
456 | /// for (int i = 0; i < 7; ++i) // Canonical loop "i" |
||
457 | /// for (int j = 0; j < 9; ++j) // Canonical loop "j" |
||
458 | /// body(i, j); |
||
459 | /// \endcode |
||
460 | /// |
||
461 | /// After collapsing with Loops={i,j}, the loop is changed to |
||
462 | /// \code |
||
463 | /// for (int ij = 0; ij < 63; ++ij) { |
||
464 | /// int i = ij / 9; |
||
465 | /// int j = ij % 9; |
||
466 | /// body(i, j); |
||
467 | /// } |
||
468 | /// \endcode |
||
469 | /// |
||
470 | /// In the current implementation, the following limitations apply: |
||
471 | /// |
||
472 | /// * All input loops have an induction variable of the same type. |
||
473 | /// |
||
474 | /// * The collapsed loop will have the same trip count integer type as the |
||
475 | /// input loops. Therefore it is possible that the collapsed loop cannot |
||
476 | /// represent all iterations of the input loops. For instance, assuming a |
||
477 | /// 32 bit integer type, and two input loops both iterating 2^16 times, the |
||
478 | /// theoretical trip count of the collapsed loop would be 2^32 iterations, |
||
479 | /// which cannot be represented in a 32-bit integer. Behavior is undefined |
||
480 | /// in this case. |
||
481 | /// |
||
482 | /// * The trip counts of every input loop must be available at \p ComputeIP. |
||
483 | /// Non-rectangular loops are not yet supported. |
||
484 | /// |
||
485 | /// * At each nest level, code between a surrounding loop and its nested loop |
||
486 | /// is hoisted into the loop body, and such code will be executed more |
||
487 | /// often than before collapsing (or not at all if any inner loop iteration |
||
488 | /// has a trip count of 0). This is permitted by the OpenMP specification. |
||
489 | /// |
||
490 | /// \param DL Debug location for instructions added for collapsing, |
||
491 | /// such as instructions to compute/derive the input loop's |
||
492 | /// induction variables. |
||
493 | /// \param Loops Loops in the loop nest to collapse. Loops are specified |
||
494 | /// from outermost-to-innermost and every control flow of a |
||
495 | /// loop's body must pass through its directly nested loop. |
||
496 | /// \param ComputeIP Where to insert additional instructions that compute the collapsed |
||
497 | /// trip count. If not set, defaults to before the generated |
||
498 | /// loop. |
||
499 | /// |
||
500 | /// \returns The CanonicalLoopInfo object representing the collapsed loop. |
||
501 | CanonicalLoopInfo *collapseLoops(DebugLoc DL, |
||
502 | ArrayRef<CanonicalLoopInfo *> Loops, |
||
503 | InsertPointTy ComputeIP); |
||
504 | |||
505 | private: |
||
506 | /// Modifies the canonical loop to be a statically-scheduled workshare loop. |
||
507 | /// |
||
508 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
||
509 | /// created by \p createCanonicalLoop and emits additional instructions to |
||
510 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
||
511 | /// runtime function in the preheader to obtain the loop bounds to be used in |
||
512 | /// the current thread, updates the relevant instructions in the canonical |
||
513 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
||
514 | /// |
||
515 | /// \param DL Debug location for instructions added for the |
||
516 | /// workshare-loop construct itself. |
||
517 | /// \param CLI A descriptor of the canonical loop to workshare. |
||
518 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
||
519 | /// preheader of the loop. |
||
520 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
||
521 | /// the loop. |
||
522 | /// |
||
523 | /// \returns Point where to insert code after the workshare construct. |
||
524 | InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
||
525 | InsertPointTy AllocaIP, |
||
526 | bool NeedsBarrier); |
||
527 | |||
528 | /// Modifies the canonical loop to be a statically-scheduled workshare loop with a |
||
529 | /// user-specified chunk size. |
||
530 | /// |
||
531 | /// \param DL Debug location for instructions added for the |
||
532 | /// workshare-loop construct itself. |
||
533 | /// \param CLI A descriptor of the canonical loop to workshare. |
||
534 | /// \param AllocaIP An insertion point for Alloca instructions usable in |
||
535 | /// the preheader of the loop. |
||
536 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after the |
||
537 | /// loop. |
||
538 | /// \param ChunkSize The user-specified chunk size. |
||
539 | /// |
||
540 | /// \returns Point where to insert code after the workshare construct. |
||
541 | InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, |
||
542 | CanonicalLoopInfo *CLI, |
||
543 | InsertPointTy AllocaIP, |
||
544 | bool NeedsBarrier, |
||
545 | Value *ChunkSize); |
||
546 | |||
547 | /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. |
||
548 | /// |
||
549 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
||
550 | /// created by \p createCanonicalLoop and emits additional instructions to |
||
551 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
||
552 | /// runtime function in the preheader to obtain, and then in each iteration |
||
553 | /// to update the loop counter. |
||
554 | /// |
||
555 | /// \param DL Debug location for instructions added for the |
||
556 | /// workshare-loop construct itself. |
||
557 | /// \param CLI A descriptor of the canonical loop to workshare. |
||
558 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
||
559 | /// preheader of the loop. |
||
560 | /// \param SchedType Type of scheduling to be passed to the init function. |
||
561 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
||
562 | /// the loop. |
||
563 | /// \param Chunk The size of loop chunk considered as a unit when |
||
564 | /// scheduling. If \p nullptr, defaults to 1. |
||
565 | /// |
||
566 | /// \returns Point where to insert code after the workshare construct. |
||
567 | InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, |
||
568 | InsertPointTy AllocaIP, |
||
569 | omp::OMPScheduleType SchedType, |
||
570 | bool NeedsBarrier, |
||
571 | Value *Chunk = nullptr); |
||
572 | |||
573 | /// Create alternative version of the loop to support if clause |
||
574 | /// |
||
575 | /// An OpenMP if clause can require generating a second loop. This loop |
||
576 | /// will be executed when the if clause condition is not met. createIfVersion |
||
577 | /// adds a branch instruction to the copied loop if \p IfCond is not met. |
||
578 | /// |
||
579 | /// \param Loop Original loop which should be versioned. |
||
580 | /// \param IfCond Value which corresponds to if clause condition |
||
581 | /// \param VMap Value to value map to define relation between |
||
582 | /// original and copied loop values and loop blocks. |
||
583 | /// \param NamePrefix Optional name prefix for if.then if.else blocks. |
||
584 | void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond, |
||
585 | ValueToValueMapTy &VMap, const Twine &NamePrefix = ""); |
||
586 | |||
587 | public: |
||
588 | /// Modifies the canonical loop to be a workshare loop. |
||
589 | /// |
||
590 | /// This takes a \p LoopInfo representing a canonical loop, such as the one |
||
591 | /// created by \p createCanonicalLoop and emits additional instructions to |
||
592 | /// turn it into a workshare loop. In particular, it calls to an OpenMP |
||
593 | /// runtime function in the preheader to obtain the loop bounds to be used in |
||
594 | /// the current thread, updates the relevant instructions in the canonical |
||
595 | /// loop and calls to an OpenMP runtime finalization function after the loop. |
||
596 | /// |
||
597 | /// The concrete transformation is done by applyStaticWorkshareLoop, |
||
598 | /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending |
||
599 | /// on the value of \p SchedKind and \p ChunkSize. |
||
600 | /// |
||
601 | /// \param DL Debug location for instructions added for the |
||
602 | /// workshare-loop construct itself. |
||
603 | /// \param CLI A descriptor of the canonical loop to workshare. |
||
604 | /// \param AllocaIP An insertion point for Alloca instructions usable in the |
||
605 | /// preheader of the loop. |
||
606 | /// \param NeedsBarrier Indicates whether a barrier must be inserted after |
||
607 | /// the loop. |
||
608 | /// \param SchedKind Scheduling algorithm to use. |
||
609 | /// \param ChunkSize The chunk size for the inner loop. |
||
610 | /// \param HasSimdModifier Whether the simd modifier is present in the |
||
611 | /// schedule clause. |
||
612 | /// \param HasMonotonicModifier Whether the monotonic modifier is present in |
||
613 | /// the schedule clause. |
||
614 | /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is |
||
615 | /// present in the schedule clause. |
||
616 | /// \param HasOrderedClause Whether the (parameterless) ordered clause is |
||
617 | /// present. |
||
618 | /// |
||
619 | /// \returns Point where to insert code after the workshare construct. |
||
620 | InsertPointTy applyWorkshareLoop( |
||
621 | DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, |
||
622 | bool NeedsBarrier, |
||
623 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, |
||
624 | Value *ChunkSize = nullptr, bool HasSimdModifier = false, |
||
625 | bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false, |
||
626 | bool HasOrderedClause = false); |
||
627 | |||
628 | /// Tile a loop nest. |
||
629 | /// |
||
630 | /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in |
||
631 | /// \p Loops must be perfectly nested, from outermost to innermost loop |
||
632 | /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value |
||
633 | /// of every loop and every tile sizes must be usable in the outermost |
||
634 | /// loop's preheader. This implies that the loop nest is rectangular. |
||
635 | /// |
||
636 | /// Example: |
||
637 | /// \code |
||
638 | /// for (int i = 0; i < 15; ++i) // Canonical loop "i" |
||
639 | /// for (int j = 0; j < 14; ++j) // Canonical loop "j" |
||
640 | /// body(i, j); |
||
641 | /// \endcode |
||
642 | /// |
||
643 | /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to |
||
644 | /// \code |
||
645 | /// for (int i1 = 0; i1 < 3; ++i1) |
||
646 | /// for (int j1 = 0; j1 < 2; ++j1) |
||
647 | /// for (int i2 = 0; i2 < 5; ++i2) |
||
648 | /// for (int j2 = 0; j2 < 7; ++j2) |
||
649 | /// body(i1*5+i2, j1*7+j2); |
||
650 | /// \endcode |
||
651 | /// |
||
652 | /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are |
||
653 | /// referred to as the floor loops, and i2 and j2 as the tile loops. Tiling also |
||
654 | /// handles non-constant trip counts, non-constant tile sizes and trip counts |
||
655 | /// that are not multiples of the tile size. In the latter case the tile loop |
||
656 | /// of the last floor-loop iteration will have fewer iterations than specified |
||
657 | /// as its tile size. |
||
658 | /// |
||
659 | /// |
||
660 | /// @param DL Debug location for instructions added by tiling, for |
||
661 | /// instance the floor- and tile trip count computation. |
||
662 | /// @param Loops Loops to tile. The CanonicalLoopInfo objects are |
||
663 | /// invalidated by this method, i.e. should not be used after |
||
664 | /// tiling. |
||
665 | /// @param TileSizes For each loop in \p Loops, the tile size for that |
||
666 | /// dimension. |
||
667 | /// |
||
668 | /// \returns A list of generated loops. Contains twice as many loops as the |
||
669 | /// input loop nest; the first half are the floor loops and the |
||
670 | /// second half are the tile loops. |
||
671 | std::vector<CanonicalLoopInfo *> |
||
672 | tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, |
||
673 | ArrayRef<Value *> TileSizes); |
||
674 | |||
675 | /// Fully unroll a loop. |
||
676 | /// |
||
677 | /// Instead of unrolling the loop immediately (and duplicating its body |
||
678 | /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop |
||
679 | /// metadata. |
||
680 | /// |
||
681 | /// \param DL Debug location for instructions added by unrolling. |
||
682 | /// \param Loop The loop to unroll. The loop will be invalidated. |
||
683 | void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); |
||
684 | |||
685 | /// Fully or partially unroll a loop. How the loop is unrolled is determined |
||
686 | /// using LLVM's LoopUnrollPass. |
||
687 | /// |
||
688 | /// \param DL Debug location for instructions added by unrolling. |
||
689 | /// \param Loop The loop to unroll. The loop will be invalidated. |
||
690 | void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); |
||
691 | |||
692 | /// Partially unroll a loop. |
||
693 | /// |
||
694 | /// The CanonicalLoopInfo of the unrolled loop for use with chained |
||
695 | /// loop-associated directive can be requested using \p UnrolledCLI. Not |
||
696 | /// needing the CanonicalLoopInfo allows more efficient code generation by |
||
697 | /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. |
||
698 | /// A loop-associated directive applied to the unrolled loop needs to know the |
||
699 | /// new trip count which means that if using a heuristically determined unroll |
||
700 | /// factor (\p Factor == 0), that factor must be computed immediately. We are |
||
701 | /// using the same logic as the LoopUnrollPass to derive the unroll factor, |
||
702 | /// but which assumes that some canonicalization has taken place (e.g. |
||
703 | /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform |
||
704 | /// better when the unrolled loop's CanonicalLoopInfo is not needed. |
||
705 | /// |
||
706 | /// \param DL Debug location for instructions added by unrolling. |
||
707 | /// \param Loop The loop to unroll. The loop will be invalidated. |
||
708 | /// \param Factor The factor to unroll the loop by. A factor of 0 |
||
709 | /// indicates that a heuristic should be used to determine |
||
710 | /// the unroll-factor. |
||
711 | /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the |
||
712 | /// partially unrolled loop. Otherwise, uses loop metadata |
||
713 | /// to defer unrolling to the LoopUnrollPass. |
||
714 | void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, |
||
715 | CanonicalLoopInfo **UnrolledCLI); |
||
716 | |||
717 | /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop |
||
718 | /// is cloned. The metadata which prevents vectorization is added to |
||
719 | /// the cloned loop. The cloned loop is executed when IfCond is evaluated |
||
720 | /// to false. |
||
721 | /// |
||
722 | /// \param Loop The loop to simd-ize. |
||
723 | /// \param AlignedVars The map which contains pairs of the pointer |
||
724 | /// and its corresponding alignment. |
||
725 | /// \param IfCond The value which corresponds to the if clause |
||
726 | /// condition. |
||
727 | /// \param Order The enum to map order clause. |
||
728 | /// \param Simdlen The Simdlen length to apply to the simd loop. |
||
729 | /// \param Safelen The Safelen length to apply to the simd loop. |
||
730 | void applySimd(CanonicalLoopInfo *Loop, |
||
731 | MapVector<Value *, Value *> AlignedVars, Value *IfCond, |
||
732 | omp::OrderKind Order, ConstantInt *Simdlen, |
||
733 | ConstantInt *Safelen); |
||
734 | |||
735 | /// Generator for '#omp flush' |
||
736 | /// |
||
737 | /// \param Loc The location where the flush directive was encountered |
||
738 | void createFlush(const LocationDescription &Loc); |
||
739 | |||
740 | /// Generator for '#omp taskwait' |
||
741 | /// |
||
742 | /// \param Loc The location where the taskwait directive was encountered. |
||
743 | void createTaskwait(const LocationDescription &Loc); |
||
744 | |||
745 | /// Generator for '#omp taskyield' |
||
746 | /// |
||
747 | /// \param Loc The location where the taskyield directive was encountered. |
||
748 | void createTaskyield(const LocationDescription &Loc); |
||
749 | |||
750 | /// A struct to pack the relevant information for an OpenMP depend clause. |
||
751 | struct DependData { |
||
752 | omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown; |
||
753 | Type *DepValueType; |
||
754 | Value *DepVal; |
||
755 | explicit DependData() = default; |
||
756 | DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, |
||
757 | Value *DepVal) |
||
758 | : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {} |
||
759 | }; |
||
760 | |||
761 | /// Generator for `#omp task` |
||
762 | /// |
||
763 | /// \param Loc The location where the task construct was encountered. |
||
764 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
||
765 | /// \param BodyGenCB Callback that will generate the region code. |
||
766 | /// \param Tied True if the task is tied, false if the task is untied. |
||
767 | /// \param Final i1 value which is `true` if the task is final, `false` if the |
||
768 | /// task is not final. |
||
769 | /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred |
||
770 | /// task is generated, and the encountering thread must |
||
771 | /// suspend the current task region, for which execution |
||
772 | /// cannot be resumed until execution of the structured |
||
773 | /// block that is associated with the generated task is |
||
774 | /// completed. |
||
775 | InsertPointTy createTask(const LocationDescription &Loc, |
||
776 | InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, |
||
777 | bool Tied = true, Value *Final = nullptr, |
||
778 | Value *IfCondition = nullptr, |
||
779 | SmallVector<DependData> Dependencies = {}); |
||
780 | |||
781 | /// Generator for the taskgroup construct |
||
782 | /// |
||
783 | /// \param Loc The location where the taskgroup construct was encountered. |
||
784 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
||
785 | /// \param BodyGenCB Callback that will generate the region code. |
||
786 | InsertPointTy createTaskgroup(const LocationDescription &Loc, |
||
787 | InsertPointTy AllocaIP, |
||
788 | BodyGenCallbackTy BodyGenCB); |
||
789 | |||
790 | /// Functions used to generate reductions. Such functions take two Values |
||
791 | /// representing LHS and RHS of the reduction, respectively, and a reference |
||
792 | /// to the value that is updated to refer to the reduction result. |
||
793 | using ReductionGenTy = |
||
794 | function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; |
||
795 | |||
796 | /// Functions used to generate atomic reductions. Such functions take two |
||
797 | /// Values representing pointers to LHS and RHS of the reduction, as well as |
||
798 | /// the element type of these pointers. They are expected to atomically |
||
799 | /// update the LHS to the reduced value. |
||
800 | using AtomicReductionGenTy = |
||
801 | function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>; |
||
802 | |||
803 | /// Information about an OpenMP reduction. |
||
804 | struct ReductionInfo { |
||
805 | ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, |
||
806 | ReductionGenTy ReductionGen, |
||
807 | AtomicReductionGenTy AtomicReductionGen) |
||
808 | : ElementType(ElementType), Variable(Variable), |
||
809 | PrivateVariable(PrivateVariable), ReductionGen(ReductionGen), |
||
810 | AtomicReductionGen(AtomicReductionGen) { |
||
811 | assert(cast<PointerType>(Variable->getType()) |
||
812 | ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type"); |
||
813 | } |
||
814 | |||
815 | /// Reduction element type, must match pointee type of variable. |
||
816 | Type *ElementType; |
||
817 | |||
818 | /// Reduction variable of pointer type. |
||
819 | Value *Variable; |
||
820 | |||
821 | /// Thread-private partial reduction variable. |
||
822 | Value *PrivateVariable; |
||
823 | |||
824 | /// Callback for generating the reduction body. The IR produced by this will |
||
825 | /// be used to combine two values in a thread-safe context, e.g., under |
||
826 | /// lock or within the same thread, and therefore need not be atomic. |
||
827 | ReductionGenTy ReductionGen; |
||
828 | |||
829 | /// Callback for generating the atomic reduction body, may be null. The IR |
||
830 | /// produced by this will be used to atomically combine two values during |
||
831 | /// reduction. If null, the implementation will use the non-atomic version |
||
832 | /// along with the appropriate synchronization mechanisms. |
||
833 | AtomicReductionGenTy AtomicReductionGen; |
||
834 | }; |
||
835 | |||
836 | // TODO: provide atomic and non-atomic reduction generators for reduction |
||
837 | // operators defined by the OpenMP specification. |
||
838 | |||
839 | /// Generator for '#omp reduction'. |
||
840 | /// |
||
841 | /// Emits the IR instructing the runtime to perform the specific kind of |
||
842 | /// reductions. Expects reduction variables to have been privatized and |
||
843 | /// initialized to reduction-neutral values separately. Emits the calls to |
||
844 | /// runtime functions as well as the reduction function and the basic blocks |
||
845 | /// performing the reduction atomically and non-atomically. |
||
846 | /// |
||
847 | /// The code emitted for the following: |
||
848 | /// |
||
849 | /// \code |
||
850 | /// type var_1; |
||
851 | /// type var_2; |
||
852 | /// #pragma omp <directive> reduction(reduction-op:var_1,var_2) |
||
853 | /// /* body */; |
||
854 | /// \endcode |
||
855 | /// |
||
856 | /// corresponds to the following sketch. |
||
857 | /// |
||
858 | /// \code |
||
859 | /// void _outlined_par() { |
||
860 | /// // N is the number of different reductions. |
||
861 | /// void *red_array[] = {privatized_var_1, privatized_var_2, ...}; |
||
862 | /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array, |
||
863 | /// _omp_reduction_func, |
||
864 | /// _gomp_critical_user.reduction.var)) { |
||
865 | /// case 1: { |
||
866 | /// var_1 = var_1 <reduction-op> privatized_var_1; |
||
867 | /// var_2 = var_2 <reduction-op> privatized_var_2; |
||
868 | /// // ... |
||
869 | /// __kmpc_end_reduce(...); |
||
870 | /// break; |
||
871 | /// } |
||
872 | /// case 2: { |
||
873 | /// _Atomic<ReductionOp>(var_1, privatized_var_1); |
||
874 | /// _Atomic<ReductionOp>(var_2, privatized_var_2); |
||
875 | /// // ... |
||
876 | /// break; |
||
877 | /// } |
||
878 | /// default: break; |
||
879 | /// } |
||
880 | /// } |
||
881 | /// |
||
882 | /// void _omp_reduction_func(void **lhs, void **rhs) { |
||
883 | /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0]; |
||
884 | /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1]; |
||
885 | /// // ... |
||
886 | /// } |
||
887 | /// \endcode |
||
888 | /// |
||
889 | /// \param Loc The location where the reduction was |
||
890 | /// encountered. Must be within the associate |
||
891 | /// directive and after the last local access to the |
||
892 | /// reduction variables. |
||
893 | /// \param AllocaIP An insertion point suitable for allocas usable |
||
894 | /// in reductions. |
||
895 | /// \param ReductionInfos A list of info on each reduction variable. |
||
896 | /// \param IsNoWait A flag set if the reduction is marked as nowait. |
||
897 | InsertPointTy createReductions(const LocationDescription &Loc, |
||
898 | InsertPointTy AllocaIP, |
||
899 | ArrayRef<ReductionInfo> ReductionInfos, |
||
900 | bool IsNoWait = false); |
||
901 | |||
902 | ///} |
||
903 | |||
904 | /// Return the insertion point used by the underlying IRBuilder. |
||
905 | InsertPointTy getInsertionPoint() { return Builder.saveIP(); } |
||
906 | |||
907 | /// Update the internal location to \p Loc. |
||
908 | bool updateToLocation(const LocationDescription &Loc) { |
||
909 | Builder.restoreIP(Loc.IP); |
||
910 | Builder.SetCurrentDebugLocation(Loc.DL); |
||
911 | return Loc.IP.getBlock() != nullptr; |
||
912 | } |
||
913 | |||
914 | /// Return the function declaration for the runtime function with \p FnID. |
||
915 | FunctionCallee getOrCreateRuntimeFunction(Module &M, |
||
916 | omp::RuntimeFunction FnID); |
||
917 | |||
918 | Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); |
||
919 | |||
920 | /// Return the (LLVM-IR) string describing the source location \p LocStr. |
||
921 | Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); |
||
922 | |||
923 | /// Return the (LLVM-IR) string describing the default source location. |
||
924 | Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); |
||
925 | |||
926 | /// Return the (LLVM-IR) string describing the source location identified by |
||
927 | /// the arguments. |
||
928 | Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, |
||
929 | unsigned Line, unsigned Column, |
||
930 | uint32_t &SrcLocStrSize); |
||
931 | |||
932 | /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as |
||
933 | /// fallback if \p DL does not specify the function name. |
||
934 | Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, |
||
935 | Function *F = nullptr); |
||
936 | |||
937 | /// Return the (LLVM-IR) string describing the source location \p Loc. |
||
938 | Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, |
||
939 | uint32_t &SrcLocStrSize); |
||
940 | |||
941 | /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. |
||
942 | /// TODO: Create an enum class for the Reserve2Flags |
||
943 | Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, |
||
944 | omp::IdentFlag Flags = omp::IdentFlag(0), |
||
945 | unsigned Reserve2Flags = 0); |
||
946 | |||
947 | /// Create a hidden global flag \p Name in the module with initial value \p |
||
948 | /// Value. |
||
949 | GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); |
||
950 | |||
951 | /// Create an offloading section struct used to register this global at |
||
952 | /// runtime. |
||
953 | /// |
||
954 | /// Type struct __tgt_offload_entry{ |
||
955 | /// void *addr; // Pointer to the offload entry info. |
||
956 | /// // (function or global) |
||
957 | /// char *name; // Name of the function or global. |
||
958 | /// size_t size; // Size of the entry info (0 if it is a function). |
||
959 | /// int32_t flags; |
||
960 | /// int32_t reserved; |
||
961 | /// }; |
||
962 | /// |
||
963 | /// \param Addr The pointer to the global being registered. |
||
964 | /// \param Name The symbol name associated with the global. |
||
965 | /// \param Size The size in bytes of the global (0 for functions). |
||
966 | /// \param Flags Flags associated with the entry. |
||
967 | /// \param SectionName The section this entry will be placed at. |
||
968 | void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, |
||
969 | int32_t Flags, |
||
970 | StringRef SectionName = "omp_offloading_entries"); |
||
971 | |||
972 | /// Generate control flow and cleanup for cancellation. |
||
973 | /// |
||
974 | /// \param CancelFlag Flag indicating if the cancellation is performed. |
||
975 | /// \param CanceledDirective The kind of directive that is canceled. |
||
976 | /// \param ExitCB Extra code to be generated in the exit block. |
||
977 | void emitCancelationCheckImpl(Value *CancelFlag, |
||
978 | omp::Directive CanceledDirective, |
||
979 | FinalizeCallbackTy ExitCB = {}); |
||
980 | |||
981 | /// Generate a target region entry call. |
||
982 | /// |
||
983 | /// \param Loc The location at which the request originated and is fulfilled. |
||
984 | /// \param Return Return value of the created function returned by reference. |
||
985 | /// \param DeviceID Identifier for the device via the 'device' clause. |
||
986 | /// \param NumTeams Number of teams for the region via the 'num_teams' clause |
||
987 | /// or 0 if unspecified and -1 if there is no 'teams' clause. |
||
988 | /// \param NumThreads Number of threads via the 'thread_limit' clause. |
||
989 | /// \param HostPtr Pointer to the host-side pointer of the target kernel. |
||
990 | /// \param KernelArgs Array of arguments to the kernel. |
||
991 | InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return, |
||
992 | Value *Ident, Value *DeviceID, Value *NumTeams, |
||
993 | Value *NumThreads, Value *HostPtr, |
||
994 | ArrayRef<Value *> KernelArgs); |
||
995 | |||
996 | /// Generate a barrier runtime call. |
||
997 | /// |
||
998 | /// \param Loc The location at which the request originated and is fulfilled. |
||
999 | /// \param DK The directive which caused the barrier |
||
1000 | /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier. |
||
1001 | /// \param CheckCancelFlag Flag to indicate a cancel barrier return value |
||
1002 | /// should be checked and acted upon. |
||
1003 | /// |
||
1004 | /// \returns The insertion point after the barrier. |
||
1005 | InsertPointTy emitBarrierImpl(const LocationDescription &Loc, |
||
1006 | omp::Directive DK, bool ForceSimpleCall, |
||
1007 | bool CheckCancelFlag); |
||
1008 | |||
1009 | /// Generate a flush runtime call. |
||
1010 | /// |
||
1011 | /// \param Loc The location at which the request originated and is fulfilled. |
||
1012 | void emitFlush(const LocationDescription &Loc); |
||
1013 | |||
1014 | /// The finalization stack made up of finalize callbacks currently in-flight, |
||
1015 | /// wrapped into FinalizationInfo objects that reference also the finalization |
||
1016 | /// target block and the kind of cancellable directive. |
||
1017 | SmallVector<FinalizationInfo, 8> FinalizationStack; |
||
1018 | |||
1019 | /// Return true if the last entry in the finalization stack is of kind \p DK |
||
1020 | /// and cancellable. |
||
1021 | bool isLastFinalizationInfoCancellable(omp::Directive DK) { |
||
1022 | return !FinalizationStack.empty() && |
||
1023 | FinalizationStack.back().IsCancellable && |
||
1024 | FinalizationStack.back().DK == DK; |
||
1025 | } |
||
1026 | |||
1027 | /// Generate a taskwait runtime call. |
||
1028 | /// |
||
1029 | /// \param Loc The location at which the request originated and is fulfilled. |
||
1030 | void emitTaskwaitImpl(const LocationDescription &Loc); |
||
1031 | |||
1032 | /// Generate a taskyield runtime call. |
||
1033 | /// |
||
1034 | /// \param Loc The location at which the request originated and is fulfilled. |
||
1035 | void emitTaskyieldImpl(const LocationDescription &Loc); |
||
1036 | |||
1037 | /// Return the current thread ID. |
||
1038 | /// |
||
1039 | /// \param Ident The ident (ident_t*) describing the query origin. |
||
1040 | Value *getOrCreateThreadID(Value *Ident); |
||
1041 | |||
1042 | /// The OpenMPIRBuilder Configuration |
||
1043 | OpenMPIRBuilderConfig Config; |
||
1044 | |||
1045 | /// The underlying LLVM-IR module |
||
1046 | Module &M; |
||
1047 | |||
1048 | /// The LLVM-IR Builder used to create IR. |
||
1049 | IRBuilder<> Builder; |
||
1050 | |||
1051 | /// Map to remember source location strings |
||
1052 | StringMap<Constant *> SrcLocStrMap; |
||
1053 | |||
1054 | /// Map to remember existing ident_t*. |
||
1055 | DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap; |
||
1056 | |||
1057 | /// Helper that contains information about regions we need to outline |
||
1058 | /// during finalization. |
||
1059 | struct OutlineInfo { |
||
1060 | using PostOutlineCBTy = std::function<void(Function &)>; |
||
1061 | PostOutlineCBTy PostOutlineCB; |
||
1062 | BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; |
||
1063 | SmallVector<Value *, 2> ExcludeArgsFromAggregate; |
||
1064 | |||
1065 | /// Collect all blocks in between EntryBB and ExitBB in both the given |
||
1066 | /// vector and set. |
||
1067 | void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet, |
||
1068 | SmallVectorImpl<BasicBlock *> &BlockVector); |
||
1069 | |||
1070 | /// Return the function that contains the region to be outlined. |
||
1071 | Function *getFunction() const { return EntryBB->getParent(); } |
||
1072 | }; |
||
1073 | |||
1074 | /// Collection of regions that need to be outlined during finalization. |
||
1075 | SmallVector<OutlineInfo, 16> OutlineInfos; |
||
1076 | |||
1077 | /// Collection of owned canonical loop objects that eventually need to be |
||
1078 | /// free'd. |
||
1079 | std::forward_list<CanonicalLoopInfo> LoopInfos; |
||
1080 | |||
1081 | /// Add a new region that will be outlined later. |
||
1082 | void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } |
||
1083 | |||
1084 | /// An ordered map of auto-generated variables to their unique names. |
||
1085 | /// It stores variables with the following names: 1) ".gomp_critical_user_" + |
||
1086 | /// <critical_section_name> + ".var" for "omp critical" directives; 2) |
||
1087 | /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate |
||
1088 | /// variables. |
||
1089 | StringMap<Constant*, BumpPtrAllocator> InternalVars; |
||
1090 | |||
1091 | /// Create the global variable holding the offload mappings information. |
||
1092 | GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings, |
||
1093 | std::string VarName); |
||
1094 | |||
1095 | /// Create the global variable holding the offload names information. |
||
1096 | GlobalVariable * |
||
1097 | createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names, |
||
1098 | std::string VarName); |
||
1099 | |||
1100 | struct MapperAllocas { |
||
1101 | AllocaInst *ArgsBase = nullptr; |
||
1102 | AllocaInst *Args = nullptr; |
||
1103 | AllocaInst *ArgSizes = nullptr; |
||
1104 | }; |
||
1105 | |||
1106 | /// Create the alloca instructions used in the call to mapper functions. |
||
1107 | void createMapperAllocas(const LocationDescription &Loc, |
||
1108 | InsertPointTy AllocaIP, unsigned NumOperands, |
||
1109 | struct MapperAllocas &MapperAllocas); |
||
1110 | |||
1111 | /// Create the call for the target mapper function. |
||
1112 | /// \param Loc The source location description. |
||
1113 | /// \param MapperFunc Function to be called. |
||
1114 | /// \param SrcLocInfo Source location information global. |
||
1115 | /// \param MaptypesArg The argument types. |
||
1116 | /// \param MapnamesArg The argument names. |
||
1117 | /// \param MapperAllocas The AllocaInst used for the call. |
||
1118 | /// \param DeviceID Device ID for the call. |
||
1119 | /// \param NumOperands Number of operands in the call. |
||
1120 | void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, |
||
1121 | Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, |
||
1122 | struct MapperAllocas &MapperAllocas, int64_t DeviceID, |
||
1123 | unsigned NumOperands); |
||
1124 | |||
1125 | /// Container for the arguments used to pass data to the runtime library. |
||
1126 | struct TargetDataRTArgs { |
||
1127 | explicit TargetDataRTArgs() {} |
||
1128 | /// The array of base pointers passed to the runtime library. |
||
1129 | Value *BasePointersArray = nullptr; |
||
1130 | /// The array of section pointers passed to the runtime library. |
||
1131 | Value *PointersArray = nullptr; |
||
1132 | /// The array of sizes passed to the runtime library. |
||
1133 | Value *SizesArray = nullptr; |
||
1134 | /// The array of map types passed to the runtime library for the beginning |
||
1135 | /// of the region or for the entire region if there are no separate map |
||
1136 | /// types for the region end. |
||
1137 | Value *MapTypesArray = nullptr; |
||
1138 | /// The array of map types passed to the runtime library for the end of the |
||
1139 | /// region, or nullptr if there are no separate map types for the region |
||
1140 | /// end. |
||
1141 | Value *MapTypesArrayEnd = nullptr; |
||
1142 | /// The array of user-defined mappers passed to the runtime library. |
||
1143 | Value *MappersArray = nullptr; |
||
1144 | /// The array of original declaration names of mapped pointers sent to the |
||
1145 | /// runtime library for debugging |
||
1146 | Value *MapNamesArray = nullptr; |
||
1147 | }; |
||
1148 | |||
1149 | /// Struct that keeps the information that should be kept throughout |
||
1150 | /// a 'target data' region. |
||
1151 | class TargetDataInfo { |
||
1152 | /// Set to true if device pointer information have to be obtained. |
||
1153 | bool RequiresDevicePointerInfo = false; |
||
1154 | /// Set to true if Clang emits separate runtime calls for the beginning and |
||
1155 | /// end of the region. These calls might have separate map type arrays. |
||
1156 | bool SeparateBeginEndCalls = false; |
||
1157 | |||
1158 | public: |
||
1159 | TargetDataRTArgs RTArgs; |
||
1160 | |||
1161 | /// Indicate whether any user-defined mapper exists. |
||
1162 | bool HasMapper = false; |
||
1163 | /// The total number of pointers passed to the runtime library. |
||
1164 | unsigned NumberOfPtrs = 0u; |
||
1165 | |||
1166 | explicit TargetDataInfo() {} |
||
1167 | explicit TargetDataInfo(bool RequiresDevicePointerInfo, |
||
1168 | bool SeparateBeginEndCalls) |
||
1169 | : RequiresDevicePointerInfo(RequiresDevicePointerInfo), |
||
1170 | SeparateBeginEndCalls(SeparateBeginEndCalls) {} |
||
1171 | /// Clear information about the data arrays. |
||
1172 | void clearArrayInfo() { |
||
1173 | RTArgs = TargetDataRTArgs(); |
||
1174 | HasMapper = false; |
||
1175 | NumberOfPtrs = 0u; |
||
1176 | } |
||
1177 | /// Return true if the current target data information has valid arrays. |
||
1178 | bool isValid() { |
||
1179 | return RTArgs.BasePointersArray && RTArgs.PointersArray && |
||
1180 | RTArgs.SizesArray && RTArgs.MapTypesArray && |
||
1181 | (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs; |
||
1182 | } |
||
1183 | bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; } |
||
1184 | bool separateBeginEndCalls() { return SeparateBeginEndCalls; } |
||
1185 | }; |
||
1186 | |||
1187 | /// Emit the arguments to be passed to the runtime library based on the |
||
1188 | /// arrays of base pointers, pointers, sizes, map types, and mappers. If |
||
1189 | /// ForEndCall, emit map types to be passed for the end of the region instead |
||
1190 | /// of the beginning. |
||
1191 | void emitOffloadingArraysArgument(IRBuilderBase &Builder, |
||
1192 | OpenMPIRBuilder::TargetDataRTArgs &RTArgs, |
||
1193 | OpenMPIRBuilder::TargetDataInfo &Info, |
||
1194 | bool EmitDebug = false, |
||
1195 | bool ForEndCall = false); |
||
1196 | |||
1197 | /// Creates offloading entry for the provided entry ID \a ID, address \a |
||
1198 | /// Addr, size \a Size, and flags \a Flags. |
||
1199 | void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, |
||
1200 | int32_t Flags, GlobalValue::LinkageTypes); |
||
1201 | |||
1202 | /// The kind of errors that can occur when emitting the offload entries and |
||
1203 | /// metadata. |
||
1204 | enum EmitMetadataErrorKind { |
||
1205 | EMIT_MD_TARGET_REGION_ERROR, |
||
1206 | EMIT_MD_DECLARE_TARGET_ERROR, |
||
1207 | EMIT_MD_GLOBAL_VAR_LINK_ERROR |
||
1208 | }; |
||
1209 | |||
1210 | /// Callback function type |
||
1211 | using EmitMetadataErrorReportFunctionTy = |
||
1212 | std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>; |
||
1213 | |||
1214 | // Emit the offloading entries and metadata so that the device codegen side |
||
1215 | // can easily figure out what to emit. The produced metadata looks like |
||
1216 | // this: |
||
1217 | // |
||
1218 | // !omp_offload.info = !{!1, ...} |
||
1219 | // |
||
1220 | // We only generate metadata for function that contain target regions. |
||
1221 | void createOffloadEntriesAndInfoMetadata( |
||
1222 | OffloadEntriesInfoManager &OffloadEntriesInfoManager, |
||
1223 | EmitMetadataErrorReportFunctionTy &ErrorReportFunction); |
||
1224 | |||
1225 | public: |
||
1226 | /// Generator for __kmpc_copyprivate |
||
1227 | /// |
||
1228 | /// \param Loc The source location description. |
||
1229 | /// \param BufSize Number of elements in the buffer. |
||
1230 | /// \param CpyBuf List of pointers to data to be copied. |
||
1231 | /// \param CpyFn function to call for copying data. |
||
1232 | /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise. |
||
1233 | /// |
||
1234 | /// \return The insertion position *after* the CopyPrivate call. |
||
1235 | |||
1236 | InsertPointTy createCopyPrivate(const LocationDescription &Loc, |
||
1237 | llvm::Value *BufSize, llvm::Value *CpyBuf, |
||
1238 | llvm::Value *CpyFn, llvm::Value *DidIt); |
||
1239 | |||
1240 | /// Generator for '#omp single' |
||
1241 | /// |
||
1242 | /// \param Loc The source location description. |
||
1243 | /// \param BodyGenCB Callback that will generate the region code. |
||
1244 | /// \param FiniCB Callback to finalize variable copies. |
||
1245 | /// \param IsNowait If false, a barrier is emitted. |
||
1246 | /// \param DidIt Local variable used as a flag to indicate 'single' thread |
||
1247 | /// |
||
1248 | /// \returns The insertion position *after* the single call. |
||
1249 | InsertPointTy createSingle(const LocationDescription &Loc, |
||
1250 | BodyGenCallbackTy BodyGenCB, |
||
1251 | FinalizeCallbackTy FiniCB, bool IsNowait, |
||
1252 | llvm::Value *DidIt); |
||
1253 | |||
1254 | /// Generator for '#omp master' |
||
1255 | /// |
||
1256 | /// \param Loc The insert and source location description. |
||
1257 | /// \param BodyGenCB Callback that will generate the region code. |
||
1258 | /// \param FiniCB Callback to finalize variable copies. |
||
1259 | /// |
||
1260 | /// \returns The insertion position *after* the master. |
||
1261 | InsertPointTy createMaster(const LocationDescription &Loc, |
||
1262 | BodyGenCallbackTy BodyGenCB, |
||
1263 | FinalizeCallbackTy FiniCB); |
||
1264 | |||
1265 | /// Generator for '#omp masked' |
||
1266 | /// |
||
1267 | /// \param Loc The insert and source location description. |
||
1268 | /// \param BodyGenCB Callback that will generate the region code. |
||
1269 | /// \param FiniCB Callback to finalize variable copies. |
||
1270 | /// |
||
1271 | /// \returns The insertion position *after* the masked. |
||
1272 | InsertPointTy createMasked(const LocationDescription &Loc, |
||
1273 | BodyGenCallbackTy BodyGenCB, |
||
1274 | FinalizeCallbackTy FiniCB, Value *Filter); |
||
1275 | |||
1276 | /// Generator for '#omp critical' |
||
1277 | /// |
||
1278 | /// \param Loc The insert and source location description. |
||
1279 | /// \param BodyGenCB Callback that will generate the region body code. |
||
1280 | /// \param FiniCB Callback to finalize variable copies. |
||
1281 | /// \param CriticalName name of the lock used by the critical directive |
||
1282 | /// \param HintInst Hint Instruction for hint clause associated with critical |
||
1283 | /// |
||
1284 | /// \returns The insertion position *after* the critical. |
||
1285 | InsertPointTy createCritical(const LocationDescription &Loc, |
||
1286 | BodyGenCallbackTy BodyGenCB, |
||
1287 | FinalizeCallbackTy FiniCB, |
||
1288 | StringRef CriticalName, Value *HintInst); |
||
1289 | |||
1290 | /// Generator for '#omp ordered depend (source | sink)' |
||
1291 | /// |
||
1292 | /// \param Loc The insert and source location description. |
||
1293 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
||
1294 | /// \param NumLoops The number of loops in depend clause. |
||
1295 | /// \param StoreValues The value will be stored in vector address. |
||
1296 | /// \param Name The name of alloca instruction. |
||
1297 | /// \param IsDependSource If true, depend source; otherwise, depend sink. |
||
1298 | /// |
||
1299 | /// \return The insertion position *after* the ordered. |
||
1300 | InsertPointTy createOrderedDepend(const LocationDescription &Loc, |
||
1301 | InsertPointTy AllocaIP, unsigned NumLoops, |
||
1302 | ArrayRef<llvm::Value *> StoreValues, |
||
1303 | const Twine &Name, bool IsDependSource); |
||
1304 | |||
1305 | /// Generator for '#omp ordered [threads | simd]' |
||
1306 | /// |
||
1307 | /// \param Loc The insert and source location description. |
||
1308 | /// \param BodyGenCB Callback that will generate the region code. |
||
1309 | /// \param FiniCB Callback to finalize variable copies. |
||
1310 | /// \param IsThreads If true, with threads clause or without clause; |
||
1311 | /// otherwise, with simd clause; |
||
1312 | /// |
||
1313 | /// \returns The insertion position *after* the ordered. |
||
1314 | InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, |
||
1315 | BodyGenCallbackTy BodyGenCB, |
||
1316 | FinalizeCallbackTy FiniCB, |
||
1317 | bool IsThreads); |
||
1318 | |||
1319 | /// Generator for '#omp sections' |
||
1320 | /// |
||
1321 | /// \param Loc The insert and source location description. |
||
1322 | /// \param AllocaIP The insertion points to be used for alloca instructions. |
||
1323 | /// \param SectionCBs Callbacks that will generate body of each section. |
||
1324 | /// \param PrivCB Callback to copy a given variable (think copy constructor). |
||
1325 | /// \param FiniCB Callback to finalize variable copies. |
||
1326 | /// \param IsCancellable Flag to indicate a cancellable parallel region. |
||
1327 | /// \param IsNowait If true, the barrier that ensures all sections are |
||
1328 | /// executed before moving forward will not be generated. |
||
1329 | /// \returns The insertion position *after* the sections. |
||
1330 | InsertPointTy createSections(const LocationDescription &Loc, |
||
1331 | InsertPointTy AllocaIP, |
||
1332 | ArrayRef<StorableBodyGenCallbackTy> SectionCBs, |
||
1333 | PrivatizeCallbackTy PrivCB, |
||
1334 | FinalizeCallbackTy FiniCB, bool IsCancellable, |
||
1335 | bool IsNowait); |
||
1336 | |||
1337 | /// Generator for '#omp section' |
||
1338 | /// |
||
1339 | /// \param Loc The insert and source location description. |
||
1340 | /// \param BodyGenCB Callback that will generate the region body code. |
||
1341 | /// \param FiniCB Callback to finalize variable copies. |
||
1342 | /// \returns The insertion position *after* the section. |
||
1343 | InsertPointTy createSection(const LocationDescription &Loc, |
||
1344 | BodyGenCallbackTy BodyGenCB, |
||
1345 | FinalizeCallbackTy FiniCB); |
||
1346 | |||
1347 | /// Generate conditional branch and relevant BasicBlocks through which private |
||
1348 | /// threads copy the 'copyin' variables from Master copy to threadprivate |
||
1349 | /// copies. |
||
1350 | /// |
||
1351 | /// \param IP insertion block for copyin conditional |
||
1352 | /// \param MasterAddr a pointer to the master variable |
||
1353 | /// \param PrivateAddr a pointer to the threadprivate variable |
||
1354 | /// \param IntPtrTy Pointer size type |
||
1355 | /// \param BranchtoEnd Create a branch between the copyin.not.master blocks |
||
1356 | /// and copy.in.end block |
||
1357 | /// |
||
1358 | /// \returns The insertion point where copying operation to be emitted. |
||
1359 | InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, |
||
1360 | Value *PrivateAddr, |
||
1361 | llvm::IntegerType *IntPtrTy, |
||
1362 | bool BranchtoEnd = true); |
||
1363 | |||
1364 | /// Create a runtime call for kmpc_Alloc |
||
1365 | /// |
||
1366 | /// \param Loc The insert and source location description. |
||
1367 | /// \param Size Size of allocated memory space |
||
1368 | /// \param Allocator Allocator information instruction |
||
1369 | /// \param Name Name of call Instruction for OMP_alloc |
||
1370 | /// |
||
1371 | /// \returns CallInst to the OMP_Alloc call |
||
1372 | CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size, |
||
1373 | Value *Allocator, std::string Name = ""); |
||
1374 | |||
1375 | /// Create a runtime call for kmpc_free |
||
1376 | /// |
||
1377 | /// \param Loc The insert and source location description. |
||
1378 | /// \param Addr Address of memory space to be freed |
||
1379 | /// \param Allocator Allocator information instruction |
||
1380 | /// \param Name Name of call Instruction for OMP_Free |
||
1381 | /// |
||
1382 | /// \returns CallInst to the OMP_Free call |
||
1383 | CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr, |
||
1384 | Value *Allocator, std::string Name = ""); |
||
1385 | |||
1386 | /// Create a runtime call for kmpc_threadprivate_cached |
||
1387 | /// |
||
1388 | /// \param Loc The insert and source location description. |
||
1389 | /// \param Pointer pointer to data to be cached |
||
1390 | /// \param Size size of data to be cached |
||
1391 | /// \param Name Name of call Instruction for callinst |
||
1392 | /// |
||
1393 | /// \returns CallInst to the thread private cache call. |
||
1394 | CallInst *createCachedThreadPrivate(const LocationDescription &Loc, |
||
1395 | llvm::Value *Pointer, |
||
1396 | llvm::ConstantInt *Size, |
||
1397 | const llvm::Twine &Name = Twine("")); |
||
1398 | |||
1399 | /// Create a runtime call for __tgt_interop_init |
||
1400 | /// |
||
1401 | /// \param Loc The insert and source location description. |
||
1402 | /// \param InteropVar variable to be allocated |
||
1403 | /// \param InteropType type of interop operation |
||
1404 | /// \param Device device to which offloading will occur |
||
1405 | /// \param NumDependences number of dependence variables |
||
1406 | /// \param DependenceAddress pointer to dependence variables |
||
1407 | /// \param HaveNowaitClause does nowait clause exist |
||
1408 | /// |
||
1409 | /// \returns CallInst to the __tgt_interop_init call |
||
1410 | CallInst *createOMPInteropInit(const LocationDescription &Loc, |
||
1411 | Value *InteropVar, |
||
1412 | omp::OMPInteropType InteropType, Value *Device, |
||
1413 | Value *NumDependences, |
||
1414 | Value *DependenceAddress, |
||
1415 | bool HaveNowaitClause); |
||
1416 | |||
1417 | /// Create a runtime call for __tgt_interop_destroy |
||
1418 | /// |
||
1419 | /// \param Loc The insert and source location description. |
||
1420 | /// \param InteropVar variable to be allocated |
||
1421 | /// \param Device device to which offloading will occur |
||
1422 | /// \param NumDependences number of dependence variables |
||
1423 | /// \param DependenceAddress pointer to dependence variables |
||
1424 | /// \param HaveNowaitClause does nowait clause exist |
||
1425 | /// |
||
1426 | /// \returns CallInst to the __tgt_interop_destroy call |
||
1427 | CallInst *createOMPInteropDestroy(const LocationDescription &Loc, |
||
1428 | Value *InteropVar, Value *Device, |
||
1429 | Value *NumDependences, |
||
1430 | Value *DependenceAddress, |
||
1431 | bool HaveNowaitClause); |
||
1432 | |||
1433 | /// Create a runtime call for __tgt_interop_use |
||
1434 | /// |
||
1435 | /// \param Loc The insert and source location description. |
||
1436 | /// \param InteropVar variable to be allocated |
||
1437 | /// \param Device device to which offloading will occur |
||
1438 | /// \param NumDependences number of dependence variables |
||
1439 | /// \param DependenceAddress pointer to dependence variables |
||
1440 | /// \param HaveNowaitClause does nowait clause exist |
||
1441 | /// |
||
1442 | /// \returns CallInst to the __tgt_interop_use call |
||
1443 | CallInst *createOMPInteropUse(const LocationDescription &Loc, |
||
1444 | Value *InteropVar, Value *Device, |
||
1445 | Value *NumDependences, Value *DependenceAddress, |
||
1446 | bool HaveNowaitClause); |
||
1447 | |||
1448 | /// The `omp target` interface |
||
1449 | /// |
||
1450 | /// For more information about the usage of this interface, |
||
1451 | /// \see openmp/libomptarget/deviceRTLs/common/include/target.h |
||
1452 | /// |
||
1453 | ///{ |
||
1454 | |||
1455 | /// Create a runtime call for kmpc_target_init |
||
1456 | /// |
||
1457 | /// \param Loc The insert and source location description. |
||
1458 | /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. |
||
1459 | InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD); |
||
1460 | |||
1461 | /// Create a runtime call for kmpc_target_deinit |
||
1462 | /// |
||
1463 | /// \param Loc The insert and source location description. |
||
1464 | /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. |
||
1465 | void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD); |
||
1466 | |||
1467 | ///} |
||
1468 | |||
1469 | private: |
||
1470 | // Sets the function attributes expected for the outlined function |
||
1471 | void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn, |
||
1472 | int32_t NumTeams, |
||
1473 | int32_t NumThreads); |
||
1474 | |||
1475 | // Creates the function ID/Address for the given outlined function. |
||
1476 | // In the case of an embedded device function the address of the function is |
||
1477 | // used, in the case of a non-offload function a constant is created. |
||
1478 | Constant *createOutlinedFunctionID(Function *OutlinedFn, |
||
1479 | StringRef EntryFnIDName); |
||
1480 | |||
1481 | // Creates the region entry address for the outlined function |
||
1482 | Constant *createTargetRegionEntryAddr(Function *OutlinedFunction, |
||
1483 | StringRef EntryFnName); |
||
1484 | |||
1485 | public: |
||
1486 | /// Functions used to generate a function with the given name. |
||
1487 | using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>; |
||
1488 | |||
1489 | /// Create a unique name for the entry function using the source location |
||
1490 | /// information of the current target region. The name will be something like: |
||
1491 | /// |
||
1492 | /// __omp_offloading_DD_FFFF_PP_lBB[_CC] |
||
1493 | /// |
||
1494 | /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the |
||
1495 | /// mangled name of the function that encloses the target region and BB is the |
||
1496 | /// line number of the target region. CC is a count added when more than one |
||
1497 | /// region is located at the same location. |
||
1498 | /// |
||
1499 | /// If this target outline function is not an offload entry, we don't need to |
||
1500 | /// register it. This may happen if it is guarded by an if clause that is |
||
1501 | /// false at compile time, or no target archs have been specified. |
||
1502 | /// |
||
1503 | /// The created target region ID is used by the runtime library to identify |
||
1504 | /// the current target region, so it only has to be unique and not |
||
1505 | /// necessarily point to anything. It could be the pointer to the outlined |
||
1506 | /// function that implements the target region, but we aren't using that so |
||
1507 | /// that the compiler doesn't need to keep that, and could therefore inline |
||
1508 | /// the host function if proven worthwhile during optimization. On the other |
||
1509 | /// hand, if emitting code for the device, the ID has to be the function |
||
1510 | /// address so that it can be retrieved from the offloading entry and launched |
||
1511 | /// by the runtime library. We also mark the outlined function to have |
||
1512 | /// external linkage in case we are emitting code for the device, because |
||
1513 | /// these functions will be entry points to the device. |
||
1514 | /// |
||
1515 | /// \param InfoManager The info manager keeping track of the offload entries |
||
1516 | /// \param EntryInfo The entry information about the function |
||
1517 | /// \param GenerateFunctionCallback The callback function to generate the code |
||
1518 | /// \param NumTeams Number default teams |
||
1519 | /// \param NumThreads Number default threads |
||
1520 | /// \param OutlinedFn Pointer to the outlined function |
||
1521 | /// \param OutlinedFnID The ID to be created for the outlined function |
||
1522 | void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, |
||
1523 | TargetRegionEntryInfo &EntryInfo, |
||
1524 | FunctionGenCallback &GenerateFunctionCallback, |
||
1525 | int32_t NumTeams, int32_t NumThreads, |
||
1526 | bool IsOffloadEntry, Function *&OutlinedFn, |
||
1527 | Constant *&OutlinedFnID); |
||
1528 | |||
1529 | /// Registers the given function and sets up the attributes of the function. |
||
1530 | /// Returns the FunctionID. |
||
1531 | /// |
||
1532 | /// \param InfoManager The info manager keeping track of the offload entries |
||
1533 | /// \param EntryInfo The entry information about the function |
||
1534 | /// \param OutlinedFunction Pointer to the outlined function |
||
1535 | /// \param EntryFnName Name of the outlined function |
||
1536 | /// \param EntryFnIDName Name of the ID to be created |
||
1537 | /// \param NumTeams Number default teams |
||
1538 | /// \param NumThreads Number default threads |
||
1539 | Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, |
||
1540 | TargetRegionEntryInfo &EntryInfo, |
||
1541 | Function *OutlinedFunction, |
||
1542 | StringRef EntryFnName, |
||
1543 | StringRef EntryFnIDName, |
||
1544 | int32_t NumTeams, int32_t NumThreads); |
||
1545 | |||
1546 | /// Declarations for LLVM-IR types (simple, array, function and structure) are |
||
1547 | /// generated below. Their names are defined and used in OpenMPKinds.def. Here |
||
1548 | /// we provide the declarations, the initializeTypes function will provide the |
||
1549 | /// values. |
||
1550 | /// |
||
1551 | ///{ |
||
1552 | #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr; |
||
1553 | #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \ |
||
1554 | ArrayType *VarName##Ty = nullptr; \ |
||
1555 | PointerType *VarName##PtrTy = nullptr; |
||
1556 | #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \ |
||
1557 | FunctionType *VarName = nullptr; \ |
||
1558 | PointerType *VarName##Ptr = nullptr; |
||
1559 | #define OMP_STRUCT_TYPE(VarName, StrName, ...) \ |
||
1560 | StructType *VarName = nullptr; \ |
||
1561 | PointerType *VarName##Ptr = nullptr; |
||
1562 | #include "llvm/Frontend/OpenMP/OMPKinds.def" |
||
1563 | |||
1564 | ///} |
||
1565 | |||
1566 | private: |
||
1567 | /// Create all simple and struct types exposed by the runtime and remember |
||
1568 | /// the llvm::PointerTypes of them for easy access later. |
||
1569 | void initializeTypes(Module &M); |
||
1570 | |||
1571 | /// Common interface for generating entry calls for OMP Directives. |
||
1572 | /// If the directive has a region/body, it will set the insertion |
||
1573 | /// point to the body |
||
1574 | /// |
||
1575 | /// \param OMPD Directive to generate entry blocks for |
||
1576 | /// \param EntryCall Call to the entry OMP Runtime Function |
||
1577 | /// \param ExitBB block where the region ends. |
||
1578 | /// \param Conditional indicate if the entry call result will be used |
||
1579 | /// to evaluate a conditional of whether a thread will execute |
||
1580 | /// body code or not. |
||
1581 | /// |
||
1582 | /// \return The insertion position in exit block |
||
1583 | InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall, |
||
1584 | BasicBlock *ExitBB, |
||
1585 | bool Conditional = false); |
||
1586 | |||
1587 | /// Common interface to finalize the region |
||
1588 | /// |
||
1589 | /// \param OMPD Directive to generate exiting code for |
||
1590 | /// \param FinIP Insertion point for emitting Finalization code and exit call |
||
1591 | /// \param ExitCall Call to the ending OMP Runtime Function |
||
1592 | /// \param HasFinalize indicate if the directive will require finalization |
||
1593 | /// and has a finalization callback in the stack that |
||
1594 | /// should be called. |
||
1595 | /// |
||
1596 | /// \return The insertion position in exit block |
||
1597 | InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD, |
||
1598 | InsertPointTy FinIP, |
||
1599 | Instruction *ExitCall, |
||
1600 | bool HasFinalize = true); |
||
1601 | |||
1602 | /// Common Interface to generate OMP inlined regions |
||
1603 | /// |
||
1604 | /// \param OMPD Directive to generate inlined region for |
||
1605 | /// \param EntryCall Call to the entry OMP Runtime Function |
||
1606 | /// \param ExitCall Call to the ending OMP Runtime Function |
||
1607 | /// \param BodyGenCB Body code generation callback. |
||
1608 | /// \param FiniCB Finalization Callback. Will be called when finalizing region |
||
1609 | /// \param Conditional indicate if the entry call result will be used |
||
1610 | /// to evaluate a conditional of whether a thread will execute |
||
1611 | /// body code or not. |
||
1612 | /// \param HasFinalize indicate if the directive will require finalization |
||
1613 | /// and has a finalization callback in the stack that |
||
1614 | /// should be called. |
||
1615 | /// \param IsCancellable if HasFinalize is set to true, indicate if the |
||
1616 | /// directive should be cancellable. |
||
1617 | /// \return The insertion point after the region |
||
1618 | |||
1619 | InsertPointTy |
||
1620 | EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, |
||
1621 | Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, |
||
1622 | FinalizeCallbackTy FiniCB, bool Conditional = false, |
||
1623 | bool HasFinalize = true, bool IsCancellable = false); |
||
1624 | |||
1625 | /// Get the platform-specific name separator. |
||
1626 | /// \param Parts different parts of the final name that needs separation |
||
1627 | /// \param FirstSeparator First separator used between the initial two |
||
1628 | /// parts of the name. |
||
1629 | /// \param Separator separator used between all of the rest consecutive |
||
1630 | /// parts of the name |
||
1631 | static std::string getNameWithSeparators(ArrayRef<StringRef> Parts, |
||
1632 | StringRef FirstSeparator, |
||
1633 | StringRef Separator); |
||
1634 | |||
1635 | /// Returns corresponding lock object for the specified critical region |
||
1636 | /// name. If the lock object does not exist it is created, otherwise the |
||
1637 | /// reference to the existing copy is returned. |
||
1638 | /// \param CriticalName Name of the critical region. |
||
1639 | /// |
||
1640 | Value *getOMPCriticalRegionLock(StringRef CriticalName); |
||
1641 | |||
1642 | /// Callback type for Atomic Expression update |
||
1643 | /// ex: |
||
1644 | /// \code{.cpp} |
||
1645 | /// unsigned x = 0; |
||
1646 | /// #pragma omp atomic update |
||
1647 | /// x = Expr(x_old); //Expr() is any legal operation |
||
1648 | /// \endcode |
||
1649 | /// |
||
1650 | /// \param XOld the value of the atomic memory address to use for update |
||
1651 | /// \param IRB reference to the IRBuilder to use |
||
1652 | /// |
||
1653 | /// \returns Value to update X to. |
||
1654 | using AtomicUpdateCallbackTy = |
||
1655 | const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>; |
||
1656 | |||
1657 | private: |
||
1658 | enum AtomicKind { Read, Write, Update, Capture, Compare }; |
||
1659 | |||
1660 | /// Determine whether to emit flush or not |
||
1661 | /// |
||
1662 | /// \param Loc The insert and source location description. |
||
1663 | /// \param AO The required atomic ordering |
||
1664 | /// \param AK The OpenMP atomic operation kind used. |
||
1665 | /// |
||
1666 | /// \returns whether a flush was emitted or not |
||
1667 | bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc, |
||
1668 | AtomicOrdering AO, AtomicKind AK); |
||
1669 | |||
1670 | /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X |
||
1671 | /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) |
||
1672 | /// Only Scalar data types. |
||
1673 | /// |
||
1674 | /// \param AllocaIP The insertion point to be used for alloca |
||
1675 | /// instructions. |
||
1676 | /// \param X The target atomic pointer to be updated |
||
1677 | /// \param XElemTy The element type of the atomic pointer. |
||
1678 | /// \param Expr The value to update X with. |
||
1679 | /// \param AO Atomic ordering of the generated atomic |
||
1680 | /// instructions. |
||
1681 | /// \param RMWOp The binary operation used for update. If |
||
1682 | /// operation is not supported by atomicRMW, |
||
1683 | /// or belong to {FADD, FSUB, BAD_BINOP}. |
||
1684 | /// Then a `cmpExch` based atomic will be generated. |
||
1685 | /// \param UpdateOp Code generator for complex expressions that cannot be |
||
1686 | /// expressed through atomicrmw instruction. |
||
1687 | /// \param VolatileX true if \a X volatile? |
||
1688 | /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the |
||
1689 | /// update expression, false otherwise. |
||
1690 | /// (e.g. true for X = X BinOp Expr) |
||
1691 | /// |
||
1692 | /// \returns A pair of the old value of X before the update, and the value |
||
1693 | /// used for the update. |
||
1694 | std::pair<Value *, Value *> |
||
1695 | emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, |
||
1696 | AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, |
||
1697 | AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, |
||
1698 | bool IsXBinopExpr); |
||
1699 | |||
1700 | /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 . |
||
1701 | /// |
||
1702 | /// \return The instruction |
||
1703 | Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2, |
||
1704 | AtomicRMWInst::BinOp RMWOp); |
||
1705 | |||
1706 | public: |
||
1707 | /// a struct to pack relevant information while generating atomic Ops |
||
1708 | struct AtomicOpValue { |
||
1709 | Value *Var = nullptr; |
||
1710 | Type *ElemTy = nullptr; |
||
1711 | bool IsSigned = false; |
||
1712 | bool IsVolatile = false; |
||
1713 | }; |
||
1714 | |||
1715 | /// Emit atomic Read for : V = X --- Only Scalar data types. |
||
1716 | /// |
||
1717 | /// \param Loc The insert and source location description. |
||
1718 | /// \param X The target pointer to be atomically read |
||
1719 | /// \param V Memory address where to store atomically read |
||
1720 | /// value |
||
1721 | /// \param AO Atomic ordering of the generated atomic |
||
1722 | /// instructions. |
||
1723 | /// |
||
1724 | /// \return Insertion point after generated atomic read IR. |
||
1725 | InsertPointTy createAtomicRead(const LocationDescription &Loc, |
||
1726 | AtomicOpValue &X, AtomicOpValue &V, |
||
1727 | AtomicOrdering AO); |
||
1728 | |||
1729 | /// Emit atomic write for : X = Expr --- Only Scalar data types. |
||
1730 | /// |
||
1731 | /// \param Loc The insert and source location description. |
||
1732 | /// \param X The target pointer to be atomically written to |
||
1733 | /// \param Expr The value to store. |
||
1734 | /// \param AO Atomic ordering of the generated atomic |
||
1735 | /// instructions. |
||
1736 | /// |
||
1737 | /// \return Insertion point after generated atomic Write IR. |
||
1738 | InsertPointTy createAtomicWrite(const LocationDescription &Loc, |
||
1739 | AtomicOpValue &X, Value *Expr, |
||
1740 | AtomicOrdering AO); |
||
1741 | |||
1742 | /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X |
||
1743 | /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X) |
||
1744 | /// Only Scalar data types. |
||
1745 | /// |
||
1746 | /// \param Loc The insert and source location description. |
||
1747 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
||
1748 | /// \param X The target atomic pointer to be updated |
||
1749 | /// \param Expr The value to update X with. |
||
1750 | /// \param AO Atomic ordering of the generated atomic instructions. |
||
1751 | /// \param RMWOp The binary operation used for update. If operation |
||
1752 | /// is not supported by atomicRMW, or belong to |
||
1753 | /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based |
||
1754 | /// atomic will be generated. |
||
1755 | /// \param UpdateOp Code generator for complex expressions that cannot be |
||
1756 | /// expressed through atomicrmw instruction. |
||
1757 | /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the |
||
1758 | /// update expression, false otherwise. |
||
1759 | /// (e.g. true for X = X BinOp Expr) |
||
1760 | /// |
||
1761 | /// \return Insertion point after generated atomic update IR. |
||
1762 | InsertPointTy createAtomicUpdate(const LocationDescription &Loc, |
||
1763 | InsertPointTy AllocaIP, AtomicOpValue &X, |
||
1764 | Value *Expr, AtomicOrdering AO, |
||
1765 | AtomicRMWInst::BinOp RMWOp, |
||
1766 | AtomicUpdateCallbackTy &UpdateOp, |
||
1767 | bool IsXBinopExpr); |
||
1768 | |||
1769 | /// Emit atomic update for constructs: --- Only Scalar data types |
||
1770 | /// V = X; X = X BinOp Expr , |
||
1771 | /// X = X BinOp Expr; V = X, |
||
1772 | /// V = X; X = Expr BinOp X, |
||
1773 | /// X = Expr BinOp X; V = X, |
||
1774 | /// V = X; X = UpdateOp(X), |
||
1775 | /// X = UpdateOp(X); V = X, |
||
1776 | /// |
||
1777 | /// \param Loc The insert and source location description. |
||
1778 | /// \param AllocaIP The insertion point to be used for alloca instructions. |
||
1779 | /// \param X The target atomic pointer to be updated |
||
1780 | /// \param V Memory address where to store captured value |
||
1781 | /// \param Expr The value to update X with. |
||
1782 | /// \param AO Atomic ordering of the generated atomic instructions |
||
1783 | /// \param RMWOp The binary operation used for update. If |
||
1784 | /// operation is not supported by atomicRMW, or belong to |
||
1785 | /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based |
||
1786 | /// atomic will be generated. |
||
1787 | /// \param UpdateOp Code generator for complex expressions that cannot be |
||
1788 | /// expressed through atomicrmw instruction. |
||
1789 | /// \param UpdateExpr true if X is an in place update of the form |
||
1790 | /// X = X BinOp Expr or X = Expr BinOp X |
||
1791 | /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the |
||
1792 | /// update expression, false otherwise. |
||
1793 | /// (e.g. true for X = X BinOp Expr) |
||
1794 | /// \param IsPostfixUpdate true if original value of 'x' must be stored in |
||
1795 | /// 'v', not an updated one. |
||
1796 | /// |
||
1797 | /// \return Insertion point after generated atomic capture IR. |
||
1798 | InsertPointTy |
||
1799 | createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, |
||
1800 | AtomicOpValue &X, AtomicOpValue &V, Value *Expr, |
||
1801 | AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, |
||
1802 | AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, |
||
1803 | bool IsPostfixUpdate, bool IsXBinopExpr); |
||
1804 | |||
1805 | /// Emit atomic compare for constructs: --- Only scalar data types |
||
1806 | /// cond-expr-stmt: |
||
1807 | /// x = x ordop expr ? expr : x; |
||
1808 | /// x = expr ordop x ? expr : x; |
||
1809 | /// x = x == e ? d : x; |
||
1810 | /// x = e == x ? d : x; (this one is not in the spec) |
||
1811 | /// cond-update-stmt: |
||
1812 | /// if (x ordop expr) { x = expr; } |
||
1813 | /// if (expr ordop x) { x = expr; } |
||
1814 | /// if (x == e) { x = d; } |
||
1815 | /// if (e == x) { x = d; } (this one is not in the spec) |
||
1816 | /// conditional-update-capture-atomic: |
||
1817 | /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false) |
||
1818 | /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false) |
||
1819 | /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false, |
||
1820 | /// IsFailOnly=true) |
||
1821 | /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false) |
||
1822 | /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false, |
||
1823 | /// IsFailOnly=true) |
||
1824 | /// |
||
1825 | /// \param Loc The insert and source location description. |
||
1826 | /// \param X The target atomic pointer to be updated. |
||
1827 | /// \param V Memory address where to store captured value (for |
||
1828 | /// compare capture only). |
||
1829 | /// \param R Memory address where to store comparison result |
||
1830 | /// (for compare capture with '==' only). |
||
1831 | /// \param E The expected value ('e') for forms that use an |
||
1832 | /// equality comparison or an expression ('expr') for |
||
1833 | /// forms that use 'ordop' (logically an atomic maximum or |
||
1834 | /// minimum). |
||
1835 | /// \param D The desired value for forms that use an equality |
||
1836 | /// comparison. For forms that use 'ordop', it should be |
||
1837 | /// \p nullptr. |
||
1838 | /// \param AO Atomic ordering of the generated atomic instructions. |
||
1839 | /// \param Op Atomic compare operation. It can only be ==, <, or >. |
||
1840 | /// \param IsXBinopExpr True if the conditional statement is in the form where |
||
1841 | /// x is on LHS. It only matters for < or >. |
||
1842 | /// \param IsPostfixUpdate True if original value of 'x' must be stored in |
||
1843 | /// 'v', not an updated one (for compare capture |
||
1844 | /// only). |
||
1845 | /// \param IsFailOnly True if the original value of 'x' is stored to 'v' |
||
1846 | /// only when the comparison fails. This is only valid for |
||
1847 | /// the case the comparison is '=='. |
||
1848 | /// |
||
1849 | /// \return Insertion point after generated atomic compare IR. |
||
1850 | InsertPointTy |
||
1851 | createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, |
||
1852 | AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, |
||
1853 | AtomicOrdering AO, omp::OMPAtomicCompareOp Op, |
||
1854 | bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly); |
||
1855 | |||
1856 | /// Create the control flow structure of a canonical OpenMP loop. |
||
1857 | /// |
||
1858 | /// The emitted loop will be disconnected, i.e. no edge to the loop's |
||
1859 | /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's |
||
1860 | /// IRBuilder location is not preserved. |
||
1861 | /// |
||
1862 | /// \param DL DebugLoc used for the instructions in the skeleton. |
||
1863 | /// \param TripCount Value to be used for the trip count. |
||
1864 | /// \param F Function in which to insert the BasicBlocks. |
||
1865 | /// \param PreInsertBefore Where to insert BBs that execute before the body, |
||
1866 | /// typically the body itself. |
||
1867 | /// \param PostInsertBefore Where to insert BBs that execute after the body. |
||
1868 | /// \param Name Base name used to derive BB |
||
1869 | /// and instruction names; defaults to an empty name. |
||
1870 | /// |
||
1871 | /// \returns The CanonicalLoopInfo that represents the emitted loop. |
||
1872 | CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount, |
||
1873 | Function *F, |
||
1874 | BasicBlock *PreInsertBefore, |
||
1875 | BasicBlock *PostInsertBefore, |
||
1876 | const Twine &Name = {}); |
||
1877 | /// Name string of the OpenMP offload info metadata. |
||
1878 | const std::string ompOffloadInfoName = "omp_offload.info"; |
||
1879 | |||
1880 | /// Loads all the offload entries information from the host IR |
||
1881 | /// metadata. This function is only meant to be used with device code |
||
1882 | /// generation. |
||
1883 | /// |
||
1884 | /// \param M Module to load Metadata info from. The module passed may be |
||
1885 | /// loaded from a bitcode file, i.e. different from OpenMPIRBuilder::M module. |
||
1886 | /// \param OffloadEntriesInfoManager Initialize Offload Entry information. |
||
1887 | void |
||
1888 | loadOffloadInfoMetadata(Module &M, |
||
1889 | OffloadEntriesInfoManager &OffloadEntriesInfoManager); |
||
1890 | |||
1891 | /// Gets (if a variable with the given name already exists) or creates an |
||
1892 | /// internal global variable with the specified Name. The created variable has |
||
1893 | /// linkage CommonLinkage by default and is initialized by null value. |
||
1894 | /// \param Ty Type of the global variable. If it already exists, the type |
||
1895 | /// must be the same. |
||
1896 | /// \param Name Name of the variable. |
||
1897 | GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name, |
||
1898 | unsigned AddressSpace = 0); |
||
1899 | }; |
||
1900 | |||
1901 | /// Data structure to contain the information needed to uniquely identify |
||
1902 | /// a target entry. |
||
1903 | struct TargetRegionEntryInfo { |
||
1904 | std::string ParentName; |
||
1905 | unsigned DeviceID; |
||
1906 | unsigned FileID; |
||
1907 | unsigned Line; |
||
1908 | unsigned Count; |
||
1909 | |||
1910 | TargetRegionEntryInfo() |
||
1911 | : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} |
||
1912 | TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, |
||
1913 | unsigned FileID, unsigned Line, unsigned Count = 0) |
||
1914 | : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), |
||
1915 | Count(Count) {} |
||
1916 | |||
1917 | static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, |
||
1918 | StringRef ParentName, |
||
1919 | unsigned DeviceID, unsigned FileID, |
||
1920 | unsigned Line, unsigned Count); |
||
1921 | |||
1922 | bool operator<(const TargetRegionEntryInfo RHS) const { |
||
1923 | return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < |
||
1924 | std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, |
||
1925 | RHS.Count); |
||
1926 | } |
||
1927 | }; |
||
1928 | |||
1929 | /// Class that manages information about offload code regions and data |
||
1930 | class OffloadEntriesInfoManager { |
||
1931 | /// Number of entries registered so far. |
||
1932 | OpenMPIRBuilderConfig Config; |
||
1933 | unsigned OffloadingEntriesNum = 0; |
||
1934 | |||
1935 | public: |
||
1936 | void setConfig(OpenMPIRBuilderConfig C) { Config = C; } |
||
1937 | |||
1938 | /// Base class of the entries info. |
||
1939 | class OffloadEntryInfo { |
||
1940 | public: |
||
1941 | /// Kind of a given entry. |
||
1942 | enum OffloadingEntryInfoKinds : unsigned { |
||
1943 | /// Entry is a target region. |
||
1944 | OffloadingEntryInfoTargetRegion = 0, |
||
1945 | /// Entry is a declare target variable. |
||
1946 | OffloadingEntryInfoDeviceGlobalVar = 1, |
||
1947 | /// Invalid entry info. |
||
1948 | OffloadingEntryInfoInvalid = ~0u |
||
1949 | }; |
||
1950 | |||
1951 | protected: |
||
1952 | OffloadEntryInfo() = delete; |
||
1953 | explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} |
||
1954 | explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, |
||
1955 | uint32_t Flags) |
||
1956 | : Flags(Flags), Order(Order), Kind(Kind) {} |
||
1957 | ~OffloadEntryInfo() = default; |
||
1958 | |||
1959 | public: |
||
1960 | bool isValid() const { return Order != ~0u; } |
||
1961 | unsigned getOrder() const { return Order; } |
||
1962 | OffloadingEntryInfoKinds getKind() const { return Kind; } |
||
1963 | uint32_t getFlags() const { return Flags; } |
||
1964 | void setFlags(uint32_t NewFlags) { Flags = NewFlags; } |
||
1965 | Constant *getAddress() const { return cast_or_null<Constant>(Addr); } |
||
1966 | void setAddress(Constant *V) { |
||
1967 | assert(!Addr.pointsToAliveValue() && "Address has been set before!"); |
||
1968 | Addr = V; |
||
1969 | } |
||
1970 | static bool classof(const OffloadEntryInfo *Info) { return true; } |
||
1971 | |||
1972 | private: |
||
1973 | /// Address of the entity that has to be mapped for offloading. |
||
1974 | WeakTrackingVH Addr; |
||
1975 | |||
1976 | /// Flags associated with the device global. |
||
1977 | uint32_t Flags = 0u; |
||
1978 | |||
1979 | /// Order this entry was emitted. |
||
1980 | unsigned Order = ~0u; |
||
1981 | |||
1982 | OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; |
||
1983 | }; |
||
1984 | |||
1985 | /// Return true if a there are no entries defined. |
||
1986 | bool empty() const; |
||
1987 | /// Return number of entries defined so far. |
||
1988 | unsigned size() const { return OffloadingEntriesNum; } |
||
1989 | |||
1990 | OffloadEntriesInfoManager() : Config() {} |
||
1991 | |||
1992 | // |
||
1993 | // Target region entries related. |
||
1994 | // |
||
1995 | |||
1996 | /// Kind of the target registry entry. |
||
1997 | enum OMPTargetRegionEntryKind : uint32_t { |
||
1998 | /// Mark the entry as target region. |
||
1999 | OMPTargetRegionEntryTargetRegion = 0x0, |
||
2000 | /// Mark the entry as a global constructor. |
||
2001 | OMPTargetRegionEntryCtor = 0x02, |
||
2002 | /// Mark the entry as a global destructor. |
||
2003 | OMPTargetRegionEntryDtor = 0x04, |
||
2004 | }; |
||
2005 | |||
2006 | /// Target region entries info. |
||
2007 | class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { |
||
2008 | /// Address that can be used as the ID of the entry. |
||
2009 | Constant *ID = nullptr; |
||
2010 | |||
2011 | public: |
||
2012 | OffloadEntryInfoTargetRegion() |
||
2013 | : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} |
||
2014 | explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, |
||
2015 | Constant *ID, |
||
2016 | OMPTargetRegionEntryKind Flags) |
||
2017 | : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), |
||
2018 | ID(ID) { |
||
2019 | setAddress(Addr); |
||
2020 | } |
||
2021 | |||
2022 | Constant *getID() const { return ID; } |
||
2023 | void setID(Constant *V) { |
||
2024 | assert(!ID && "ID has been set before!"); |
||
2025 | ID = V; |
||
2026 | } |
||
2027 | static bool classof(const OffloadEntryInfo *Info) { |
||
2028 | return Info->getKind() == OffloadingEntryInfoTargetRegion; |
||
2029 | } |
||
2030 | }; |
||
2031 | |||
2032 | /// Initialize target region entry. |
||
2033 | /// This is ONLY needed for DEVICE compilation. |
||
2034 | void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, |
||
2035 | unsigned Order); |
||
2036 | /// Register target region entry. |
||
2037 | void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, |
||
2038 | Constant *Addr, Constant *ID, |
||
2039 | OMPTargetRegionEntryKind Flags); |
||
2040 | /// Return true if a target region entry with the provided information |
||
2041 | /// exists. |
||
2042 | bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, |
||
2043 | bool IgnoreAddressId = false) const; |
||
2044 | |||
2045 | // Return the Name based on \a EntryInfo using the next available Count. |
||
2046 | void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name, |
||
2047 | const TargetRegionEntryInfo &EntryInfo); |
||
2048 | |||
2049 | /// brief Applies action \a Action on all registered entries. |
||
2050 | typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo, |
||
2051 | const OffloadEntryInfoTargetRegion &)> |
||
2052 | OffloadTargetRegionEntryInfoActTy; |
||
2053 | void |
||
2054 | actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); |
||
2055 | |||
2056 | // |
||
2057 | // Device global variable entries related. |
||
2058 | // |
||
2059 | |||
2060 | /// Kind of the global variable entry.. |
||
2061 | enum OMPTargetGlobalVarEntryKind : uint32_t { |
||
2062 | /// Mark the entry as a to declare target. |
||
2063 | OMPTargetGlobalVarEntryTo = 0x0, |
||
2064 | /// Mark the entry as a to declare target link. |
||
2065 | OMPTargetGlobalVarEntryLink = 0x1, |
||
2066 | }; |
||
2067 | |||
2068 | /// Device global variable entries info. |
||
2069 | class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { |
||
2070 | /// Type of the global variable. |
||
2071 | int64_t VarSize; |
||
2072 | GlobalValue::LinkageTypes Linkage; |
||
2073 | |||
2074 | public: |
||
2075 | OffloadEntryInfoDeviceGlobalVar() |
||
2076 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} |
||
2077 | explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, |
||
2078 | OMPTargetGlobalVarEntryKind Flags) |
||
2079 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} |
||
2080 | explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, |
||
2081 | int64_t VarSize, |
||
2082 | OMPTargetGlobalVarEntryKind Flags, |
||
2083 | GlobalValue::LinkageTypes Linkage) |
||
2084 | : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), |
||
2085 | VarSize(VarSize), Linkage(Linkage) { |
||
2086 | setAddress(Addr); |
||
2087 | } |
||
2088 | |||
2089 | int64_t getVarSize() const { return VarSize; } |
||
2090 | void setVarSize(int64_t Size) { VarSize = Size; } |
||
2091 | GlobalValue::LinkageTypes getLinkage() const { return Linkage; } |
||
2092 | void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } |
||
2093 | static bool classof(const OffloadEntryInfo *Info) { |
||
2094 | return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; |
||
2095 | } |
||
2096 | }; |
||
2097 | |||
2098 | /// Initialize device global variable entry. |
||
2099 | /// This is ONLY used for DEVICE compilation. |
||
2100 | void initializeDeviceGlobalVarEntryInfo(StringRef Name, |
||
2101 | OMPTargetGlobalVarEntryKind Flags, |
||
2102 | unsigned Order); |
||
2103 | |||
2104 | /// Register device global variable entry. |
||
2105 | void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, |
||
2106 | int64_t VarSize, |
||
2107 | OMPTargetGlobalVarEntryKind Flags, |
||
2108 | GlobalValue::LinkageTypes Linkage); |
||
2109 | /// Checks if the variable with the given name has been registered already. |
||
2110 | bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { |
||
2111 | return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; |
||
2112 | } |
||
2113 | /// Applies action \a Action on all registered entries. |
||
2114 | typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> |
||
2115 | OffloadDeviceGlobalVarEntryInfoActTy; |
||
2116 | void actOnDeviceGlobalVarEntriesInfo( |
||
2117 | const OffloadDeviceGlobalVarEntryInfoActTy &Action); |
||
2118 | |||
2119 | private: |
||
2120 | /// Return the count of entries at a particular source location. |
||
2121 | unsigned |
||
2122 | getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; |
||
2123 | |||
2124 | /// Update the count of entries at a particular source location. |
||
2125 | void |
||
2126 | incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); |
||
2127 | |||
2128 | static TargetRegionEntryInfo |
||
2129 | getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { |
||
2130 | return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, |
||
2131 | EntryInfo.FileID, EntryInfo.Line, 0); |
||
2132 | } |
||
2133 | |||
2134 | // Count of entries at a location. |
||
2135 | std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount; |
||
2136 | |||
2137 | // Storage for target region entries kind. |
||
2138 | typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion> |
||
2139 | OffloadEntriesTargetRegionTy; |
||
2140 | OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; |
||
2141 | /// Storage for device global variable entries kind. The storage is to be |
||
2142 | /// indexed by mangled name. |
||
2143 | typedef StringMap<OffloadEntryInfoDeviceGlobalVar> |
||
2144 | OffloadEntriesDeviceGlobalVarTy; |
||
2145 | OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; |
||
2146 | }; |
||
2147 | |||
2148 | /// Class to represent the control flow structure of an OpenMP canonical loop. |
||
2149 | /// |
||
2150 | /// The control-flow structure is standardized for easy consumption by |
||
2151 | /// directives associated with loops. For instance, the worksharing-loop |
||
2152 | /// construct may change this control flow such that each loop iteration is |
||
2153 | /// executed on only one thread. The constraints of a canonical loop in brief |
||
2154 | /// are: |
||
2155 | /// |
||
2156 | /// * The number of loop iterations must have been computed before entering the |
||
2157 | /// loop. |
||
2158 | /// |
||
2159 | /// * Has an (unsigned) logical induction variable that starts at zero and |
||
2160 | /// increments by one. |
||
2161 | /// |
||
2162 | /// * The loop's CFG itself has no side-effects. The OpenMP specification |
||
2163 | /// itself allows side-effects, but the order in which they happen, including |
||
2164 | /// how often or whether at all, is unspecified. We expect that the frontend |
||
2165 | /// will emit those side-effect instructions somewhere (e.g. before the loop) |
||
2166 | /// such that the CanonicalLoopInfo itself can be side-effect free. |
||
2167 | /// |
||
2168 | /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated |
||
2169 | /// execution of a loop body that satisfies these constraints. It does NOT |
||
2170 | /// represent arbitrary SESE regions that happen to contain a loop. Do not use |
||
2171 | /// CanonicalLoopInfo for such purposes. |
||
2172 | /// |
||
2173 | /// The control flow can be described as follows: |
||
2174 | /// |
||
2175 | /// Preheader |
||
2176 | /// | |
||
2177 | /// /-> Header |
||
2178 | /// | | |
||
2179 | /// | Cond---\ |
||
2180 | /// | | | |
||
2181 | /// | Body | |
||
2182 | /// | | | | |
||
2183 | /// | <...> | |
||
2184 | /// | | | | |
||
2185 | /// \--Latch | |
||
2186 | /// | |
||
2187 | /// Exit |
||
2188 | /// | |
||
2189 | /// After |
||
2190 | /// |
||
2191 | /// The loop is thought to start at PreheaderIP (at the Preheader's terminator, |
||
2192 | /// including) and end at AfterIP (at the After's first instruction, excluding). |
||
2193 | /// That is, instructions in the Preheader and After blocks (except the |
||
2194 | /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have |
||
2195 | /// side-effects. Typically, the Preheader is used to compute the loop's trip |
||
2196 | /// count. The instructions from BodyIP (at the Body block's first instruction, |
||
2197 | /// excluding) until the Latch are also considered outside CanonicalLoopInfo's |
||
2198 | /// control and thus can have side-effects. The body block is the single entry |
||
2199 | /// point into the loop body, which may contain arbitrary control flow as long |
||
2200 | /// as all control paths eventually branch to the Latch block. |
||
2201 | /// |
||
2202 | /// TODO: Consider adding another standardized BasicBlock between Body CFG and |
||
2203 | /// Latch to guarantee that there is only a single edge to the latch. It would |
||
2204 | /// make loop transformations easier by not needing to consider multiple |
||
2205 | /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us |
||
2206 | /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that |
||
2207 | /// executes after each body iteration. |
||
2208 | /// |
||
2209 | /// There must be no loop-carried dependencies through llvm::Values. This is |
||
2210 | /// equivalent to saying that the Latch has no PHINode and the Header's only PHINode is |
||
2211 | /// for the induction variable. |
||
2212 | /// |
||
2213 | /// All code in Header, Cond, Latch and Exit (plus the terminator of the |
||
2214 | /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked |
||
2215 | /// by assertOK(). They are expected to not be modified unless explicitly |
||
2216 | /// modifying the CanonicalLoopInfo through a method that applies an OpenMP |
||
2217 | /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop, |
||
2218 | /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its |
||
2219 | /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used |
||
2220 | /// anymore as its underlying control flow may not exist anymore. |
||
2221 | /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop |
||
2222 | /// may also return a new CanonicalLoopInfo that can be passed to other |
||
2223 | /// loop-associated construct implementing methods. These loop-transforming |
||
2224 | /// methods may either create a new CanonicalLoopInfo usually using |
||
2225 | /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and |
||
2226 | /// modify one of the input CanonicalLoopInfo and return it as representing the |
||
2227 | /// modified loop. What is done is an implementation detail of |
||
2228 | /// transformation-implementing method and callers should always assume that the |
||
2229 | /// CanonicalLoopInfo passed to it is invalidated and a new object is returned. |
||
2230 | /// Returned CanonicalLoopInfo have the same structure and guarantees as the one |
||
2231 | /// created by createCanonicalLoop, such that transforming methods do not have |
||
2232 | /// to special case where the CanonicalLoopInfo originated from. |
||
2233 | /// |
||
2234 | /// Generally, methods consuming CanonicalLoopInfo do not need an |
||
2235 | /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the |
||
2236 | /// CanonicalLoopInfo to insert new or modify existing instructions. Unless |
||
2237 | /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate |
||
2238 | /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, |
||
2239 | /// any InsertPoint in the Preheader, After or Block can still be used after |
||
2240 | /// calling such a method. |
||
2241 | /// |
||
2242 | /// TODO: Provide mechanisms for exception handling and cancellation points. |
||
2243 | /// |
||
2244 | /// Defined outside OpenMPIRBuilder because nested classes cannot be |
||
2245 | /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. |
||
2246 | class CanonicalLoopInfo { |
||
2247 | friend class OpenMPIRBuilder; |
||
2248 | |||
2249 | private: |
||
2250 | BasicBlock *Header = nullptr; |
||
2251 | BasicBlock *Cond = nullptr; |
||
2252 | BasicBlock *Latch = nullptr; |
||
2253 | BasicBlock *Exit = nullptr; |
||
2254 | |||
2255 | /// Add the control blocks of this loop to \p BBs. |
||
2256 | /// |
||
2257 | /// This does not include any block from the body, including the one returned |
||
2258 | /// by getBody(). |
||
2259 | /// |
||
2260 | /// FIXME: This currently includes the Preheader and After blocks even though |
||
2261 | /// their content is (mostly) not under CanonicalLoopInfo's control. |
||
2262 | /// Re-evaluated whether this makes sense. |
||
2263 | void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); |
||
2264 | |||
2265 | /// Sets the number of loop iterations to the given value. This value must be |
||
2266 | /// valid in the condition block (i.e., defined in the preheader) and is |
||
2267 | /// interpreted as an unsigned integer. |
||
2268 | void setTripCount(Value *TripCount); |
||
2269 | |||
2270 | /// Replace all uses of the canonical induction variable in the loop body with |
||
2271 | /// a new one. |
||
2272 | /// |
||
2273 | /// The intended use case is to update the induction variable for an updated |
||
2274 | /// iteration space such that it can stay normalized in the 0...tripcount-1 |
||
2275 | /// range. |
||
2276 | /// |
||
2277 | /// The \p Updater is called with the (presumable updated) current normalized |
||
2278 | /// induction variable and is expected to return the value that uses of the |
||
2279 | /// pre-updated induction values should use instead, typically dependent on |
||
2280 | /// the new induction variable. This is a lambda (instead of e.g. just passing |
||
2281 | /// the new value) to be able to distinguish the uses of the pre-updated |
||
2282 | /// induction variable and uses of the induction varible to compute the |
||
2283 | /// updated induction variable value. |
||
2284 | void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater); |
||
2285 | |||
2286 | public: |
||
2287 | /// Returns whether this object currently represents the IR of a loop. If |
||
2288 | /// returning false, it may have been consumed by a loop transformation or not |
||
2289 | /// been intialized. Do not use in this case; |
||
2290 | bool isValid() const { return Header; } |
||
2291 | |||
2292 | /// The preheader ensures that there is only a single edge entering the loop. |
||
2293 | /// Code that must be execute before any loop iteration can be emitted here, |
||
2294 | /// such as computing the loop trip count and begin lifetime markers. Code in |
||
2295 | /// the preheader is not considered part of the canonical loop. |
||
2296 | BasicBlock *getPreheader() const; |
||
2297 | |||
2298 | /// The header is the entry for each iteration. In the canonical control flow, |
||
2299 | /// it only contains the PHINode for the induction variable. |
||
2300 | BasicBlock *getHeader() const { |
||
2301 | assert(isValid() && "Requires a valid canonical loop"); |
||
2302 | return Header; |
||
2303 | } |
||
2304 | |||
2305 | /// The condition block computes whether there is another loop iteration. If |
||
2306 | /// yes, branches to the body; otherwise to the exit block. |
||
2307 | BasicBlock *getCond() const { |
||
2308 | assert(isValid() && "Requires a valid canonical loop"); |
||
2309 | return Cond; |
||
2310 | } |
||
2311 | |||
2312 | /// The body block is the single entry for a loop iteration and not controlled |
||
2313 | /// by CanonicalLoopInfo. It can contain arbitrary control flow but must |
||
2314 | /// eventually branch to the \p Latch block. |
||
2315 | BasicBlock *getBody() const { |
||
2316 | assert(isValid() && "Requires a valid canonical loop"); |
||
2317 | return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0); |
||
2318 | } |
||
2319 | |||
2320 | /// Reaching the latch indicates the end of the loop body code. In the |
||
2321 | /// canonical control flow, it only contains the increment of the induction |
||
2322 | /// variable. |
||
2323 | BasicBlock *getLatch() const { |
||
2324 | assert(isValid() && "Requires a valid canonical loop"); |
||
2325 | return Latch; |
||
2326 | } |
||
2327 | |||
2328 | /// Reaching the exit indicates no more iterations are being executed. |
||
2329 | BasicBlock *getExit() const { |
||
2330 | assert(isValid() && "Requires a valid canonical loop"); |
||
2331 | return Exit; |
||
2332 | } |
||
2333 | |||
2334 | /// The after block is intended for clean-up code such as lifetime end |
||
2335 | /// markers. It is separate from the exit block to ensure, analogous to the |
||
2336 | /// preheader, it having just a single entry edge and being free from PHI |
||
2337 | /// nodes should there be multiple loop exits (such as from break |
||
2338 | /// statements/cancellations). |
||
2339 | BasicBlock *getAfter() const { |
||
2340 | assert(isValid() && "Requires a valid canonical loop"); |
||
2341 | return Exit->getSingleSuccessor(); |
||
2342 | } |
||
2343 | |||
2344 | /// Returns the llvm::Value containing the number of loop iterations. It must |
||
2345 | /// be valid in the preheader and always interpreted as an unsigned integer of |
||
2346 | /// any bit-width. |
||
2347 | Value *getTripCount() const { |
||
2348 | assert(isValid() && "Requires a valid canonical loop"); |
||
2349 | Instruction *CmpI = &Cond->front(); |
||
2350 | assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); |
||
2351 | return CmpI->getOperand(1); |
||
2352 | } |
||
2353 | |||
2354 | /// Returns the instruction representing the current logical induction |
||
2355 | /// variable. Always unsigned, always starting at 0 with an increment of one. |
||
2356 | Instruction *getIndVar() const { |
||
2357 | assert(isValid() && "Requires a valid canonical loop"); |
||
2358 | Instruction *IndVarPHI = &Header->front(); |
||
2359 | assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); |
||
2360 | return IndVarPHI; |
||
2361 | } |
||
2362 | |||
2363 | /// Return the type of the induction variable (and the trip count). |
||
2364 | Type *getIndVarType() const { |
||
2365 | assert(isValid() && "Requires a valid canonical loop"); |
||
2366 | return getIndVar()->getType(); |
||
2367 | } |
||
2368 | |||
2369 | /// Return the insertion point for user code before the loop. |
||
2370 | OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { |
||
2371 | assert(isValid() && "Requires a valid canonical loop"); |
||
2372 | BasicBlock *Preheader = getPreheader(); |
||
2373 | return {Preheader, std::prev(Preheader->end())}; |
||
2374 | }; |
||
2375 | |||
2376 | /// Return the insertion point for user code in the body. |
||
2377 | OpenMPIRBuilder::InsertPointTy getBodyIP() const { |
||
2378 | assert(isValid() && "Requires a valid canonical loop"); |
||
2379 | BasicBlock *Body = getBody(); |
||
2380 | return {Body, Body->begin()}; |
||
2381 | }; |
||
2382 | |||
2383 | /// Return the insertion point for user code after the loop. |
||
2384 | OpenMPIRBuilder::InsertPointTy getAfterIP() const { |
||
2385 | assert(isValid() && "Requires a valid canonical loop"); |
||
2386 | BasicBlock *After = getAfter(); |
||
2387 | return {After, After->begin()}; |
||
2388 | }; |
||
2389 | |||
2390 | Function *getFunction() const { |
||
2391 | assert(isValid() && "Requires a valid canonical loop"); |
||
2392 | return Header->getParent(); |
||
2393 | } |
||
2394 | |||
2395 | /// Consistency self-check. |
||
2396 | void assertOK() const; |
||
2397 | |||
2398 | /// Invalidate this loop. That is, the underlying IR does not fulfill the |
||
2399 | /// requirements of an OpenMP canonical loop anymore. |
||
2400 | void invalidate(); |
||
2401 | }; |
||
2402 | |||
2403 | } // end namespace llvm |
||
2404 | |||
2405 | #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H |