Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10
// way to create LLVM instructions for OpenMP directives.
11
//
12
//===----------------------------------------------------------------------===//
13
 
14
#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15
#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
 
17
#include "llvm/Analysis/MemorySSAUpdater.h"
18
#include "llvm/Frontend/OpenMP/OMPConstants.h"
19
#include "llvm/IR/DebugLoc.h"
20
#include "llvm/IR/IRBuilder.h"
21
#include "llvm/Support/Allocator.h"
22
#include <forward_list>
23
#include <map>
24
#include <optional>
25
 
26
namespace llvm {
27
class CanonicalLoopInfo;
28
struct TargetRegionEntryInfo;
29
class OffloadEntriesInfoManager;
30
 
31
/// Move the instruction after an InsertPoint to the beginning of another
32
/// BasicBlock.
33
///
34
/// The instructions after \p IP are moved to the beginning of \p New which must
35
/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
36
/// \p New will be added such that there is no semantic change. Otherwise, the
37
/// \p IP insert block remains degenerate and it is up to the caller to insert a
38
/// terminator.
39
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
40
              bool CreateBranch);
41
 
42
/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
43
/// insert location will stick to after the instruction before the insertion
44
/// point (instead of moving with the instruction the InsertPoint stores
45
/// internally).
46
void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
47
 
48
/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
49
/// (missing the terminator).
50
///
51
/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
52
/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
53
/// is true, a branch to the new successor will be created such that
54
/// semantically there is no change; otherwise the block of the insertion point
55
/// remains degenerate and it is the caller's responsibility to insert a
56
/// terminator. Returns the new successor block.
57
BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
58
                    llvm::Twine Name = {});
59
 
60
/// Split a BasicBlock at \p Builder's insertion point, even if the block is
61
/// degenerate (missing the terminator).  Its new insert location will stick to
62
/// after the instruction before the insertion point (instead of moving with the
63
/// instruction the InsertPoint stores internally).
64
BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
65
                    llvm::Twine Name = {});
66
 
67
/// Split a BasicBlock at \p Builder's insertion point, even if the block is
68
/// degenerate (missing the terminator).  Its new insert location will stick to
69
/// after the instruction before the insertion point (instead of moving with the
70
/// instruction the InsertPoint stores internally).
71
BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
72
 
73
/// Like splitBB, but reuses the current block's name for the new name.
74
BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
75
                              llvm::Twine Suffix = ".split");
76
 
77
/// Captures attributes that affect generating LLVM-IR using the
78
/// OpenMPIRBuilder and related classes. Note that not all attributes are
79
/// required for all classes or functions. In some use cases the configuration
80
/// is not necessary at all, because the only functions that are called
81
/// are ones that are not dependent on the configuration.
82
class OpenMPIRBuilderConfig {
83
public:
84
  /// Flag for specifying if the compilation is done for embedded device code
85
  /// or host code.
86
  std::optional<bool> IsEmbedded;
87
 
88
  /// Flag for specifying if the compilation is done for an offloading target,
89
  /// like GPU.
90
  std::optional<bool> IsTargetCodegen;
91
 
92
  /// Flag for specifying weather a requires unified_shared_memory
93
  /// directive is present or not.
94
  std::optional<bool> HasRequiresUnifiedSharedMemory;
95
 
96
  // Flag for specifying if offloading is mandatory.
97
  std::optional<bool> OpenMPOffloadMandatory;
98
 
99
  /// First separator used between the initial two parts of a name.
100
  std::optional<StringRef> FirstSeparator;
101
  /// Separator used between all of the rest consecutive parts of s name
102
  std::optional<StringRef> Separator;
103
 
104
  OpenMPIRBuilderConfig() {}
105
  OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen,
106
                        bool HasRequiresUnifiedSharedMemory,
107
                        bool OpenMPOffloadMandatory)
108
      : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen),
109
        HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
110
        OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
111
 
112
  // Getters functions that assert if the required values are not present.
113
  bool isEmbedded() const {
114
    assert(IsEmbedded.has_value() && "IsEmbedded is not set");
115
    return *IsEmbedded;
116
  }
117
 
118
  bool isTargetCodegen() const {
119
    assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
120
    return *IsTargetCodegen;
121
  }
122
 
123
  bool hasRequiresUnifiedSharedMemory() const {
124
    assert(HasRequiresUnifiedSharedMemory.has_value() &&
125
           "HasUnifiedSharedMemory is not set");
126
    return *HasRequiresUnifiedSharedMemory;
127
  }
128
 
129
  bool openMPOffloadMandatory() const {
130
    assert(OpenMPOffloadMandatory.has_value() &&
131
           "OpenMPOffloadMandatory is not set");
132
    return *OpenMPOffloadMandatory;
133
  }
134
  // Returns the FirstSeparator if set, otherwise use the default
135
  // separator depending on isTargetCodegen
136
  StringRef firstSeparator() const {
137
    if (FirstSeparator.has_value())
138
      return *FirstSeparator;
139
    if (isTargetCodegen())
140
      return "_";
141
    return ".";
142
  }
143
 
144
  // Returns the Separator if set, otherwise use the default
145
  // separator depending on isTargetCodegen
146
  StringRef separator() const {
147
    if (Separator.has_value())
148
      return *Separator;
149
    if (isTargetCodegen())
150
      return "$";
151
    return ".";
152
  }
153
 
154
  void setIsEmbedded(bool Value) { IsEmbedded = Value; }
155
  void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; }
156
  void setHasRequiresUnifiedSharedMemory(bool Value) {
157
    HasRequiresUnifiedSharedMemory = Value;
158
  }
159
  void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
160
  void setSeparator(StringRef S) { Separator = S; }
161
};
162
 
163
/// An interface to create LLVM-IR for OpenMP directives.
164
///
165
/// Each OpenMP directive has a corresponding public generator method.
166
class OpenMPIRBuilder {
167
public:
168
  /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
169
  /// not have an effect on \p M (see initialize)
170
  OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
171
  ~OpenMPIRBuilder();
172
 
173
  /// Initialize the internal state, this will put structure types and
174
  /// potentially other helpers into the underlying module. Must be called
175
  /// before any other method and only once!
176
  void initialize();
177
 
178
  void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
179
 
180
  /// Finalize the underlying module, e.g., by outlining regions.
181
  /// \param Fn                    The function to be finalized. If not used,
182
  ///                              all functions are finalized.
183
  void finalize(Function *Fn = nullptr);
184
 
185
  /// Add attributes known for \p FnID to \p Fn.
186
  void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
187
 
188
  /// Type used throughout for insertion points.
189
  using InsertPointTy = IRBuilder<>::InsertPoint;
190
 
191
  /// Create a name using the platform specific separators.
192
  /// \param Parts parts of the final name that needs separation
193
  /// The created name has a first separator between the first and second part
194
  /// and a second separator between all other parts.
195
  /// E.g. with FirstSeparator "$" and Separator "." and
196
  /// parts: "p1", "p2", "p3", "p4"
197
  /// The resulting name is "p1$p2.p3.p4"
198
  /// The separators are retrieved from the OpenMPIRBuilderConfig.
199
  std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
200
 
201
  /// Callback type for variable finalization (think destructors).
202
  ///
203
  /// \param CodeGenIP is the insertion point at which the finalization code
204
  ///                  should be placed.
205
  ///
206
  /// A finalize callback knows about all objects that need finalization, e.g.
207
  /// destruction, when the scope of the currently generated construct is left
208
  /// at the time, and location, the callback is invoked.
209
  using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
210
 
211
  struct FinalizationInfo {
212
    /// The finalization callback provided by the last in-flight invocation of
213
    /// createXXXX for the directive of kind DK.
214
    FinalizeCallbackTy FiniCB;
215
 
216
    /// The directive kind of the innermost directive that has an associated
217
    /// region which might require finalization when it is left.
218
    omp::Directive DK;
219
 
220
    /// Flag to indicate if the directive is cancellable.
221
    bool IsCancellable;
222
  };
223
 
224
  /// Push a finalization callback on the finalization stack.
225
  ///
226
  /// NOTE: Temporary solution until Clang CG is gone.
227
  void pushFinalizationCB(const FinalizationInfo &FI) {
228
    FinalizationStack.push_back(FI);
229
  }
230
 
231
  /// Pop the last finalization callback from the finalization stack.
232
  ///
233
  /// NOTE: Temporary solution until Clang CG is gone.
234
  void popFinalizationCB() { FinalizationStack.pop_back(); }
235
 
236
  /// Callback type for body (=inner region) code generation
237
  ///
238
  /// The callback takes code locations as arguments, each describing a
239
  /// location where additional instructions can be inserted.
240
  ///
241
  /// The CodeGenIP may be in the middle of a basic block or point to the end of
242
  /// it. The basic block may have a terminator or be degenerate. The callback
243
  /// function may just insert instructions at that position, but also split the
244
  /// block (without the Before argument of BasicBlock::splitBasicBlock such
245
  /// that the identity of the split predecessor block is preserved) and insert
246
  /// additional control flow, including branches that do not lead back to what
247
  /// follows the CodeGenIP. Note that since the callback is allowed to split
248
  /// the block, callers must assume that InsertPoints to positions in the
249
  /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
250
  /// such InsertPoints need to be preserved, it can split the block itself
251
  /// before calling the callback.
252
  ///
253
  /// AllocaIP and CodeGenIP must not point to the same position.
254
  ///
255
  /// \param AllocaIP is the insertion point at which new alloca instructions
256
  ///                 should be placed. The BasicBlock it is pointing to must
257
  ///                 not be split.
258
  /// \param CodeGenIP is the insertion point at which the body code should be
259
  ///                  placed.
260
  using BodyGenCallbackTy =
261
      function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
262
 
263
  // This is created primarily for sections construct as llvm::function_ref
264
  // (BodyGenCallbackTy) is not storable (as described in the comments of
265
  // function_ref class - function_ref contains a non-owning reference
266
  // to the callable).
267
  using StorableBodyGenCallbackTy =
268
      std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
269
 
270
  /// Callback type for loop body code generation.
271
  ///
272
  /// \param CodeGenIP is the insertion point where the loop's body code must be
273
  ///                  placed. This will be a dedicated BasicBlock with a
274
  ///                  conditional branch from the loop condition check and
275
  ///                  terminated with an unconditional branch to the loop
276
  ///                  latch.
277
  /// \param IndVar    is the induction variable usable at the insertion point.
278
  using LoopBodyGenCallbackTy =
279
      function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
280
 
281
  /// Callback type for variable privatization (think copy & default
282
  /// constructor).
283
  ///
284
  /// \param AllocaIP is the insertion point at which new alloca instructions
285
  ///                 should be placed.
286
  /// \param CodeGenIP is the insertion point at which the privatization code
287
  ///                  should be placed.
288
  /// \param Original The value being copied/created, should not be used in the
289
  ///                 generated IR.
290
  /// \param Inner The equivalent of \p Original that should be used in the
291
  ///              generated IR; this is equal to \p Original if the value is
292
  ///              a pointer and can thus be passed directly, otherwise it is
293
  ///              an equivalent but different value.
294
  /// \param ReplVal The replacement value, thus a copy or new created version
295
  ///                of \p Inner.
296
  ///
297
  /// \returns The new insertion point where code generation continues and
298
  ///          \p ReplVal the replacement value.
299
  using PrivatizeCallbackTy = function_ref<InsertPointTy(
300
      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
301
      Value &Inner, Value *&ReplVal)>;
302
 
303
  /// Description of a LLVM-IR insertion point (IP) and a debug/source location
304
  /// (filename, line, column, ...).
305
  struct LocationDescription {
306
    LocationDescription(const IRBuilderBase &IRB)
307
        : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
308
    LocationDescription(const InsertPointTy &IP) : IP(IP) {}
309
    LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
310
        : IP(IP), DL(DL) {}
311
    InsertPointTy IP;
312
    DebugLoc DL;
313
  };
314
 
315
  /// Emitter methods for OpenMP directives.
316
  ///
317
  ///{
318
 
319
  /// Generator for '#omp barrier'
320
  ///
321
  /// \param Loc The location where the barrier directive was encountered.
322
  /// \param DK The kind of directive that caused the barrier.
323
  /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
324
  /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
325
  ///                        should be checked and acted upon.
326
  ///
327
  /// \returns The insertion point after the barrier.
328
  InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
329
                              bool ForceSimpleCall = false,
330
                              bool CheckCancelFlag = true);
331
 
332
  /// Generator for '#omp cancel'
333
  ///
334
  /// \param Loc The location where the directive was encountered.
335
  /// \param IfCondition The evaluated 'if' clause expression, if any.
336
  /// \param CanceledDirective The kind of directive that is canceled.
337
  ///
338
  /// \returns The insertion point after the barrier.
339
  InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
340
                             omp::Directive CanceledDirective);
341
 
342
  /// Generator for '#omp parallel'
343
  ///
344
  /// \param Loc The insert and source location description.
345
  /// \param AllocaIP The insertion points to be used for alloca instructions.
346
  /// \param BodyGenCB Callback that will generate the region code.
347
  /// \param PrivCB Callback to copy a given variable (think copy constructor).
348
  /// \param FiniCB Callback to finalize variable copies.
349
  /// \param IfCondition The evaluated 'if' clause expression, if any.
350
  /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
351
  /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
352
  /// \param IsCancellable Flag to indicate a cancellable parallel region.
353
  ///
354
  /// \returns The insertion position *after* the parallel.
355
  IRBuilder<>::InsertPoint
356
  createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
357
                 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
358
                 FinalizeCallbackTy FiniCB, Value *IfCondition,
359
                 Value *NumThreads, omp::ProcBindKind ProcBind,
360
                 bool IsCancellable);
361
 
362
  /// Generator for the control flow structure of an OpenMP canonical loop.
363
  ///
364
  /// This generator operates on the logical iteration space of the loop, i.e.
365
  /// the caller only has to provide a loop trip count of the loop as defined by
366
  /// base language semantics. The trip count is interpreted as an unsigned
367
  /// integer. The induction variable passed to \p BodyGenCB will be of the same
368
  /// type and run from 0 to \p TripCount - 1. It is up to the callback to
369
  /// convert the logical iteration variable to the loop counter variable in the
370
  /// loop body.
371
  ///
372
  /// \param Loc       The insert and source location description. The insert
373
  ///                  location can be between two instructions or the end of a
374
  ///                  degenerate block (e.g. a BB under construction).
375
  /// \param BodyGenCB Callback that will generate the loop body code.
376
  /// \param TripCount Number of iterations the loop body is executed.
377
  /// \param Name      Base name used to derive BB and instruction names.
378
  ///
379
  /// \returns An object representing the created control flow structure which
380
  ///          can be used for loop-associated directives.
381
  CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
382
                                         LoopBodyGenCallbackTy BodyGenCB,
383
                                         Value *TripCount,
384
                                         const Twine &Name = "loop");
385
 
386
  /// Generator for the control flow structure of an OpenMP canonical loop.
387
  ///
388
  /// Instead of a logical iteration space, this allows specifying user-defined
389
  /// loop counter values using increment, upper- and lower bounds. To
390
  /// disambiguate the terminology when counting downwards, instead of lower
391
  /// bounds we use \p Start for the loop counter value in the first body
392
  /// iteration.
393
  ///
394
  /// Consider the following limitations:
395
  ///
396
  ///  * A loop counter space over all integer values of its bit-width cannot be
397
  ///    represented. E.g using uint8_t, its loop trip count of 256 cannot be
398
  ///    stored into an 8 bit integer):
399
  ///
400
  ///      DO I = 0, 255, 1
401
  ///
402
  ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
403
  ///    effectively counting downwards:
404
  ///
405
  ///      for (uint8_t i = 100u; i > 0; i += 127u)
406
  ///
407
  ///
408
  /// TODO: May need to add additional parameters to represent:
409
  ///
410
  ///  * Allow representing downcounting with unsigned integers.
411
  ///
412
  ///  * Sign of the step and the comparison operator might disagree:
413
  ///
414
  ///      for (int i = 0; i < 42; i -= 1u)
415
  ///
416
  ///
417
  /// \param Loc       The insert and source location description.
418
  /// \param BodyGenCB Callback that will generate the loop body code.
419
  /// \param Start     Value of the loop counter for the first iteration.
420
  /// \param Stop      Loop counter values past this will stop the loop.
421
  /// \param Step      Loop counter increment after each iteration; negative
422
  ///                  means counting down.
423
  /// \param IsSigned  Whether Start, Stop and Step are signed integers.
424
  /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
425
  ///                      counter.
426
  /// \param ComputeIP Insertion point for instructions computing the trip
427
  ///                  count. Can be used to ensure the trip count is available
428
  ///                  at the outermost loop of a loop nest. If not set,
429
  ///                  defaults to the preheader of the generated loop.
430
  /// \param Name      Base name used to derive BB and instruction names.
431
  ///
432
  /// \returns An object representing the created control flow structure which
433
  ///          can be used for loop-associated directives.
434
  CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
435
                                         LoopBodyGenCallbackTy BodyGenCB,
436
                                         Value *Start, Value *Stop, Value *Step,
437
                                         bool IsSigned, bool InclusiveStop,
438
                                         InsertPointTy ComputeIP = {},
439
                                         const Twine &Name = "loop");
440
 
441
  /// Collapse a loop nest into a single loop.
442
  ///
443
  /// Merges loops of a loop nest into a single CanonicalLoopNest representation
444
  /// that has the same number of innermost loop iterations as the origin loop
445
  /// nest. The induction variables of the input loops are derived from the
446
  /// collapsed loop's induction variable. This is intended to be used to
447
  /// implement OpenMP's collapse clause. Before applying a directive,
448
  /// collapseLoops normalizes a loop nest to contain only a single loop and the
449
  /// directive's implementation does not need to handle multiple loops itself.
450
  /// This does not remove the need to handle all loop nest handling by
451
  /// directives, such as the ordered(<n>) clause or the simd schedule-clause
452
  /// modifier of the worksharing-loop directive.
453
  ///
454
  /// Example:
455
  /// \code
456
  ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
457
  ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
458
  ///       body(i, j);
459
  /// \endcode
460
  ///
461
  /// After collapsing with Loops={i,j}, the loop is changed to
462
  /// \code
463
  ///   for (int ij = 0; ij < 63; ++ij) {
464
  ///     int i = ij / 9;
465
  ///     int j = ij % 9;
466
  ///     body(i, j);
467
  ///   }
468
  /// \endcode
469
  ///
470
  /// In the current implementation, the following limitations apply:
471
  ///
472
  ///  * All input loops have an induction variable of the same type.
473
  ///
474
  ///  * The collapsed loop will have the same trip count integer type as the
475
  ///    input loops. Therefore it is possible that the collapsed loop cannot
476
  ///    represent all iterations of the input loops. For instance, assuming a
477
  ///    32 bit integer type, and two input loops both iterating 2^16 times, the
478
  ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
479
  ///    which cannot be represented in a 32-bit integer. Behavior is undefined
480
  ///    in this case.
481
  ///
482
  ///  * The trip counts of every input loop must be available at \p ComputeIP.
483
  ///    Non-rectangular loops are not yet supported.
484
  ///
485
  ///  * At each nest level, code between a surrounding loop and its nested loop
486
  ///    is hoisted into the loop body, and such code will be executed more
487
  ///    often than before collapsing (or not at all if any inner loop iteration
488
  ///    has a trip count of 0). This is permitted by the OpenMP specification.
489
  ///
490
  /// \param DL        Debug location for instructions added for collapsing,
491
  ///                  such as instructions to compute/derive the input loop's
492
  ///                  induction variables.
493
  /// \param Loops     Loops in the loop nest to collapse. Loops are specified
494
  ///                  from outermost-to-innermost and every control flow of a
495
  ///                  loop's body must pass through its directly nested loop.
496
  /// \param ComputeIP Where to insert the instructions computing the collapsed
497
  ///                  trip count. If not set, defaults to before the generated
498
  ///                  loop.
499
  ///
500
  /// \returns The CanonicalLoopInfo object representing the collapsed loop.
501
  CanonicalLoopInfo *collapseLoops(DebugLoc DL,
502
                                   ArrayRef<CanonicalLoopInfo *> Loops,
503
                                   InsertPointTy ComputeIP);
504
 
505
private:
506
  /// Modifies the canonical loop to be a statically-scheduled workshare loop.
507
  ///
508
  /// This takes \p CLI representing a canonical loop, such as the one
509
  /// created by \p createCanonicalLoop and emits additional instructions to
510
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
511
  /// runtime function in the preheader to obtain the loop bounds to be used in
512
  /// the current thread, updates the relevant instructions in the canonical
513
  /// loop and calls to an OpenMP runtime finalization function after the loop.
514
  ///
515
  /// \param DL       Debug location for instructions added for the
516
  ///                 workshare-loop construct itself.
517
  /// \param CLI      A descriptor of the canonical loop to workshare.
518
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
519
  ///                 preheader of the loop.
520
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
521
  ///                     the loop.
522
  ///
523
  /// \returns Point where to insert code after the workshare construct.
524
  InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
525
                                         InsertPointTy AllocaIP,
526
                                         bool NeedsBarrier);
527
 
528
  /// Turns the canonical loop into a statically-scheduled workshare loop with a
529
  /// user-specified chunk size.
530
  ///
531
  /// \param DL           Debug location for instructions added for the
532
  ///                     workshare-loop construct itself.
533
  /// \param CLI          A descriptor of the canonical loop to workshare.
534
  /// \param AllocaIP     An insertion point for Alloca instructions usable in
535
  ///                     the preheader of the loop.
536
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
537
  ///                     loop.
538
  /// \param ChunkSize    The user-specified chunk size.
539
  ///
540
  /// \returns Point where to insert code after the workshare construct.
541
  InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
542
                                                CanonicalLoopInfo *CLI,
543
                                                InsertPointTy AllocaIP,
544
                                                bool NeedsBarrier,
545
                                                Value *ChunkSize);
546
 
547
  /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
548
  ///
549
  /// This takes \p CLI representing a canonical loop, such as the one
550
  /// created by \p createCanonicalLoop and emits additional instructions to
551
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
552
  /// runtime function in the preheader to obtain, and then in each iteration
553
  /// to update the loop counter.
554
  ///
555
  /// \param DL       Debug location for instructions added for the
556
  ///                 workshare-loop construct itself.
557
  /// \param CLI      A descriptor of the canonical loop to workshare.
558
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
559
  ///                 preheader of the loop.
560
  /// \param SchedType Type of scheduling to be passed to the init function.
561
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
562
  ///                     the loop.
563
  /// \param Chunk    The size of loop chunk considered as a unit when
564
  ///                 scheduling. If \p nullptr, defaults to 1.
565
  ///
566
  /// \returns Point where to insert code after the workshare construct.
567
  InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
568
                                          InsertPointTy AllocaIP,
569
                                          omp::OMPScheduleType SchedType,
570
                                          bool NeedsBarrier,
571
                                          Value *Chunk = nullptr);
572
 
573
  /// Create alternative version of the loop to support if clause
574
  ///
575
  /// OpenMP if clause can require to generate second loop. This loop
576
  /// will be executed when if clause condition is not met. createIfVersion
577
  /// adds branch instruction to the copied loop if \p IfCond is not met.
578
  ///
579
  /// \param Loop       Original loop which should be versioned.
580
  /// \param IfCond     Value which corresponds to if clause condition
581
  /// \param VMap       Value to value map to define relation between
582
  ///                   original and copied loop values and loop blocks.
583
  /// \param NamePrefix Optional name prefix for if.then if.else blocks.
584
  void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
585
                       ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
586
 
587
public:
588
  /// Modifies the canonical loop to be a workshare loop.
589
  ///
590
  /// This takes \p CLI representing a canonical loop, such as the one
591
  /// created by \p createCanonicalLoop and emits additional instructions to
592
  /// turn it into a workshare loop. In particular, it calls to an OpenMP
593
  /// runtime function in the preheader to obtain the loop bounds to be used in
594
  /// the current thread, updates the relevant instructions in the canonical
595
  /// loop and calls to an OpenMP runtime finalization function after the loop.
596
  ///
597
  /// The concrete transformation is done by applyStaticWorkshareLoop,
598
  /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
599
  /// on the value of \p SchedKind and \p ChunkSize.
600
  ///
601
  /// \param DL       Debug location for instructions added for the
602
  ///                 workshare-loop construct itself.
603
  /// \param CLI      A descriptor of the canonical loop to workshare.
604
  /// \param AllocaIP An insertion point for Alloca instructions usable in the
605
  ///                 preheader of the loop.
606
  /// \param NeedsBarrier Indicates whether a barrier must be inserted after
607
  ///                     the loop.
608
  /// \param SchedKind Scheduling algorithm to use.
609
  /// \param ChunkSize The chunk size for the inner loop.
610
  /// \param HasSimdModifier Whether the simd modifier is present in the
611
  ///                        schedule clause.
612
  /// \param HasMonotonicModifier Whether the monotonic modifier is present in
613
  ///                             the schedule clause.
614
  /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
615
  ///                                present in the schedule clause.
616
  /// \param HasOrderedClause Whether the (parameterless) ordered clause is
617
  ///                         present.
618
  ///
619
  /// \returns Point where to insert code after the workshare construct.
620
  InsertPointTy applyWorkshareLoop(
621
      DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
622
      bool NeedsBarrier,
623
      llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
624
      Value *ChunkSize = nullptr, bool HasSimdModifier = false,
625
      bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
626
      bool HasOrderedClause = false);
627
 
628
  /// Tile a loop nest.
629
  ///
630
  /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
631
  /// \p Loops must be perfectly nested, from outermost to innermost loop
632
  /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
633
  /// of every loop and every tile size must be usable in the outermost
634
  /// loop's preheader. This implies that the loop nest is rectangular.
635
  ///
636
  /// Example:
637
  /// \code
638
  ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
639
  ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
640
  ///         body(i, j);
641
  /// \endcode
642
  ///
643
  /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
644
  /// \code
645
  ///   for (int i1 = 0; i1 < 3; ++i1)
646
  ///     for (int j1 = 0; j1 < 2; ++j1)
647
  ///       for (int i2 = 0; i2 < 5; ++i2)
648
  ///         for (int j2 = 0; j2 < 7; ++j2)
649
  ///           body(i1*5+i2, j1*7+j2);
650
  /// \endcode
651
  ///
652
  /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
653
  /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also
654
  /// handles non-constant trip counts, non-constant tile sizes and trip counts
655
  /// that are not multiples of the tile size. In the latter case the tile loop
656
  /// of the last floor-loop iteration will have fewer iterations than specified
657
  /// as its tile size.
658
  ///
659
  ///
660
  /// @param DL        Debug location for instructions added by tiling, for
661
  ///                  instance the floor- and tile trip count computation.
662
  /// @param Loops     Loops to tile. The CanonicalLoopInfo objects are
663
  ///                  invalidated by this method, i.e. should not be used after
664
  ///                  tiling.
665
  /// @param TileSizes For each loop in \p Loops, the tile size for that
666
  ///                  dimension.
667
  ///
668
  /// \returns A list of generated loops. Contains twice as many loops as the
669
  ///          input loop nest; the first half are the floor loops and the
670
  ///          second half are the tile loops.
671
  std::vector<CanonicalLoopInfo *>
672
  tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
673
            ArrayRef<Value *> TileSizes);
674
 
675
  /// Fully unroll a loop.
676
  ///
677
  /// Instead of unrolling the loop immediately (and duplicating its body
678
  /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
679
  /// metadata.
680
  ///
681
  /// \param DL   Debug location for instructions added by unrolling.
682
  /// \param Loop The loop to unroll. The loop will be invalidated.
683
  void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
684
 
685
  /// Fully or partially unroll a loop. How the loop is unrolled is determined
686
  /// using LLVM's LoopUnrollPass.
687
  ///
688
  /// \param DL   Debug location for instructions added by unrolling.
689
  /// \param Loop The loop to unroll. The loop will be invalidated.
690
  void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
691
 
692
  /// Partially unroll a loop.
693
  ///
694
  /// The CanonicalLoopInfo of the unrolled loop for use with chained
695
  /// loop-associated directive can be requested using \p UnrolledCLI. Not
696
  /// needing the CanonicalLoopInfo allows more efficient code generation by
697
  /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
698
  /// A loop-associated directive applied to the unrolled loop needs to know the
699
  /// new trip count which means that if using a heuristically determined unroll
700
  /// factor (\p Factor == 0), that factor must be computed immediately. We are
701
  /// using the same logic as the LoopUnrollPass to derive the unroll factor,
702
  /// but which assumes that some canonicalization has taken place (e.g.
703
  /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
704
  /// better when the unrolled loop's CanonicalLoopInfo is not needed.
705
  ///
706
  /// \param DL          Debug location for instructions added by unrolling.
707
  /// \param Loop        The loop to unroll. The loop will be invalidated.
708
  /// \param Factor      The factor to unroll the loop by. A factor of 0
709
  ///                    indicates that a heuristic should be used to determine
710
  ///                    the unroll-factor.
711
  /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
712
  ///                    partially unrolled loop. Otherwise, uses loop metadata
713
  ///                    to defer unrolling to the LoopUnrollPass.
714
  void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
715
                         CanonicalLoopInfo **UnrolledCLI);
716
 
717
  /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
718
  /// is cloned. The metadata which prevents vectorization is added to
719
  /// the cloned loop. The cloned loop is executed when ifCond is evaluated
720
  /// to false.
721
  ///
722
  /// \param Loop        The loop to simd-ize.
723
  /// \param AlignedVars The map which contains pairs of the pointer
724
  ///                    and its corresponding alignment.
725
  /// \param IfCond      The value which corresponds to the if clause
726
  ///                    condition.
727
  /// \param Order       The enum to map order clause.
728
  /// \param Simdlen     The Simdlen length to apply to the simd loop.
729
  /// \param Safelen     The Safelen length to apply to the simd loop.
730
  void applySimd(CanonicalLoopInfo *Loop,
731
                 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
732
                 omp::OrderKind Order, ConstantInt *Simdlen,
733
                 ConstantInt *Safelen);
734
 
735
  /// Generator for '#omp flush'
736
  ///
737
  /// \param Loc The location where the flush directive was encountered
738
  void createFlush(const LocationDescription &Loc);
739
 
740
  /// Generator for '#omp taskwait'
741
  ///
742
  /// \param Loc The location where the taskwait directive was encountered.
743
  void createTaskwait(const LocationDescription &Loc);
744
 
745
  /// Generator for '#omp taskyield'
746
  ///
747
  /// \param Loc The location where the taskyield directive was encountered.
748
  void createTaskyield(const LocationDescription &Loc);
749
 
750
  /// A struct to pack the relevant information for an OpenMP depend clause.
751
  struct DependData {
752
    omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
753
    Type *DepValueType;
754
    Value *DepVal;
755
    explicit DependData() = default;
756
    DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
757
               Value *DepVal)
758
        : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
759
  };
760
 
761
  /// Generator for `#omp task`
762
  ///
763
  /// \param Loc The location where the task construct was encountered.
764
  /// \param AllocaIP The insertion point to be used for alloca instructions.
765
  /// \param BodyGenCB Callback that will generate the region code.
766
  /// \param Tied True if the task is tied, false if the task is untied.
767
  /// \param Final i1 value which is `true` if the task is final, `false` if the
768
  ///              task is not final.
769
  /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
770
  ///                    task is generated, and the encountering thread must
771
  ///                    suspend the current task region, for which execution
772
  ///                    cannot be resumed until execution of the structured
773
  ///                    block that is associated with the generated task is
774
  ///                    completed.
775
  InsertPointTy createTask(const LocationDescription &Loc,
776
                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
777
                           bool Tied = true, Value *Final = nullptr,
778
                           Value *IfCondition = nullptr,
779
                           SmallVector<DependData> Dependencies = {});
780
 
781
  /// Generator for the taskgroup construct
782
  ///
783
  /// \param Loc The location where the taskgroup construct was encountered.
784
  /// \param AllocaIP The insertion point to be used for alloca instructions.
785
  /// \param BodyGenCB Callback that will generate the region code.
786
  InsertPointTy createTaskgroup(const LocationDescription &Loc,
787
                                InsertPointTy AllocaIP,
788
                                BodyGenCallbackTy BodyGenCB);
789
 
790
  /// Functions used to generate reductions. Such functions take two Values
791
  /// representing LHS and RHS of the reduction, respectively, and a reference
792
  /// to the value that is updated to refer to the reduction result.
793
  using ReductionGenTy =
794
      function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
795
 
796
  /// Functions used to generate atomic reductions. Such functions take two
797
  /// Values representing pointers to LHS and RHS of the reduction, as well as
798
  /// the element type of these pointers. They are expected to atomically
799
  /// update the LHS to the reduced value.
800
  using AtomicReductionGenTy =
801
      function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
802
 
803
  /// Information about an OpenMP reduction.
804
  struct ReductionInfo {
805
    ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
806
                  ReductionGenTy ReductionGen,
807
                  AtomicReductionGenTy AtomicReductionGen)
808
        : ElementType(ElementType), Variable(Variable),
809
          PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
810
          AtomicReductionGen(AtomicReductionGen) {
811
      assert(cast<PointerType>(Variable->getType())
812
          ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
813
    }
814
 
815
    /// Reduction element type, must match pointee type of variable.
816
    Type *ElementType;
817
 
818
    /// Reduction variable of pointer type.
819
    Value *Variable;
820
 
821
    /// Thread-private partial reduction variable.
822
    Value *PrivateVariable;
823
 
824
    /// Callback for generating the reduction body. The IR produced by this will
825
    /// be used to combine two values in a thread-safe context, e.g., under
826
    /// lock or within the same thread, and therefore need not be atomic.
827
    ReductionGenTy ReductionGen;
828
 
829
    /// Callback for generating the atomic reduction body, may be null. The IR
830
    /// produced by this will be used to atomically combine two values during
831
    /// reduction. If null, the implementation will use the non-atomic version
832
    /// along with the appropriate synchronization mechanisms.
833
    AtomicReductionGenTy AtomicReductionGen;
834
  };
835
 
836
  // TODO: provide atomic and non-atomic reduction generators for reduction
837
  // operators defined by the OpenMP specification.
838
 
839
  /// Generator for '#omp reduction'.
840
  ///
841
  /// Emits the IR instructing the runtime to perform the specific kind of
842
  /// reductions. Expects reduction variables to have been privatized and
843
  /// initialized to reduction-neutral values separately. Emits the calls to
844
  /// runtime functions as well as the reduction function and the basic blocks
845
  /// performing the reduction atomically and non-atomically.
846
  ///
847
  /// The code emitted for the following:
848
  ///
849
  /// \code
850
  ///   type var_1;
851
  ///   type var_2;
852
  ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
853
  ///   /* body */;
854
  /// \endcode
855
  ///
856
  /// corresponds to the following sketch.
857
  ///
858
  /// \code
859
  /// void _outlined_par() {
860
  ///   // N is the number of different reductions.
861
  ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
862
  ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
863
  ///                        _omp_reduction_func,
864
  ///                        _gomp_critical_user.reduction.var)) {
865
  ///   case 1: {
866
  ///     var_1 = var_1 <reduction-op> privatized_var_1;
867
  ///     var_2 = var_2 <reduction-op> privatized_var_2;
868
  ///     // ...
869
  ///    __kmpc_end_reduce(...);
870
  ///     break;
871
  ///   }
872
  ///   case 2: {
873
  ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
874
  ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
875
  ///     // ...
876
  ///     break;
877
  ///   }
878
  ///   default: break;
879
  ///   }
880
  /// }
881
  ///
882
  /// void _omp_reduction_func(void **lhs, void **rhs) {
883
  ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
884
  ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
885
  ///   // ...
886
  /// }
887
  /// \endcode
888
  ///
889
  /// \param Loc                The location where the reduction was
890
  ///                           encountered. Must be within the associated
891
  ///                           directive and after the last local access to the
892
  ///                           reduction variables.
893
  /// \param AllocaIP           An insertion point suitable for allocas usable
894
  ///                           in reductions.
895
  /// \param ReductionInfos     A list of info on each reduction variable.
896
  /// \param IsNoWait           A flag set if the reduction is marked as nowait.
897
  InsertPointTy createReductions(const LocationDescription &Loc,
898
                                 InsertPointTy AllocaIP,
899
                                 ArrayRef<ReductionInfo> ReductionInfos,
900
                                 bool IsNoWait = false);
901
 
902
  ///}
903
 
904
  /// Return the insertion point used by the underlying IRBuilder.
905
  InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
906
 
907
  /// Update the internal location to \p Loc.
908
  bool updateToLocation(const LocationDescription &Loc) {
909
    Builder.restoreIP(Loc.IP);
910
    Builder.SetCurrentDebugLocation(Loc.DL);
911
    return Loc.IP.getBlock() != nullptr;
912
  }
913
 
914
  /// Return the function declaration for the runtime function with \p FnID.
915
  FunctionCallee getOrCreateRuntimeFunction(Module &M,
916
                                            omp::RuntimeFunction FnID);
917
 
918
  Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
919
 
920
  /// Return the (LLVM-IR) string describing the source location \p LocStr.
921
  Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
922
 
923
  /// Return the (LLVM-IR) string describing the default source location.
924
  Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
925
 
926
  /// Return the (LLVM-IR) string describing the source location identified by
927
  /// the arguments.
928
  Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
929
                                 unsigned Line, unsigned Column,
930
                                 uint32_t &SrcLocStrSize);
931
 
932
  /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
933
  /// fallback if \p DL does not specify the function name.
934
  Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
935
                                 Function *F = nullptr);
936
 
937
  /// Return the (LLVM-IR) string describing the source location \p Loc.
938
  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
939
                                 uint32_t &SrcLocStrSize);
940
 
941
  /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
942
  /// TODO: Create an enum class for the Reserve2Flags
943
  Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
944
                             omp::IdentFlag Flags = omp::IdentFlag(0),
945
                             unsigned Reserve2Flags = 0);
946
 
947
  /// Create a hidden global flag \p Name in the module with initial value \p
948
  /// Value.
949
  GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
950
 
951
  /// Create an offloading section struct used to register this global at
952
  /// runtime.
953
  ///
954
  /// Type struct __tgt_offload_entry{
955
  ///   void    *addr;      // Pointer to the offload entry info.
956
  ///                       // (function or global)
957
  ///   char    *name;      // Name of the function or global.
958
  ///   size_t  size;       // Size of the entry info (0 if it is a function).
959
  ///   int32_t flags;
960
  ///   int32_t reserved;
961
  /// };
962
  ///
963
  /// \param Addr The pointer to the global being registered.
964
  /// \param Name The symbol name associated with the global.
965
  /// \param Size The size in bytes of the global (0 for functions).
966
  /// \param Flags Flags associated with the entry.
967
  /// \param SectionName The section this entry will be placed at.
968
  void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
969
                           int32_t Flags,
970
                           StringRef SectionName = "omp_offloading_entries");
971
 
972
  /// Generate control flow and cleanup for cancellation.
973
  ///
974
  /// \param CancelFlag Flag indicating if the cancellation is performed.
975
  /// \param CanceledDirective The kind of directive that is canceled.
976
  /// \param ExitCB Extra code to be generated in the exit block.
977
  void emitCancelationCheckImpl(Value *CancelFlag,
978
                                omp::Directive CanceledDirective,
979
                                FinalizeCallbackTy ExitCB = {});
980
 
981
  /// Generate a target region entry call.
982
  ///
983
  /// \param Loc The location at which the request originated and is fulfilled.
984
  /// \param Return Return value of the created function returned by reference.
985
  /// \param DeviceID Identifier for the device via the 'device' clause.
986
  /// \param NumTeams Number of teams for the region via the 'num_teams' clause
987
  ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
988
  /// \param NumThreads Number of threads via the 'thread_limit' clause.
989
  /// \param HostPtr Pointer to the host-side pointer of the target kernel.
990
  /// \param KernelArgs Array of arguments to the kernel.
991
  InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
992
                                 Value *Ident, Value *DeviceID, Value *NumTeams,
993
                                 Value *NumThreads, Value *HostPtr,
994
                                 ArrayRef<Value *> KernelArgs);
995
 
996
  /// Generate a barrier runtime call.
997
  ///
998
  /// \param Loc The location at which the request originated and is fulfilled.
999
  /// \param DK The directive which caused the barrier
1000
  /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1001
  /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1002
  ///                        should be checked and acted upon.
1003
  ///
1004
  /// \returns The insertion point after the barrier.
1005
  InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1006
                                omp::Directive DK, bool ForceSimpleCall,
1007
                                bool CheckCancelFlag);
1008
 
1009
  /// Generate a flush runtime call.
1010
  ///
1011
  /// \param Loc The location at which the request originated and is fulfilled.
1012
  void emitFlush(const LocationDescription &Loc);
1013
 
1014
  /// The finalization stack made up of finalize callbacks currently in-flight,
1015
  /// wrapped into FinalizationInfo objects that reference also the finalization
1016
  /// target block and the kind of cancellable directive.
1017
  SmallVector<FinalizationInfo, 8> FinalizationStack;
1018
 
1019
  /// Return true if the last entry in the finalization stack is of kind \p DK
1020
  /// and cancellable.
1021
  bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1022
    return !FinalizationStack.empty() &&
1023
           FinalizationStack.back().IsCancellable &&
1024
           FinalizationStack.back().DK == DK;
1025
  }
1026
 
1027
  /// Generate a taskwait runtime call.
1028
  ///
1029
  /// \param Loc The location at which the request originated and is fulfilled.
1030
  void emitTaskwaitImpl(const LocationDescription &Loc);
1031
 
1032
  /// Generate a taskyield runtime call.
1033
  ///
1034
  /// \param Loc The location at which the request originated and is fulfilled.
1035
  void emitTaskyieldImpl(const LocationDescription &Loc);
1036
 
1037
  /// Return the current thread ID.
1038
  ///
1039
  /// \param Ident The ident (ident_t*) describing the query origin.
1040
  Value *getOrCreateThreadID(Value *Ident);
1041
 
1042
  /// The OpenMPIRBuilder Configuration
1043
  OpenMPIRBuilderConfig Config;
1044
 
1045
  /// The underlying LLVM-IR module
1046
  Module &M;
1047
 
1048
  /// The LLVM-IR Builder used to create IR.
1049
  IRBuilder<> Builder;
1050
 
1051
  /// Map to remember source location strings
1052
  StringMap<Constant *> SrcLocStrMap;
1053
 
1054
  /// Map to remember existing ident_t*.
1055
  DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
1056
 
1057
  /// Helper that contains information about regions we need to outline
1058
  /// during finalization.
1059
  struct OutlineInfo {
1060
    using PostOutlineCBTy = std::function<void(Function &)>;
1061
    PostOutlineCBTy PostOutlineCB;
1062
    BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
1063
    SmallVector<Value *, 2> ExcludeArgsFromAggregate;
1064
 
1065
    /// Collect all blocks in between EntryBB and ExitBB in both the given
1066
    /// vector and set.
1067
    void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
1068
                       SmallVectorImpl<BasicBlock *> &BlockVector);
1069
 
1070
    /// Return the function that contains the region to be outlined.
1071
    Function *getFunction() const { return EntryBB->getParent(); }
1072
  };
1073
 
1074
  /// Collection of regions that need to be outlined during finalization.
1075
  SmallVector<OutlineInfo, 16> OutlineInfos;
1076
 
1077
  /// Collection of owned canonical loop objects that eventually need to be
1078
  /// freed.
1079
  std::forward_list<CanonicalLoopInfo> LoopInfos;
1080
 
1081
  /// Add a new region that will be outlined later.
1082
  void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1083
 
1084
  /// A map from unique names to the auto-generated variables they identify.
1085
  /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1086
  /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1087
  /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1088
  /// variables.
1089
  StringMap<Constant*, BumpPtrAllocator> InternalVars;
1090
 
1091
  /// Create the global variable holding the offload mappings information.
1092
  GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
1093
                                        std::string VarName);
1094
 
1095
  /// Create the global variable holding the offload names information.
1096
  GlobalVariable *
1097
  createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
1098
                        std::string VarName);
1099
 
1100
  struct MapperAllocas {
1101
    AllocaInst *ArgsBase = nullptr;
1102
    AllocaInst *Args = nullptr;
1103
    AllocaInst *ArgSizes = nullptr;
1104
  };
1105
 
1106
  /// Create the allocas instruction used in call to mapper functions.
1107
  void createMapperAllocas(const LocationDescription &Loc,
1108
                           InsertPointTy AllocaIP, unsigned NumOperands,
1109
                           struct MapperAllocas &MapperAllocas);
1110
 
1111
  /// Create the call for the target mapper function.
1112
  /// \param Loc The source location description.
1113
  /// \param MapperFunc Function to be called.
1114
  /// \param SrcLocInfo Source location information global.
1115
  /// \param MaptypesArg The argument types.
1116
  /// \param MapnamesArg The argument names.
1117
  /// \param MapperAllocas The AllocaInst used for the call.
1118
  /// \param DeviceID Device ID for the call.
1119
  /// \param NumOperands Number of operands in the call.
1120
  void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1121
                      Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1122
                      struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1123
                      unsigned NumOperands);
1124
 
1125
  /// Container for the arguments used to pass data to the runtime library.
1126
  struct TargetDataRTArgs {
1127
    explicit TargetDataRTArgs() {}
1128
    /// The array of base pointer passed to the runtime library.
1129
    Value *BasePointersArray = nullptr;
1130
    /// The array of section pointers passed to the runtime library.
1131
    Value *PointersArray = nullptr;
1132
    /// The array of sizes passed to the runtime library.
1133
    Value *SizesArray = nullptr;
1134
    /// The array of map types passed to the runtime library for the beginning
1135
    /// of the region or for the entire region if there are no separate map
1136
    /// types for the region end.
1137
    Value *MapTypesArray = nullptr;
1138
    /// The array of map types passed to the runtime library for the end of the
1139
    /// region, or nullptr if there are no separate map types for the region
1140
    /// end.
1141
    Value *MapTypesArrayEnd = nullptr;
1142
    /// The array of user-defined mappers passed to the runtime library.
1143
    Value *MappersArray = nullptr;
1144
    /// The array of original declaration names of mapped pointers sent to the
1145
    /// runtime library for debugging
1146
    Value *MapNamesArray = nullptr;
1147
  };
1148
 
1149
  /// Struct that keeps the information that should be kept throughout
1150
  /// a 'target data' region.
1151
  class TargetDataInfo {
1152
    /// Set to true if device pointer information have to be obtained.
1153
    bool RequiresDevicePointerInfo = false;
1154
    /// Set to true if Clang emits separate runtime calls for the beginning and
1155
    /// end of the region.  These calls might have separate map type arrays.
1156
    bool SeparateBeginEndCalls = false;
1157
 
1158
  public:
1159
    TargetDataRTArgs RTArgs;
1160
 
1161
    /// Indicate whether any user-defined mapper exists.
1162
    bool HasMapper = false;
1163
    /// The total number of pointers passed to the runtime library.
1164
    unsigned NumberOfPtrs = 0u;
1165
 
1166
    explicit TargetDataInfo() {}
1167
    explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1168
                            bool SeparateBeginEndCalls)
1169
        : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1170
          SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1171
    /// Clear information about the data arrays.
1172
    void clearArrayInfo() {
1173
      RTArgs = TargetDataRTArgs();
1174
      HasMapper = false;
1175
      NumberOfPtrs = 0u;
1176
    }
1177
    /// Return true if the current target data information has valid arrays.
1178
    bool isValid() {
1179
      return RTArgs.BasePointersArray && RTArgs.PointersArray &&
1180
             RTArgs.SizesArray && RTArgs.MapTypesArray &&
1181
             (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
1182
    }
1183
    bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1184
    bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1185
  };
1186
 
1187
  /// Emit the arguments to be passed to the runtime library based on the
1188
  /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
1189
  /// ForEndCall, emit map types to be passed for the end of the region instead
1190
  /// of the beginning.
1191
  void emitOffloadingArraysArgument(IRBuilderBase &Builder,
1192
                                    OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
1193
                                    OpenMPIRBuilder::TargetDataInfo &Info,
1194
                                    bool EmitDebug = false,
1195
                                    bool ForEndCall = false);
1196
 
1197
  /// Creates offloading entry for the provided entry ID \a ID, address \a
1198
  /// Addr, size \a Size, and flags \a Flags.
1199
  void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
1200
                          int32_t Flags, GlobalValue::LinkageTypes);
1201
 
1202
  /// The kind of errors that can occur when emitting the offload entries and
1203
  /// metadata.
1204
  enum EmitMetadataErrorKind {
1205
    EMIT_MD_TARGET_REGION_ERROR,
1206
    EMIT_MD_DECLARE_TARGET_ERROR,
1207
    EMIT_MD_GLOBAL_VAR_LINK_ERROR
1208
  };
1209
 
1210
  /// Callback function type
1211
  using EmitMetadataErrorReportFunctionTy =
1212
      std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1213
 
1214
  // Emit the offloading entries and metadata so that the device codegen side
1215
  // can easily figure out what to emit. The produced metadata looks like
1216
  // this:
1217
  //
1218
  // !omp_offload.info = !{!1, ...}
1219
  //
1220
  // We only generate metadata for function that contain target regions.
1221
  void createOffloadEntriesAndInfoMetadata(
1222
      OffloadEntriesInfoManager &OffloadEntriesInfoManager,
1223
      EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1224
 
1225
public:
1226
  /// Generator for __kmpc_copyprivate
1227
  ///
1228
  /// \param Loc The source location description.
1229
  /// \param BufSize Number of elements in the buffer.
1230
  /// \param CpyBuf List of pointers to data to be copied.
1231
  /// \param CpyFn function to call for copying data.
1232
  /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1233
  ///
1234
  /// \return The insertion position *after* the CopyPrivate call.
1235
 
1236
  InsertPointTy createCopyPrivate(const LocationDescription &Loc,
1237
                                  llvm::Value *BufSize, llvm::Value *CpyBuf,
1238
                                  llvm::Value *CpyFn, llvm::Value *DidIt);
1239
 
1240
  /// Generator for '#omp single'
1241
  ///
1242
  /// \param Loc The source location description.
1243
  /// \param BodyGenCB Callback that will generate the region code.
1244
  /// \param FiniCB Callback to finalize variable copies.
1245
  /// \param IsNowait If false, a barrier is emitted.
1246
  /// \param DidIt Local variable used as a flag to indicate 'single' thread
1247
  ///
1248
  /// \returns The insertion position *after* the single call.
1249
  InsertPointTy createSingle(const LocationDescription &Loc,
1250
                             BodyGenCallbackTy BodyGenCB,
1251
                             FinalizeCallbackTy FiniCB, bool IsNowait,
1252
                             llvm::Value *DidIt);
1253
 
1254
  /// Generator for '#omp master'
1255
  ///
1256
  /// \param Loc The insert and source location description.
1257
  /// \param BodyGenCB Callback that will generate the region code.
1258
  /// \param FiniCB Callback to finalize variable copies.
1259
  ///
1260
  /// \returns The insertion position *after* the master.
1261
  InsertPointTy createMaster(const LocationDescription &Loc,
1262
                             BodyGenCallbackTy BodyGenCB,
1263
                             FinalizeCallbackTy FiniCB);
1264
 
1265
  /// Generator for '#omp masked'
1266
  ///
1267
  /// \param Loc The insert and source location description.
1268
  /// \param BodyGenCB Callback that will generate the region code.
1269
  /// \param FiniCB Callback to finalize variable copies.
  /// \param Filter NOTE(review): presumably the value of the 'filter' clause
  ///        identifying the thread that executes the region -- confirm
  ///        against the implementation.
1270
  ///
1271
  /// \returns The insertion position *after* the masked.
1272
  InsertPointTy createMasked(const LocationDescription &Loc,
1273
                             BodyGenCallbackTy BodyGenCB,
1274
                             FinalizeCallbackTy FiniCB, Value *Filter);
1275
 
1276
  /// Generator for '#omp critical'
1277
  ///
1278
  /// \param Loc The insert and source location description.
1279
  /// \param BodyGenCB Callback that will generate the region body code.
1280
  /// \param FiniCB Callback to finalize variable copies.
1281
  /// \param CriticalName name of the lock used by the critical directive
1282
  /// \param HintInst Hint Instruction for hint clause associated with critical
1283
  ///
1284
  /// \returns The insertion position *after* the critical.
1285
  InsertPointTy createCritical(const LocationDescription &Loc,
1286
                               BodyGenCallbackTy BodyGenCB,
1287
                               FinalizeCallbackTy FiniCB,
1288
                               StringRef CriticalName, Value *HintInst);
1289
 
1290
  /// Generator for '#omp ordered depend (source | sink)'
1291
  ///
1292
  /// \param Loc The insert and source location description.
1293
  /// \param AllocaIP The insertion point to be used for alloca instructions.
1294
  /// \param NumLoops The number of loops in depend clause.
1295
  /// \param StoreValues The value will be stored in vector address.
1296
  /// \param Name The name of alloca instruction.
1297
  /// \param IsDependSource If true, depend source; otherwise, depend sink.
1298
  ///
1299
  /// \return The insertion position *after* the ordered.
1300
  InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1301
                                    InsertPointTy AllocaIP, unsigned NumLoops,
1302
                                    ArrayRef<llvm::Value *> StoreValues,
1303
                                    const Twine &Name, bool IsDependSource);
1304
 
1305
  /// Generator for '#omp ordered [threads | simd]'
1306
  ///
1307
  /// \param Loc The insert and source location description.
1308
  /// \param BodyGenCB Callback that will generate the region code.
1309
  /// \param FiniCB Callback to finalize variable copies.
1310
  /// \param IsThreads If true, with threads clause or without clause;
1311
  /// otherwise, with simd clause;
1312
  ///
1313
  /// \returns The insertion position *after* the ordered.
1314
  InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1315
                                         BodyGenCallbackTy BodyGenCB,
1316
                                         FinalizeCallbackTy FiniCB,
1317
                                         bool IsThreads);
1318
 
1319
  /// Generator for '#omp sections'
1320
  ///
1321
  /// \param Loc The insert and source location description.
1322
  /// \param AllocaIP The insertion points to be used for alloca instructions.
1323
  /// \param SectionCBs Callbacks that will generate body of each section.
1324
  /// \param PrivCB Callback to copy a given variable (think copy constructor).
1325
  /// \param FiniCB Callback to finalize variable copies.
1326
  /// \param IsCancellable Flag to indicate a cancellable parallel region.
1327
  /// \param IsNowait If true, barrier - to ensure all sections are executed
1328
  /// before moving forward will not be generated.
1329
  /// \returns The insertion position *after* the sections.
1330
  InsertPointTy createSections(const LocationDescription &Loc,
1331
                               InsertPointTy AllocaIP,
1332
                               ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1333
                               PrivatizeCallbackTy PrivCB,
1334
                               FinalizeCallbackTy FiniCB, bool IsCancellable,
1335
                               bool IsNowait);
1336
 
1337
  /// Generator for '#omp section'
1338
  ///
1339
  /// \param Loc The insert and source location description.
1340
  /// \param BodyGenCB Callback that will generate the region body code.
1341
  /// \param FiniCB Callback to finalize variable copies.
1342
  /// \returns The insertion position *after* the section.
1343
  InsertPointTy createSection(const LocationDescription &Loc,
1344
                              BodyGenCallbackTy BodyGenCB,
1345
                              FinalizeCallbackTy FiniCB);
1346
 
1347
  /// Generate conditional branch and relevant BasicBlocks through which private
1348
  /// threads copy the 'copyin' variables from Master copy to threadprivate
1349
  /// copies.
1350
  ///
1351
  /// \param IP insertion block for copyin conditional
1352
  /// \param MasterAddr a pointer to the master variable
1353
  /// \param PrivateAddr a pointer to the threadprivate variable
1354
  /// \param IntPtrTy Pointer size type
1355
  /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1356
  ///                    and copy.in.end block (defaults to true)
1357
  ///
1358
  /// \returns The insertion point where copying operation to be emitted.
1359
  InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
1360
                                         Value *PrivateAddr,
1361
                                         llvm::IntegerType *IntPtrTy,
1362
                                         bool BranchtoEnd = true);
1363
 
1364
  /// Create a runtime call for kmpc_Alloc
1365
  ///
1366
  /// \param Loc The insert and source location description.
1367
  /// \param Size Size of allocated memory space
1368
  /// \param Allocator Allocator information instruction
1369
  /// \param Name Name of call Instruction for OMP_alloc
1370
  ///
1371
  /// \returns CallInst to the OMP_Alloc call
1372
  CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1373
                           Value *Allocator, std::string Name = "");
1374
 
1375
  /// Create a runtime call for kmpc_free
1376
  ///
1377
  /// \param Loc The insert and source location description.
1378
  /// \param Addr Address of memory space to be freed
1379
  /// \param Allocator Allocator information instruction
1380
  /// \param Name Name of call Instruction for OMP_Free
1381
  ///
1382
  /// \returns CallInst to the OMP_Free call
1383
  CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1384
                          Value *Allocator, std::string Name = "");
1385
 
1386
  /// Create a runtime call for kmpc_threadprivate_cached
1387
  ///
1388
  /// \param Loc The insert and source location description.
1389
  /// \param Pointer pointer to data to be cached
1390
  /// \param Size size of data to be cached
1391
  /// \param Name Name of call Instruction for callinst
1392
  ///
1393
  /// \returns CallInst to the thread private cache call.
1394
  CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
1395
                                      llvm::Value *Pointer,
1396
                                      llvm::ConstantInt *Size,
1397
                                      const llvm::Twine &Name = Twine(""));
1398
 
1399
  /// Create a runtime call for __tgt_interop_init
1400
  ///
1401
  /// \param Loc The insert and source location description.
1402
  /// \param InteropVar variable to be allocated
1403
  /// \param InteropType type of interop operation
1404
  /// \param Device device to which offloading will occur
1405
  /// \param NumDependences  number of dependence variables
1406
  /// \param DependenceAddress pointer to dependence variables
1407
  /// \param HaveNowaitClause does nowait clause exist
1408
  ///
1409
  /// \returns CallInst to the __tgt_interop_init call
1410
  CallInst *createOMPInteropInit(const LocationDescription &Loc,
1411
                                 Value *InteropVar,
1412
                                 omp::OMPInteropType InteropType, Value *Device,
1413
                                 Value *NumDependences,
1414
                                 Value *DependenceAddress,
1415
                                 bool HaveNowaitClause);
1416
 
1417
  /// Create a runtime call for __tgt_interop_destroy
1418
  ///
1419
  /// \param Loc The insert and source location description.
1420
  /// \param InteropVar variable to be allocated
1421
  /// \param Device device to which offloading will occur
1422
  /// \param NumDependences  number of dependence variables
1423
  /// \param DependenceAddress pointer to dependence variables
1424
  /// \param HaveNowaitClause does nowait clause exist
1425
  ///
1426
  /// \returns CallInst to the __tgt_interop_destroy call
1427
  CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
1428
                                    Value *InteropVar, Value *Device,
1429
                                    Value *NumDependences,
1430
                                    Value *DependenceAddress,
1431
                                    bool HaveNowaitClause);
1432
 
1433
  /// Create a runtime call for __tgt_interop_use
1434
  ///
1435
  /// \param Loc The insert and source location description.
1436
  /// \param InteropVar variable to be allocated
1437
  /// \param Device device to which offloading will occur
1438
  /// \param NumDependences  number of dependence variables
1439
  /// \param DependenceAddress pointer to dependence variables
1440
  /// \param HaveNowaitClause does nowait clause exist
1441
  ///
1442
  /// \returns CallInst to the __tgt_interop_use call
1443
  CallInst *createOMPInteropUse(const LocationDescription &Loc,
1444
                                Value *InteropVar, Value *Device,
1445
                                Value *NumDependences, Value *DependenceAddress,
1446
                                bool HaveNowaitClause);
1447
 
1448
  /// The `omp target` interface
1449
  ///
1450
  /// For more information about the usage of this interface,
1451
  /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1452
  ///
1453
  ///{
1454
 
1455
  /// Create a runtime call for kmpc_target_init
1456
  ///
1457
  /// \param Loc The insert and source location description.
1458
  /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1459
  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
1460
 
1461
  /// Create a runtime call for kmpc_target_deinit
1462
  ///
1463
  /// \param Loc The insert and source location description.
1464
  /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1465
  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
1466
 
1467
  ///}
1468
 
1469
private:
1470
  // Sets the function attributes expected for the outlined function
1471
  void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
1472
                                                 int32_t NumTeams,
1473
                                                 int32_t NumThreads);
1474
 
1475
  // Creates the function ID/Address for the given outlined function.
1476
  // In the case of an embedded device function the address of the function is
1477
  // used, in the case of a non-offload function a constant is created.
1478
  Constant *createOutlinedFunctionID(Function *OutlinedFn,
1479
                                     StringRef EntryFnIDName);
1480
 
1481
  // Creates the region entry address for the outlined function
1482
  Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
1483
                                        StringRef EntryFnName);
1484
 
1485
public:
1486
  /// Functions used to generate a function with the given name.
1487
  using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
1488
 
1489
  /// Create a unique name for the entry function using the source location
1490
  /// information of the current target region. The name will be something like:
1491
  ///
1492
  /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
1493
  ///
1494
  /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
1495
  /// mangled name of the function that encloses the target region and BB is the
1496
  /// line number of the target region. CC is a count added when more than one
1497
  /// region is located at the same location.
1498
  ///
1499
  /// If this target outline function is not an offload entry, we don't need to
1500
  /// register it. This may happen if it is guarded by an if clause that is
1501
  /// false at compile time, or no target archs have been specified.
1502
  ///
1503
  /// The created target region ID is used by the runtime library to identify
1504
  /// the current target region, so it only has to be unique and not
1505
  /// necessarily point to anything. It could be the pointer to the outlined
1506
  /// function that implements the target region, but we aren't using that so
1507
  /// that the compiler doesn't need to keep that, and could therefore inline
1508
  /// the host function if proven worthwhile during optimization. On the other
1509
  /// hand, if emitting code for the device, the ID has to be the function
1510
  /// address so that it can be retrieved from the offloading entry and launched
1511
  /// by the runtime library. We also mark the outlined function to have
1512
  /// external linkage in case we are emitting code for the device, because
1513
  /// these functions will be entry points to the device.
1514
  ///
1515
  /// \param InfoManager The info manager keeping track of the offload entries
1516
  /// \param EntryInfo The entry information about the function
1517
  /// \param GenerateFunctionCallback The callback function to generate the code
1518
  /// \param NumTeams Default number of teams
1519
  /// \param NumThreads Default number of threads
  /// \param IsOffloadEntry Whether the outlined function is an offload entry;
  ///        as described above, a function that is not an offload entry does
  ///        not need to be registered.
1520
  /// \param OutlinedFn Reference to the pointer to the outlined function.
  ///        NOTE(review): presumably set by this function -- confirm.
1521
  /// \param OutlinedFnID Reference to the target region ID constant described
  ///        above. NOTE(review): presumably set by this function -- confirm.
1522
  void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
1523
                                TargetRegionEntryInfo &EntryInfo,
1524
                                FunctionGenCallback &GenerateFunctionCallback,
1525
                                int32_t NumTeams, int32_t NumThreads,
1526
                                bool IsOffloadEntry, Function *&OutlinedFn,
1527
                                Constant *&OutlinedFnID);
1528
 
1529
  /// Registers the given function and sets up the attributes of the function.
1530
  /// Returns the FunctionID.
1531
  ///
1532
  /// \param InfoManager The info manager keeping track of the offload entries
1533
  /// \param EntryInfo The entry information about the function
1534
  /// \param OutlinedFunction Pointer to the outlined function
1535
  /// \param EntryFnName Name of the outlined function
1536
  /// \param EntryFnIDName Name of the ID to be created
1537
  /// \param NumTeams Default number of teams
1538
  /// \param NumThreads Default number of threads
1539
  Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
1540
                                         TargetRegionEntryInfo &EntryInfo,
1541
                                         Function *OutlinedFunction,
1542
                                         StringRef EntryFnName,
1543
                                         StringRef EntryFnIDName,
1544
                                         int32_t NumTeams, int32_t NumThreads);
1545
 
1546
  /// Declarations for LLVM-IR types (simple, array, function and structure) are
1547
  /// generated below. Their names are defined and used in OpenMPKinds.def. Here
1548
  /// we provide the declarations, the initializeTypes function will provide the
1549
  /// values.
1550
  ///
1551
  ///{
1552
#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
1553
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
1554
  ArrayType *VarName##Ty = nullptr;                                            \
1555
  PointerType *VarName##PtrTy = nullptr;
1556
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
1557
  FunctionType *VarName = nullptr;                                             \
1558
  PointerType *VarName##Ptr = nullptr;
1559
#define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
1560
  StructType *VarName = nullptr;                                               \
1561
  PointerType *VarName##Ptr = nullptr;
1562
#include "llvm/Frontend/OpenMP/OMPKinds.def"
1563
 
1564
  ///}
1565
 
1566
private:
1567
  /// Create all simple and struct types exposed by the runtime and remember
1568
  /// the llvm::PointerTypes of them for easy access later.
1569
  void initializeTypes(Module &M);
1570
 
1571
  /// Common interface for generating entry calls for OMP Directives.
1572
  /// if the directive has a region/body, It will set the insertion
1573
  /// point to the body
1574
  ///
1575
  /// \param OMPD Directive to generate entry blocks for
1576
  /// \param EntryCall Call to the entry OMP Runtime Function
1577
  /// \param ExitBB block where the region ends.
1578
  /// \param Conditional indicate if the entry call result will be used
1579
  ///        to evaluate a conditional of whether a thread will execute
1580
  ///        body code or not.
1581
  ///
1582
  /// \return The insertion position in exit block
1583
  InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
1584
                                         BasicBlock *ExitBB,
1585
                                         bool Conditional = false);
1586
 
1587
  /// Common interface to finalize the region
1588
  ///
1589
  /// \param OMPD Directive to generate exiting code for
1590
  /// \param FinIP Insertion point for emitting Finalization code and exit call
1591
  /// \param ExitCall Call to the ending OMP Runtime Function
1592
  /// \param HasFinalize indicate if the directive will require finalization
1593
  ///         and has a finalization callback in the stack that
1594
  ///        should be called.
1595
  ///
1596
  /// \return The insertion position in exit block
1597
  InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
1598
                                        InsertPointTy FinIP,
1599
                                        Instruction *ExitCall,
1600
                                        bool HasFinalize = true);
1601
 
1602
  /// Common Interface to generate OMP inlined regions
1603
  ///
1604
  /// \param OMPD Directive to generate inlined region for
1605
  /// \param EntryCall Call to the entry OMP Runtime Function
1606
  /// \param ExitCall Call to the ending OMP Runtime Function
1607
  /// \param BodyGenCB Body code generation callback.
1608
  /// \param FiniCB Finalization Callback. Will be called when finalizing region
1609
  /// \param Conditional indicate if the entry call result will be used
1610
  ///        to evaluate a conditional of whether a thread will execute
1611
  ///        body code or not.
1612
  /// \param HasFinalize indicate if the directive will require finalization
1613
  ///        and has a finalization callback in the stack that
1614
  ///        should be called.
1615
  /// \param IsCancellable if HasFinalize is set to true, indicate if
1616
  ///        the directive should be cancellable.
1617
  /// \return The insertion point after the region
1618
 
1619
  InsertPointTy
1620
  EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
1621
                       Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
1622
                       FinalizeCallbackTy FiniCB, bool Conditional = false,
1623
                       bool HasFinalize = true, bool IsCancellable = false);
1624
 
1625
  /// Get the platform-specific name separator.
1626
  /// \param Parts different parts of the final name that needs separation
1627
  /// \param FirstSeparator First separator used between the initial two
1628
  ///        parts of the name.
1629
  /// \param Separator separator used between all of the rest consecutive
1630
  ///        parts of the name
1631
  static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
1632
                                           StringRef FirstSeparator,
1633
                                           StringRef Separator);
1634
 
1635
  /// Returns corresponding lock object for the specified critical region
1636
  /// name. If the lock object does not exist it is created, otherwise the
1637
  /// reference to the existing copy is returned.
1638
  /// \param CriticalName Name of the critical region.
1639
  ///
1640
  Value *getOMPCriticalRegionLock(StringRef CriticalName);
1641
 
1642
  /// Callback type for Atomic Expression update
1643
  /// ex:
1644
  /// \code{.cpp}
1645
  /// unsigned x = 0;
1646
  /// #pragma omp atomic update
1647
  /// x = Expr(x_old);  //Expr() is any legal operation
1648
  /// \endcode
1649
  ///
1650
  /// \param XOld the value of the atomic memory address to use for update
1651
  /// \param IRB reference to the IRBuilder to use
1652
  ///
1653
  /// \returns Value to update X to.
1654
  using AtomicUpdateCallbackTy =
1655
      const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
1656
 
1657
private:
1658
  enum AtomicKind { Read, Write, Update, Capture, Compare };
1659
 
1660
  /// Determine whether to emit flush or not
1661
  ///
1662
  /// \param Loc    The insert and source location description.
1663
  /// \param AO     The required atomic ordering
1664
  /// \param AK     The OpenMP atomic operation kind used.
1665
  ///
1666
  /// \returns Whether a flush was emitted or not.
1667
  bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
1668
                                    AtomicOrdering AO, AtomicKind AK);
1669
 
1670
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
1671
  /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
1672
  /// Only Scalar data types.
1673
  ///
1674
  /// \param AllocaIP     The insertion point to be used for alloca
1675
  ///                     instructions.
1676
  /// \param X            The target atomic pointer to be updated
1677
  /// \param XElemTy      The element type of the atomic pointer.
1678
  /// \param Expr         The value to update X with.
1679
  /// \param AO           Atomic ordering of the generated atomic
1680
  ///                     instructions.
1681
  /// \param RMWOp        The binary operation used for update. If the
1682
  ///                     operation is not supported by atomicRMW,
1683
  ///                     or belongs to {FADD, FSUB, BAD_BINOP},
1684
  ///                     then a `cmpExch` based atomic will be generated.
1685
  /// \param UpdateOp     Code generator for complex expressions that cannot be
1686
  ///                     expressed through atomicrmw instruction.
1687
  /// \param VolatileX    true if \a X is volatile.
1688
  /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
1689
  ///                     update expression, false otherwise.
1690
  ///                     (e.g. true for X = X BinOp Expr)
1691
  ///
1692
  /// \returns A pair of the old value of X before the update, and the value
1693
  ///          used for the update.
1694
  std::pair<Value *, Value *>
1695
  emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
1696
                   AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1697
                   AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
1698
                   bool IsXBinopExpr);
1699
 
1700
  /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
1701
  ///
1702
  /// \return The instruction
1703
  Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
1704
                                AtomicRMWInst::BinOp RMWOp);
1705
 
1706
public:
1707
  /// a struct to pack relevant information while generating atomic Ops
1708
  struct AtomicOpValue {
1709
    Value *Var = nullptr;
1710
    Type *ElemTy = nullptr;
1711
    bool IsSigned = false;
1712
    bool IsVolatile = false;
1713
  };
1714
 
1715
  /// Emit atomic Read for : V = X --- Only Scalar data types.
1716
  ///
1717
  /// \param Loc The insert and source location description.
1718
  /// \param X   The target pointer to be atomically read
1719
  /// \param V   Memory address where to store atomically read
1720
  ///            value
1721
  /// \param AO  Atomic ordering of the generated atomic
1722
  ///            instructions.
1723
  ///
1724
  /// \return Insertion point after generated atomic read IR.
1725
  InsertPointTy createAtomicRead(const LocationDescription &Loc,
1726
                                 AtomicOpValue &X, AtomicOpValue &V,
1727
                                 AtomicOrdering AO);
1728
 
1729
  /// Emit atomic write for : X = Expr --- Only Scalar data types.
1730
  ///
1731
  /// \param Loc    The insert and source location description.
1732
  /// \param X                  The target pointer to be atomically written to
1733
  /// \param Expr               The value to store.
1734
  /// \param AO                 Atomic ordering of the generated atomic
1735
  ///               instructions.
1736
  ///
1737
  /// \return Insertion point after generated atomic Write IR.
1738
  InsertPointTy createAtomicWrite(const LocationDescription &Loc,
1739
                                  AtomicOpValue &X, Value *Expr,
1740
                                  AtomicOrdering AO);
1741
 
1742
  /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
1743
  /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
1744
  /// Only Scalar data types.
1745
  ///
1746
  /// \param Loc          The insert and source location description.
1747
  /// \param AllocaIP     The insertion point to be used for alloca instructions.
1748
  /// \param X            The target atomic pointer to be updated
1749
  /// \param Expr         The value to update X with.
1750
  /// \param AO           Atomic ordering of the generated atomic instructions.
1751
  /// \param RMWOp        The binary operation used for update. If the operation
1752
  ///                     is not supported by atomicRMW, or belongs to
1753
  ///                     {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
1754
  ///                     atomic will be generated.
1755
  /// \param UpdateOp     Code generator for complex expressions that cannot be
1756
  ///                     expressed through atomicrmw instruction.
1757
  /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
1758
  ///                     update expression, false otherwise.
1759
  ///                     (e.g. true for X = X BinOp Expr)
1760
  ///
1761
  /// \return Insertion point after generated atomic update IR.
1762
  InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
1763
                                   InsertPointTy AllocaIP, AtomicOpValue &X,
1764
                                   Value *Expr, AtomicOrdering AO,
1765
                                   AtomicRMWInst::BinOp RMWOp,
1766
                                   AtomicUpdateCallbackTy &UpdateOp,
1767
                                   bool IsXBinopExpr);
1768
 
1769
  /// Emit atomic update for constructs: --- Only Scalar data types
1770
  /// V = X; X = X BinOp Expr ,
1771
  /// X = X BinOp Expr; V = X,
1772
  /// V = X; X = Expr BinOp X,
1773
  /// X = Expr BinOp X; V = X,
1774
  /// V = X; X = UpdateOp(X),
1775
  /// X = UpdateOp(X); V = X,
1776
  ///
1777
  /// \param Loc        The insert and source location description.
1778
  /// \param AllocaIP   The insertion point to be used for alloca instructions.
1779
  /// \param X          The target atomic pointer to be updated
1780
  /// \param V          Memory address where to store captured value
1781
  /// \param Expr       The value to update X with.
1782
  /// \param AO         Atomic ordering of the generated atomic instructions
1783
  /// \param RMWOp      The binary operation used for update. If
1784
  ///                   operation is not supported by atomicRMW, or belong to
1785
  ///                     {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
1786
  ///                   atomic will be generated.
1787
  /// \param UpdateOp   Code generator for complex expressions that cannot be
1788
  ///                   expressed through atomicrmw instruction.
1789
  /// \param UpdateExpr true if X is an in place update of the form
1790
  ///                   X = X BinOp Expr or X = Expr BinOp X
1791
  /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
1792
  ///                     update expression, false otherwise.
1793
  ///                     (e.g. true for X = X BinOp Expr)
1794
  /// \param IsPostfixUpdate true if original value of 'x' must be stored in
1795
  ///                        'v', not an updated one.
1796
  ///
1797
  /// \return Insertion point after generated atomic capture IR.
1798
  InsertPointTy
1799
  createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
1800
                      AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
1801
                      AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1802
                      AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
1803
                      bool IsPostfixUpdate, bool IsXBinopExpr);
1804
 
1805
  /// Emit atomic compare for constructs: --- Only scalar data types
1806
  /// cond-expr-stmt:
1807
  /// x = x ordop expr ? expr : x;
1808
  /// x = expr ordop x ? expr : x;
1809
  /// x = x == e ? d : x;
1810
  /// x = e == x ? d : x; (this one is not in the spec)
1811
  /// cond-update-stmt:
1812
  /// if (x ordop expr) { x = expr; }
1813
  /// if (expr ordop x) { x = expr; }
1814
  /// if (x == e) { x = d; }
1815
  /// if (e == x) { x = d; } (this one is not in the spec)
1816
  /// conditional-update-capture-atomic:
1817
  /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
1818
  /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
1819
  /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
1820
  ///                                         IsFailOnly=true)
1821
  /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
1822
  /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
1823
  ///                                                IsFailOnly=true)
1824
  ///
1825
  /// \param Loc          The insert and source location description.
1826
  /// \param X            The target atomic pointer to be updated.
1827
  /// \param V            Memory address where to store captured value (for
1828
  ///                     compare capture only).
1829
  /// \param R            Memory address where to store comparison result
1830
  ///                     (for compare capture with '==' only).
1831
  /// \param E            The expected value ('e') for forms that use an
1832
  ///                     equality comparison or an expression ('expr') for
1833
  ///                     forms that use 'ordop' (logically an atomic maximum or
1834
  ///                     minimum).
1835
  /// \param D            The desired value for forms that use an equality
1836
  ///                     comparison. If forms that use 'ordop', it should be
1837
  ///                     \p nullptr.
1838
  /// \param AO           Atomic ordering of the generated atomic instructions.
1839
  /// \param Op           Atomic compare operation. It can only be ==, <, or >.
1840
  /// \param IsXBinopExpr True if the conditional statement is in the form where
1841
  ///                     x is on LHS. It only matters for < or >.
1842
  /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
1843
  ///                         'v', not an updated one (for compare capture
1844
  ///                         only).
1845
  /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
1846
  ///                     only when the comparison fails. This is only valid for
1847
  ///                     the case the comparison is '=='.
1848
  ///
1849
  /// \return Insertion point after generated atomic capture IR.
1850
  InsertPointTy
1851
  createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
1852
                      AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
1853
                      AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
1854
                      bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
1855
 
1856
  /// Create the control flow structure of a canonical OpenMP loop.
1857
  ///
1858
  /// The emitted loop will be disconnected, i.e. no edge to the loop's
1859
  /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
1860
  /// IRBuilder location is not preserved.
1861
  ///
1862
  /// \param DL        DebugLoc used for the instructions in the skeleton.
1863
  /// \param TripCount Value to be used for the trip count.
1864
  /// \param F         Function in which to insert the BasicBlocks.
1865
  /// \param PreInsertBefore  Where to insert BBs that execute before the body,
1866
  ///                         typically the body itself.
1867
  /// \param PostInsertBefore Where to insert BBs that execute after the body.
1868
  /// \param Name      Base name used to derive BB
1869
  ///                  and instruction names.
1870
  ///
1871
  /// \returns The CanonicalLoopInfo that represents the emitted loop.
1872
  CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
1873
                                        Function *F,
1874
                                        BasicBlock *PreInsertBefore,
1875
                                        BasicBlock *PostInsertBefore,
1876
                                        const Twine &Name = {});
1877
  /// OMP Offload Info Metadata name string
1878
  const std::string ompOffloadInfoName = "omp_offload.info";
1879
 
1880
  /// Loads all the offload entries information from the host IR
1881
  /// metadata. This function is only meant to be used with device code
1882
  /// generation.
1883
  ///
1884
  /// \param M         Module to load Metadata info from. Module passed maybe
1885
  /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
1886
  /// \param OffloadEntriesInfoManager Initialize Offload Entry information.
1887
  void
1888
  loadOffloadInfoMetadata(Module &M,
1889
                          OffloadEntriesInfoManager &OffloadEntriesInfoManager);
1890
 
1891
  /// Gets (if variable with the given name already exist) or creates
1892
  /// internal global variable with the specified Name. The created variable has
1893
  /// linkage CommonLinkage by default and is initialized by null value.
1894
  /// \param Ty Type of the global variable. If it is exist already the type
1895
  /// must be the same.
1896
  /// \param Name Name of the variable.
1897
  GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
1898
                                              unsigned AddressSpace = 0);
1899
};
1900
 
1901
/// Data structure to contain the information needed to uniquely identify
1902
/// a target entry.
1903
struct TargetRegionEntryInfo {
1904
  std::string ParentName;
1905
  unsigned DeviceID;
1906
  unsigned FileID;
1907
  unsigned Line;
1908
  unsigned Count;
1909
 
1910
  TargetRegionEntryInfo()
1911
      : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {}
1912
  TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
1913
                        unsigned FileID, unsigned Line, unsigned Count = 0)
1914
      : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
1915
        Count(Count) {}
1916
 
1917
  static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
1918
                                         StringRef ParentName,
1919
                                         unsigned DeviceID, unsigned FileID,
1920
                                         unsigned Line, unsigned Count);
1921
 
1922
  bool operator<(const TargetRegionEntryInfo RHS) const {
1923
    return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
1924
           std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
1925
                           RHS.Count);
1926
  }
1927
};
1928
 
1929
/// Class that manages information about offload code regions and data
1930
class OffloadEntriesInfoManager {
1931
  /// Number of entries registered so far.
1932
  OpenMPIRBuilderConfig Config;
1933
  unsigned OffloadingEntriesNum = 0;
1934
 
1935
public:
1936
  void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
1937
 
1938
  /// Base class of the entries info.
1939
  class OffloadEntryInfo {
1940
  public:
1941
    /// Kind of a given entry.
1942
    enum OffloadingEntryInfoKinds : unsigned {
1943
      /// Entry is a target region.
1944
      OffloadingEntryInfoTargetRegion = 0,
1945
      /// Entry is a declare target variable.
1946
      OffloadingEntryInfoDeviceGlobalVar = 1,
1947
      /// Invalid entry info.
1948
      OffloadingEntryInfoInvalid = ~0u
1949
    };
1950
 
1951
  protected:
1952
    OffloadEntryInfo() = delete;
1953
    explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
1954
    explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
1955
                              uint32_t Flags)
1956
        : Flags(Flags), Order(Order), Kind(Kind) {}
1957
    ~OffloadEntryInfo() = default;
1958
 
1959
  public:
1960
    bool isValid() const { return Order != ~0u; }
1961
    unsigned getOrder() const { return Order; }
1962
    OffloadingEntryInfoKinds getKind() const { return Kind; }
1963
    uint32_t getFlags() const { return Flags; }
1964
    void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
1965
    Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
1966
    void setAddress(Constant *V) {
1967
      assert(!Addr.pointsToAliveValue() && "Address has been set before!");
1968
      Addr = V;
1969
    }
1970
    static bool classof(const OffloadEntryInfo *Info) { return true; }
1971
 
1972
  private:
1973
    /// Address of the entity that has to be mapped for offloading.
1974
    WeakTrackingVH Addr;
1975
 
1976
    /// Flags associated with the device global.
1977
    uint32_t Flags = 0u;
1978
 
1979
    /// Order this entry was emitted.
1980
    unsigned Order = ~0u;
1981
 
1982
    OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
1983
  };
1984
 
1985
  /// Return true if a there are no entries defined.
1986
  bool empty() const;
1987
  /// Return number of entries defined so far.
1988
  unsigned size() const { return OffloadingEntriesNum; }
1989
 
1990
  OffloadEntriesInfoManager() : Config() {}
1991
 
1992
  //
1993
  // Target region entries related.
1994
  //
1995
 
1996
  /// Kind of the target registry entry.
1997
  enum OMPTargetRegionEntryKind : uint32_t {
1998
    /// Mark the entry as target region.
1999
    OMPTargetRegionEntryTargetRegion = 0x0,
2000
    /// Mark the entry as a global constructor.
2001
    OMPTargetRegionEntryCtor = 0x02,
2002
    /// Mark the entry as a global destructor.
2003
    OMPTargetRegionEntryDtor = 0x04,
2004
  };
2005
 
2006
  /// Target region entries info.
2007
  class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
2008
    /// Address that can be used as the ID of the entry.
2009
    Constant *ID = nullptr;
2010
 
2011
  public:
2012
    OffloadEntryInfoTargetRegion()
2013
        : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
2014
    explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
2015
                                          Constant *ID,
2016
                                          OMPTargetRegionEntryKind Flags)
2017
        : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
2018
          ID(ID) {
2019
      setAddress(Addr);
2020
    }
2021
 
2022
    Constant *getID() const { return ID; }
2023
    void setID(Constant *V) {
2024
      assert(!ID && "ID has been set before!");
2025
      ID = V;
2026
    }
2027
    static bool classof(const OffloadEntryInfo *Info) {
2028
      return Info->getKind() == OffloadingEntryInfoTargetRegion;
2029
    }
2030
  };
2031
 
2032
  /// Initialize target region entry.
2033
  /// This is ONLY needed for DEVICE compilation.
2034
  void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
2035
                                       unsigned Order);
2036
  /// Register target region entry.
2037
  void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
2038
                                     Constant *Addr, Constant *ID,
2039
                                     OMPTargetRegionEntryKind Flags);
2040
  /// Return true if a target region entry with the provided information
2041
  /// exists.
2042
  bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
2043
                                bool IgnoreAddressId = false) const;
2044
 
2045
  // Return the Name based on \a EntryInfo using the next available Count.
2046
  void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
2047
                                  const TargetRegionEntryInfo &EntryInfo);
2048
 
2049
  /// brief Applies action \a Action on all registered entries.
2050
  typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
2051
                            const OffloadEntryInfoTargetRegion &)>
2052
      OffloadTargetRegionEntryInfoActTy;
2053
  void
2054
  actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
2055
 
2056
  //
2057
  // Device global variable entries related.
2058
  //
2059
 
2060
  /// Kind of the global variable entry..
2061
  enum OMPTargetGlobalVarEntryKind : uint32_t {
2062
    /// Mark the entry as a to declare target.
2063
    OMPTargetGlobalVarEntryTo = 0x0,
2064
    /// Mark the entry as a to declare target link.
2065
    OMPTargetGlobalVarEntryLink = 0x1,
2066
  };
2067
 
2068
  /// Device global variable entries info.
2069
  class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
2070
    /// Type of the global variable.
2071
    int64_t VarSize;
2072
    GlobalValue::LinkageTypes Linkage;
2073
 
2074
  public:
2075
    OffloadEntryInfoDeviceGlobalVar()
2076
        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
2077
    explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
2078
                                             OMPTargetGlobalVarEntryKind Flags)
2079
        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
2080
    explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
2081
                                             int64_t VarSize,
2082
                                             OMPTargetGlobalVarEntryKind Flags,
2083
                                             GlobalValue::LinkageTypes Linkage)
2084
        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
2085
          VarSize(VarSize), Linkage(Linkage) {
2086
      setAddress(Addr);
2087
    }
2088
 
2089
    int64_t getVarSize() const { return VarSize; }
2090
    void setVarSize(int64_t Size) { VarSize = Size; }
2091
    GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
2092
    void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
2093
    static bool classof(const OffloadEntryInfo *Info) {
2094
      return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
2095
    }
2096
  };
2097
 
2098
  /// Initialize device global variable entry.
2099
  /// This is ONLY used for DEVICE compilation.
2100
  void initializeDeviceGlobalVarEntryInfo(StringRef Name,
2101
                                          OMPTargetGlobalVarEntryKind Flags,
2102
                                          unsigned Order);
2103
 
2104
  /// Register device global variable entry.
2105
  void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
2106
                                        int64_t VarSize,
2107
                                        OMPTargetGlobalVarEntryKind Flags,
2108
                                        GlobalValue::LinkageTypes Linkage);
2109
  /// Checks if the variable with the given name has been registered already.
2110
  bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
2111
    return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
2112
  }
2113
  /// Applies action \a Action on all registered entries.
2114
  typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
2115
      OffloadDeviceGlobalVarEntryInfoActTy;
2116
  void actOnDeviceGlobalVarEntriesInfo(
2117
      const OffloadDeviceGlobalVarEntryInfoActTy &Action);
2118
 
2119
private:
2120
  /// Return the count of entries at a particular source location.
2121
  unsigned
2122
  getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
2123
 
2124
  /// Update the count of entries at a particular source location.
2125
  void
2126
  incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
2127
 
2128
  static TargetRegionEntryInfo
2129
  getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
2130
    return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
2131
                                 EntryInfo.FileID, EntryInfo.Line, 0);
2132
  }
2133
 
2134
  // Count of entries at a location.
2135
  std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
2136
 
2137
  // Storage for target region entries kind.
2138
  typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
2139
      OffloadEntriesTargetRegionTy;
2140
  OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
2141
  /// Storage for device global variable entries kind. The storage is to be
2142
  /// indexed by mangled name.
2143
  typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
2144
      OffloadEntriesDeviceGlobalVarTy;
2145
  OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
2146
};
2147
 
2148
/// Class to represented the control flow structure of an OpenMP canonical loop.
2149
///
2150
/// The control-flow structure is standardized for easy consumption by
2151
/// directives associated with loops. For instance, the worksharing-loop
2152
/// construct may change this control flow such that each loop iteration is
2153
/// executed on only one thread. The constraints of a canonical loop in brief
2154
/// are:
2155
///
2156
///  * The number of loop iterations must have been computed before entering the
2157
///    loop.
2158
///
2159
///  * Has an (unsigned) logical induction variable that starts at zero and
2160
///    increments by one.
2161
///
2162
///  * The loop's CFG itself has no side-effects. The OpenMP specification
2163
///    itself allows side-effects, but the order in which they happen, including
2164
///    how often or whether at all, is unspecified. We expect that the frontend
2165
///    will emit those side-effect instructions somewhere (e.g. before the loop)
2166
///    such that the CanonicalLoopInfo itself can be side-effect free.
2167
///
2168
/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2169
/// execution of a loop body that satifies these constraints. It does NOT
2170
/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2171
/// CanonicalLoopInfo for such purposes.
2172
///
2173
/// The control flow can be described as follows:
2174
///
2175
///     Preheader
2176
///        |
2177
///  /-> Header
2178
///  |     |
2179
///  |    Cond---\
2180
///  |     |     |
2181
///  |    Body   |
2182
///  |    | |    |
2183
///  |   <...>   |
2184
///  |    | |    |
2185
///   \--Latch   |
2186
///              |
2187
///             Exit
2188
///              |
2189
///            After
2190
///
2191
/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2192
/// including) and end at AfterIP (at the After's first instruction, excluding).
2193
/// That is, instructions in the Preheader and After blocks (except the
2194
/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2195
/// side-effects. Typically, the Preheader is used to compute the loop's trip
2196
/// count. The instructions from BodyIP (at the Body block's first instruction,
2197
/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2198
/// control and thus can have side-effects. The body block is the single entry
2199
/// point into the loop body, which may contain arbitrary control flow as long
2200
/// as all control paths eventually branch to the Latch block.
2201
///
2202
/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2203
/// Latch to guarantee that there is only a single edge to the latch. It would
2204
/// make loop transformations easier to not needing to consider multiple
2205
/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2206
/// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that
2207
/// executes after each body iteration.
2208
///
2209
/// There must be no loop-carried dependencies through llvm::Values. This is
2210
/// equivalant to that the Latch has no PHINode and the Header's only PHINode is
2211
/// for the induction variable.
2212
///
2213
/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2214
/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2215
/// by assertOK(). They are expected to not be modified unless explicitly
2216
/// modifying the CanonicalLoopInfo through a methods that applies a OpenMP
2217
/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2218
/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2219
/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2220
/// anymore as its underlying control flow may not exist anymore.
2221
/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2222
/// may also return a new CanonicalLoopInfo that can be passed to other
2223
/// loop-associated construct implementing methods. These loop-transforming
2224
/// methods may either create a new CanonicalLoopInfo usually using
2225
/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2226
/// modify one of the input CanonicalLoopInfo and return it as representing the
2227
/// modified loop. What is done is an implementation detail of
2228
/// transformation-implementing method and callers should always assume that the
2229
/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2230
/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2231
/// created by createCanonicalLoop, such that transforming methods do not have
2232
/// to special case where the CanonicalLoopInfo originated from.
2233
///
2234
/// Generally, methods consuming CanonicalLoopInfo do not need an
2235
/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2236
/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2237
/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2238
/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2239
/// any InsertPoint in the Preheader, After or Block can still be used after
2240
/// calling such a method.
2241
///
2242
/// TODO: Provide mechanisms for exception handling and cancellation points.
2243
///
2244
/// Defined outside OpenMPIRBuilder because nested classes cannot be
2245
/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2246
class CanonicalLoopInfo {
2247
  friend class OpenMPIRBuilder;
2248
 
2249
private:
2250
  BasicBlock *Header = nullptr;
2251
  BasicBlock *Cond = nullptr;
2252
  BasicBlock *Latch = nullptr;
2253
  BasicBlock *Exit = nullptr;
2254
 
2255
  /// Add the control blocks of this loop to \p BBs.
2256
  ///
2257
  /// This does not include any block from the body, including the one returned
2258
  /// by getBody().
2259
  ///
2260
  /// FIXME: This currently includes the Preheader and After blocks even though
2261
  /// their content is (mostly) not under CanonicalLoopInfo's control.
2262
  /// Re-evaluated whether this makes sense.
2263
  void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2264
 
2265
  /// Sets the number of loop iterations to the given value. This value must be
2266
  /// valid in the condition block (i.e., defined in the preheader) and is
2267
  /// interpreted as an unsigned integer.
2268
  void setTripCount(Value *TripCount);
2269
 
2270
  /// Replace all uses of the canonical induction variable in the loop body with
2271
  /// a new one.
2272
  ///
2273
  /// The intended use case is to update the induction variable for an updated
2274
  /// iteration space such that it can stay normalized in the 0...tripcount-1
2275
  /// range.
2276
  ///
2277
  /// The \p Updater is called with the (presumable updated) current normalized
2278
  /// induction variable and is expected to return the value that uses of the
2279
  /// pre-updated induction values should use instead, typically dependent on
2280
  /// the new induction variable. This is a lambda (instead of e.g. just passing
2281
  /// the new value) to be able to distinguish the uses of the pre-updated
2282
  /// induction variable and uses of the induction varible to compute the
2283
  /// updated induction variable value.
2284
  void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2285
 
2286
public:
2287
  /// Returns whether this object currently represents the IR of a loop. If
2288
  /// returning false, it may have been consumed by a loop transformation or not
2289
  /// been intialized. Do not use in this case;
2290
  bool isValid() const { return Header; }
2291
 
2292
  /// The preheader ensures that there is only a single edge entering the loop.
2293
  /// Code that must be execute before any loop iteration can be emitted here,
2294
  /// such as computing the loop trip count and begin lifetime markers. Code in
2295
  /// the preheader is not considered part of the canonical loop.
2296
  BasicBlock *getPreheader() const;
2297
 
2298
  /// The header is the entry for each iteration. In the canonical control flow,
2299
  /// it only contains the PHINode for the induction variable.
2300
  BasicBlock *getHeader() const {
2301
    assert(isValid() && "Requires a valid canonical loop");
2302
    return Header;
2303
  }
2304
 
2305
  /// The condition block computes whether there is another loop iteration. If
2306
  /// yes, branches to the body; otherwise to the exit block.
2307
  BasicBlock *getCond() const {
2308
    assert(isValid() && "Requires a valid canonical loop");
2309
    return Cond;
2310
  }
2311
 
2312
  /// The body block is the single entry for a loop iteration and not controlled
2313
  /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2314
  /// eventually branch to the \p Latch block.
2315
  BasicBlock *getBody() const {
2316
    assert(isValid() && "Requires a valid canonical loop");
2317
    return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2318
  }
2319
 
2320
  /// Reaching the latch indicates the end of the loop body code. In the
2321
  /// canonical control flow, it only contains the increment of the induction
2322
  /// variable.
2323
  BasicBlock *getLatch() const {
2324
    assert(isValid() && "Requires a valid canonical loop");
2325
    return Latch;
2326
  }
2327
 
2328
  /// Reaching the exit indicates no more iterations are being executed.
2329
  BasicBlock *getExit() const {
2330
    assert(isValid() && "Requires a valid canonical loop");
2331
    return Exit;
2332
  }
2333
 
2334
  /// The after block is intended for clean-up code such as lifetime end
2335
  /// markers. It is separate from the exit block to ensure, analogous to the
2336
  /// preheader, it having just a single entry edge and being free from PHI
2337
  /// nodes should there be multiple loop exits (such as from break
2338
  /// statements/cancellations).
2339
  BasicBlock *getAfter() const {
2340
    assert(isValid() && "Requires a valid canonical loop");
2341
    return Exit->getSingleSuccessor();
2342
  }
2343
 
2344
  /// Returns the llvm::Value containing the number of loop iterations. It must
2345
  /// be valid in the preheader and always interpreted as an unsigned integer of
2346
  /// any bit-width.
2347
  Value *getTripCount() const {
2348
    assert(isValid() && "Requires a valid canonical loop");
2349
    Instruction *CmpI = &Cond->front();
2350
    assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2351
    return CmpI->getOperand(1);
2352
  }
2353
 
2354
  /// Returns the instruction representing the current logical induction
2355
  /// variable. Always unsigned, always starting at 0 with an increment of one.
2356
  Instruction *getIndVar() const {
2357
    assert(isValid() && "Requires a valid canonical loop");
2358
    Instruction *IndVarPHI = &Header->front();
2359
    assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2360
    return IndVarPHI;
2361
  }
2362
 
2363
  /// Return the type of the induction variable (and the trip count).
2364
  Type *getIndVarType() const {
2365
    assert(isValid() && "Requires a valid canonical loop");
2366
    return getIndVar()->getType();
2367
  }
2368
 
2369
  /// Return the insertion point for user code before the loop.
2370
  OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
2371
    assert(isValid() && "Requires a valid canonical loop");
2372
    BasicBlock *Preheader = getPreheader();
2373
    return {Preheader, std::prev(Preheader->end())};
2374
  };
2375
 
2376
  /// Return the insertion point for user code in the body.
2377
  OpenMPIRBuilder::InsertPointTy getBodyIP() const {
2378
    assert(isValid() && "Requires a valid canonical loop");
2379
    BasicBlock *Body = getBody();
2380
    return {Body, Body->begin()};
2381
  };
2382
 
2383
  /// Return the insertion point for user code after the loop.
2384
  OpenMPIRBuilder::InsertPointTy getAfterIP() const {
2385
    assert(isValid() && "Requires a valid canonical loop");
2386
    BasicBlock *After = getAfter();
2387
    return {After, After->begin()};
2388
  };
2389
 
2390
  Function *getFunction() const {
2391
    assert(isValid() && "Requires a valid canonical loop");
2392
    return Header->getParent();
2393
  }
2394
 
2395
  /// Consistency self-check.
2396
  void assertOK() const;
2397
 
2398
  /// Invalidate this loop. That is, the underlying IR does not fulfill the
2399
  /// requirements of an OpenMP canonical loop anymore.
2400
  void invalidate();
2401
};
2402
 
2403
} // end namespace llvm
2404
 
2405
#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H