Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the OpenMPIRBuilder class and helpers used as a convenient
  10. // way to create LLVM instructions for OpenMP directives.
  11. //
  12. //===----------------------------------------------------------------------===//
  13.  
  14. #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  15. #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  16.  
  17. #include "llvm/Analysis/MemorySSAUpdater.h"
  18. #include "llvm/Frontend/OpenMP/OMPConstants.h"
  19. #include "llvm/IR/DebugLoc.h"
  20. #include "llvm/IR/IRBuilder.h"
  21. #include "llvm/Support/Allocator.h"
  22. #include <forward_list>
  23. #include <map>
  24. #include <optional>
  25.  
  26. namespace llvm {
  27. class CanonicalLoopInfo;
  28. struct TargetRegionEntryInfo;
  29. class OffloadEntriesInfoManager;
  30.  
  31. /// Move the instructions after an InsertPoint to the beginning of another
  32. /// BasicBlock.
  33. ///
  34. /// The instructions after \p IP are moved to the beginning of \p New which must
  35. /// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
  36. /// \p New will be added such that there is no semantic change. Otherwise, the
  37. /// \p IP insert block remains degenerate and it is up to the caller to insert a
  38. /// terminator.
  39. void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
  40.               bool CreateBranch);
  41.  
  42. /// Splice the BasicBlock \p New at \p Builder's current insertion point. The
  43. /// builder's new insert location will stick to after the instruction before
  44. /// the insertion point (instead of moving with the instruction the InsertPoint
  45. /// stores internally).
  46. void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
  47.  
  48. /// Split a BasicBlock at an InsertPoint, even if the block is degenerate
  49. /// (missing the terminator).
  50. ///
  51. /// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
  52. /// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
  53. /// is true, a branch to the new successor will be created such that
  54. /// semantically there is no change; otherwise the block of the insertion point
  55. /// remains degenerate and it is the caller's responsibility to insert a
  56. /// terminator. Returns the new successor block.
  57. BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
  58.                     llvm::Twine Name = {});
  59.  
  60. /// Split a BasicBlock at \p Builder's insertion point, even if the block is
  61. /// degenerate (missing the terminator). The builder's new insert location will
  62. /// stick to after the instruction before the insertion point (instead of moving
  63. /// with the instruction the InsertPoint stores internally).
  64. BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
  65.                     llvm::Twine Name = {});
  66.  
  67. /// Split a BasicBlock at \p Builder's insertion point, even if the block is
  68. /// degenerate (missing the terminator). The builder's new insert location will
  69. /// stick to after the instruction before the insertion point (instead of moving
  70. /// with the instruction the InsertPoint stores internally).
  71. BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
  72.  
  73. /// Like splitBB, but the new block's name reuses the current block's name.
  74. BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
  75.                               llvm::Twine Suffix = ".split");
  76.  
  77. /// Captures attributes that affect generating LLVM-IR using the
  78. /// OpenMPIRBuilder and related classes. Note that not all attributes are
  79. /// required for all classes or functions. In some use cases the configuration
  80. /// is not necessary at all, because because the only functions that are called
  81. /// are ones that are not dependent on the configuration.
  82. class OpenMPIRBuilderConfig {
  83. public:
  84.   /// Flag for specifying if the compilation is done for embedded device code
  85.   /// or host code.
  86.   std::optional<bool> IsEmbedded;
  87.  
  88.   /// Flag for specifying if the compilation is done for an offloading target,
  89.   /// like GPU.
  90.   std::optional<bool> IsTargetCodegen;
  91.  
  92.   /// Flag for specifying weather a requires unified_shared_memory
  93.   /// directive is present or not.
  94.   std::optional<bool> HasRequiresUnifiedSharedMemory;
  95.  
  96.   // Flag for specifying if offloading is mandatory.
  97.   std::optional<bool> OpenMPOffloadMandatory;
  98.  
  99.   /// First separator used between the initial two parts of a name.
  100.   std::optional<StringRef> FirstSeparator;
  101.   /// Separator used between all of the rest consecutive parts of s name
  102.   std::optional<StringRef> Separator;
  103.  
  104.   OpenMPIRBuilderConfig() {}
  105.   OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen,
  106.                         bool HasRequiresUnifiedSharedMemory,
  107.                         bool OpenMPOffloadMandatory)
  108.       : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen),
  109.         HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
  110.         OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
  111.  
  112.   // Getters functions that assert if the required values are not present.
  113.   bool isEmbedded() const {
  114.     assert(IsEmbedded.has_value() && "IsEmbedded is not set");
  115.     return *IsEmbedded;
  116.   }
  117.  
  118.   bool isTargetCodegen() const {
  119.     assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
  120.     return *IsTargetCodegen;
  121.   }
  122.  
  123.   bool hasRequiresUnifiedSharedMemory() const {
  124.     assert(HasRequiresUnifiedSharedMemory.has_value() &&
  125.            "HasUnifiedSharedMemory is not set");
  126.     return *HasRequiresUnifiedSharedMemory;
  127.   }
  128.  
  129.   bool openMPOffloadMandatory() const {
  130.     assert(OpenMPOffloadMandatory.has_value() &&
  131.            "OpenMPOffloadMandatory is not set");
  132.     return *OpenMPOffloadMandatory;
  133.   }
  134.   // Returns the FirstSeparator if set, otherwise use the default
  135.   // separator depending on isTargetCodegen
  136.   StringRef firstSeparator() const {
  137.     if (FirstSeparator.has_value())
  138.       return *FirstSeparator;
  139.     if (isTargetCodegen())
  140.       return "_";
  141.     return ".";
  142.   }
  143.  
  144.   // Returns the Separator if set, otherwise use the default
  145.   // separator depending on isTargetCodegen
  146.   StringRef separator() const {
  147.     if (Separator.has_value())
  148.       return *Separator;
  149.     if (isTargetCodegen())
  150.       return "$";
  151.     return ".";
  152.   }
  153.  
  154.   void setIsEmbedded(bool Value) { IsEmbedded = Value; }
  155.   void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; }
  156.   void setHasRequiresUnifiedSharedMemory(bool Value) {
  157.     HasRequiresUnifiedSharedMemory = Value;
  158.   }
  159.   void setFirstSeparator(StringRef FS) { FirstSeparator = FS; }
  160.   void setSeparator(StringRef S) { Separator = S; }
  161. };
  162.  
  163. /// An interface to create LLVM-IR for OpenMP directives.
  164. ///
  165. /// Each OpenMP directive has a corresponding public generator method.
  166. class OpenMPIRBuilder {
  167. public:
  168.   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
  169.   /// not have an effect on \p M (see initialize)
  170.   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
  171.   ~OpenMPIRBuilder();
  172.  
  173.   /// Initialize the internal state, this will put structures types and
  174.   /// potentially other helpers into the underlying module. Must be called
  175.   /// before any other method and only once!
  176.   void initialize();
  177.  
  178.   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
  179.  
  180.   /// Finalize the underlying module, e.g., by outlining regions.
  181.   /// \param Fn                    The function to be finalized. If not used,
  182.   ///                              all functions are finalized.
  183.   void finalize(Function *Fn = nullptr);
  184.  
  185.   /// Add attributes known for \p FnID to \p Fn.
  186.   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
  187.  
  188.   /// Type used throughout for insertion points.
  189.   using InsertPointTy = IRBuilder<>::InsertPoint;
  190.  
  191.   /// Create a name using the platform specific separators.
  192.   /// \param Parts parts of the final name that needs separation
  193.   /// The created name has a first separator between the first and second part
  194.   /// and a second separator between all other parts.
  195.   /// E.g. with FirstSeparator "$" and Separator "." and
  196.   /// parts: "p1", "p2", "p3", "p4"
  197.   /// The resulting name is "p1$p2.p3.p4"
  198.   /// The separators are retrieved from the OpenMPIRBuilderConfig.
  199.   std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
  200.  
  201.   /// Callback type for variable finalization (think destructors).
  202.   ///
  203.   /// \param CodeGenIP is the insertion point at which the finalization code
  204.   ///                  should be placed.
  205.   ///
  206.   /// A finalize callback knows about all objects that need finalization, e.g.
  207.   /// destruction, when the scope of the currently generated construct is left
  208.   /// at the time, and location, the callback is invoked.
  209.   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
  210.  
  211.   struct FinalizationInfo {
  212.     /// The finalization callback provided by the last in-flight invocation of
  213.     /// createXXXX for the directive of kind DK.
  214.     FinalizeCallbackTy FiniCB;
  215.  
  216.     /// The directive kind of the innermost directive that has an associated
  217.     /// region which might require finalization when it is left.
  218.     omp::Directive DK;
  219.  
  220.     /// Flag to indicate if the directive is cancellable.
  221.     bool IsCancellable;
  222.   };
  223.  
  224.   /// Push a finalization callback on the finalization stack.
  225.   ///
  226.   /// NOTE: Temporary solution until Clang CG is gone.
  227.   void pushFinalizationCB(const FinalizationInfo &FI) {
  228.     FinalizationStack.push_back(FI);
  229.   }
  230.  
  231.   /// Pop the last finalization callback from the finalization stack.
  232.   ///
  233.   /// NOTE: Temporary solution until Clang CG is gone.
  234.   void popFinalizationCB() { FinalizationStack.pop_back(); }
  235.  
  236.   /// Callback type for body (=inner region) code generation
  237.   ///
  238.   /// The callback takes code locations as arguments, each describing a
  239.   /// location where additional instructions can be inserted.
  240.   ///
  241.   /// The CodeGenIP may be in the middle of a basic block or point to the end of
  242.   /// it. The basic block may have a terminator or be degenerate. The callback
  243.   /// function may just insert instructions at that position, but also split the
  244.   /// block (without the Before argument of BasicBlock::splitBasicBlock such
  245.   /// that the identity of the split predecessor block is preserved) and insert
  246.   /// additional control flow, including branches that do not lead back to what
  247.   /// follows the CodeGenIP. Note that since the callback is allowed to split
  248.   /// the block, callers must assume that InsertPoints to positions in the
  249.   /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
  250.   /// such InsertPoints need to be preserved, it can split the block itself
  251.   /// before calling the callback.
  252.   ///
  253.   /// AllocaIP and CodeGenIP must not point to the same position.
  254.   ///
  255.   /// \param AllocaIP is the insertion point at which new alloca instructions
  256.   ///                 should be placed. The BasicBlock it is pointing to must
  257.   ///                 not be split.
  258.   /// \param CodeGenIP is the insertion point at which the body code should be
  259.   ///                  placed.
  260.   using BodyGenCallbackTy =
  261.       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
  262.  
  263.   // This is created primarily for sections construct as llvm::function_ref
  264.   // (BodyGenCallbackTy) is not storable (as described in the comments of
  265.   // function_ref class - function_ref contains non-ownable reference
  266.   // to the callable).
  267.   using StorableBodyGenCallbackTy =
  268.       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
  269.  
  270.   /// Callback type for loop body code generation.
  271.   ///
  272.   /// \param CodeGenIP is the insertion point where the loop's body code must be
  273.   ///                  placed. This will be a dedicated BasicBlock with a
  274.   ///                  conditional branch from the loop condition check and
  275.   ///                  terminated with an unconditional branch to the loop
  276.   ///                  latch.
  277.   /// \param IndVar    is the induction variable usable at the insertion point.
  278.   using LoopBodyGenCallbackTy =
  279.       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
  280.  
  281.   /// Callback type for variable privatization (think copy & default
  282.   /// constructor).
  283.   ///
  284.   /// \param AllocaIP is the insertion point at which new alloca instructions
  285.   ///                 should be placed.
  286.   /// \param CodeGenIP is the insertion point at which the privatization code
  287.   ///                  should be placed.
  288.   /// \param Original The value being copied/created, should not be used in the
  289.   ///                 generated IR.
  290.   /// \param Inner The equivalent of \p Original that should be used in the
  291.   ///              generated IR; this is equal to \p Original if the value is
  292.   ///              a pointer and can thus be passed directly, otherwise it is
  293.   ///              an equivalent but different value.
  294.   /// \param ReplVal The replacement value, thus a copy or new created version
  295.   ///                of \p Inner.
  296.   ///
  297.   /// \returns The new insertion point where code generation continues and
  298.   ///          \p ReplVal the replacement value.
  299.   using PrivatizeCallbackTy = function_ref<InsertPointTy(
  300.       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
  301.       Value &Inner, Value *&ReplVal)>;
  302.  
  303.   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
  304.   /// (filename, line, column, ...).
  305.   struct LocationDescription {
  306.     LocationDescription(const IRBuilderBase &IRB)
  307.         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
  308.     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
  309.     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
  310.         : IP(IP), DL(DL) {}
  311.     InsertPointTy IP;
  312.     DebugLoc DL;
  313.   };
  314.  
  315.   /// Emitter methods for OpenMP directives.
  316.   ///
  317.   ///{
  318.  
  319.   /// Generator for '#omp barrier'
  320.   ///
  321.   /// \param Loc The location where the barrier directive was encountered.
  322.   /// \param DK The kind of directive that caused the barrier.
  323.   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  324.   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  325.   ///                        should be checked and acted upon.
  326.   ///
  327.   /// \returns The insertion point after the barrier.
  328.   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
  329.                               bool ForceSimpleCall = false,
  330.                               bool CheckCancelFlag = true);
  331.  
  332.   /// Generator for '#omp cancel'
  333.   ///
  334.   /// \param Loc The location where the directive was encountered.
  335.   /// \param IfCondition The evaluated 'if' clause expression, if any.
  336.   /// \param CanceledDirective The kind of directive that is canceled.
  337.   ///
  338.   /// \returns The insertion point after the barrier.
  339.   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
  340.                              omp::Directive CanceledDirective);
  341.  
  342.   /// Generator for '#omp parallel'
  343.   ///
  344.   /// \param Loc The insert and source location description.
  345.   /// \param AllocaIP The insertion points to be used for alloca instructions.
  346.   /// \param BodyGenCB Callback that will generate the region code.
  347.   /// \param PrivCB Callback to copy a given variable (think copy constructor).
  348.   /// \param FiniCB Callback to finalize variable copies.
  349.   /// \param IfCondition The evaluated 'if' clause expression, if any.
  350.   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
  351.   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
  352.   /// \param IsCancellable Flag to indicate a cancellable parallel region.
  353.   ///
  354.   /// \returns The insertion position *after* the parallel.
  355.   IRBuilder<>::InsertPoint
  356.   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
  357.                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
  358.                  FinalizeCallbackTy FiniCB, Value *IfCondition,
  359.                  Value *NumThreads, omp::ProcBindKind ProcBind,
  360.                  bool IsCancellable);
  361.  
  362.   /// Generator for the control flow structure of an OpenMP canonical loop.
  363.   ///
  364.   /// This generator operates on the logical iteration space of the loop, i.e.
  365.   /// the caller only has to provide a loop trip count of the loop as defined by
  366.   /// base language semantics. The trip count is interpreted as an unsigned
  367.   /// integer. The induction variable passed to \p BodyGenCB will be of the same
  368.   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
  369.   /// convert the logical iteration variable to the loop counter variable in the
  370.   /// loop body.
  371.   ///
  372.   /// \param Loc       The insert and source location description. The insert
  373.   ///                  location can be between two instructions or the end of a
  374.   ///                  degenerate block (e.g. a BB under construction).
  375.   /// \param BodyGenCB Callback that will generate the loop body code.
  376.   /// \param TripCount Number of iterations the loop body is executed.
  377.   /// \param Name      Base name used to derive BB and instruction names.
  378.   ///
  379.   /// \returns An object representing the created control flow structure which
  380.   ///          can be used for loop-associated directives.
  381.   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  382.                                          LoopBodyGenCallbackTy BodyGenCB,
  383.                                          Value *TripCount,
  384.                                          const Twine &Name = "loop");
  385.  
  386.   /// Generator for the control flow structure of an OpenMP canonical loop.
  387.   ///
  388.   /// Instead of a logical iteration space, this allows specifying user-defined
  389.   /// loop counter values using increment, upper- and lower bounds. To
  390.   /// disambiguate the terminology when counting downwards, instead of lower
  391.   /// bounds we use \p Start for the loop counter value in the first body
  392.   /// iteration.
  393.   ///
  394.   /// Consider the following limitations:
  395.   ///
  396.   ///  * A loop counter space over all integer values of its bit-width cannot be
  397.   ///    represented (e.g. using uint8_t, its loop trip count of 256 cannot be
  398.   ///    stored into an 8 bit integer):
  399.   ///
  400.   ///      DO I = 0, 255, 1
  401.   ///
  402.   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
  403.   ///    effectively counting downwards:
  404.   ///
  405.   ///      for (uint8_t i = 100u; i > 0; i += 127u)
  406.   ///
  407.   ///
  408.   /// TODO: May need to add additional parameters to represent:
  409.   ///
  410.   ///  * Allow representing downcounting with unsigned integers.
  411.   ///
  412.   ///  * Sign of the step and the comparison operator might disagree:
  413.   ///
  414.   ///      for (int i = 0; i < 42; i -= 1u)
  415.   ///
  416.   ///
  417.   /// \param Loc       The insert and source location description.
  418.   /// \param BodyGenCB Callback that will generate the loop body code.
  419.   /// \param Start     Value of the loop counter for the first iteration.
  420.   /// \param Stop      Loop counter values past this will stop the loop.
  421.   /// \param Step      Loop counter increment after each iteration; negative
  422.   ///                  means counting down.
  423.   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
  424.   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
  425.   ///                      counter.
  426.   /// \param ComputeIP Insertion point for instructions computing the trip
  427.   ///                  count. Can be used to ensure the trip count is available
  428.   ///                  at the outermost loop of a loop nest. If not set,
  429.   ///                  defaults to the preheader of the generated loop.
  430.   /// \param Name      Base name used to derive BB and instruction names.
  431.   ///
  432.   /// \returns An object representing the created control flow structure which
  433.   ///          can be used for loop-associated directives.
  434.   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
  435.                                          LoopBodyGenCallbackTy BodyGenCB,
  436.                                          Value *Start, Value *Stop, Value *Step,
  437.                                          bool IsSigned, bool InclusiveStop,
  438.                                          InsertPointTy ComputeIP = {},
  439.                                          const Twine &Name = "loop");
  440.  
  441.   /// Collapse a loop nest into a single loop.
  442.   ///
  443.   /// Merges loops of a loop nest into a single CanonicalLoopNest representation
  444.   /// that has the same number of innermost loop iterations as the origin loop
  445.   /// nest. The induction variables of the input loops are derived from the
  446.   /// collapsed loop's induction variable. This is intended to be used to
  447.   /// implement OpenMP's collapse clause. Before applying a directive,
  448.   /// collapseLoops normalizes a loop nest to contain only a single loop and the
  449.   /// directive's implementation does not need to handle multiple loops itself.
  450.   /// This does not remove the need to handle all loop nest handling by
  451.   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
  452.   /// modifier of the worksharing-loop directive.
  453.   ///
  454.   /// Example:
  455.   /// \code
  456.   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
  457.   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
  458.   ///       body(i, j);
  459.   /// \endcode
  460.   ///
  461.   /// After collapsing with Loops={i,j}, the loop is changed to
  462.   /// \code
  463.   ///   for (int ij = 0; ij < 63; ++ij) {
  464.   ///     int i = ij / 9;
  465.   ///     int j = ij % 9;
  466.   ///     body(i, j);
  467.   ///   }
  468.   /// \endcode
  469.   ///
  470.   /// In the current implementation, the following limitations apply:
  471.   ///
  472.   ///  * All input loops have an induction variable of the same type.
  473.   ///
  474.   ///  * The collapsed loop will have the same trip count integer type as the
  475.   ///    input loops. Therefore it is possible that the collapsed loop cannot
  476.   ///    represent all iterations of the input loops. For instance, assuming a
  477.   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
  478.   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
  479.   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
  480.   ///    in this case.
  481.   ///
  482.   ///  * The trip counts of every input loop must be available at \p ComputeIP.
  483.   ///    Non-rectangular loops are not yet supported.
  484.   ///
  485.   ///  * At each nest level, code between a surrounding loop and its nested loop
  486.   ///    is hoisted into the loop body, and such code will be executed more
  487.   ///    often than before collapsing (or not at all if any inner loop iteration
  488.   ///    has a trip count of 0). This is permitted by the OpenMP specification.
  489.   ///
  490.   /// \param DL        Debug location for instructions added for collapsing,
  491.   ///                  such as instructions to compute/derive the input loop's
  492.   ///                  induction variables.
  493.   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
  494.   ///                  from outermost-to-innermost and every control flow of a
  495.   ///                  loop's body must pass through its directly nested loop.
  496.   /// \param ComputeIP Where to insert additional instructions that compute the
  497.   ///                  collapsed trip count. If not set, defaults to before the
  498.   ///                  generated loop.
  499.   ///
  500.   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
  501.   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
  502.                                    ArrayRef<CanonicalLoopInfo *> Loops,
  503.                                    InsertPointTy ComputeIP);
  504.  
  505. private:
  506.   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
  507.   ///
  508.   /// This takes a \p LoopInfo representing a canonical loop, such as the one
  509.   /// created by \p createCanonicalLoop and emits additional instructions to
  510.   /// turn it into a workshare loop. In particular, it calls to an OpenMP
  511.   /// runtime function in the preheader to obtain the loop bounds to be used in
  512.   /// the current thread, updates the relevant instructions in the canonical
  513.   /// loop and calls to an OpenMP runtime finalization function after the loop.
  514.   ///
  515.   /// \param DL       Debug location for instructions added for the
  516.   ///                 workshare-loop construct itself.
  517.   /// \param CLI      A descriptor of the canonical loop to workshare.
  518.   /// \param AllocaIP An insertion point for Alloca instructions usable in the
  519.   ///                 preheader of the loop.
  520.   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  521.   ///                     the loop.
  522.   ///
  523.   /// \returns Point where to insert code after the workshare construct.
  524.   InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  525.                                          InsertPointTy AllocaIP,
  526.                                          bool NeedsBarrier);
  527.  
  528.   /// Modifies the canonical loop to be a statically-scheduled workshare loop
  529.   /// with a user-specified chunk size.
  530.   ///
  531.   /// \param DL           Debug location for instructions added for the
  532.   ///                     workshare-loop construct itself.
  533.   /// \param CLI          A descriptor of the canonical loop to workshare.
  534.   /// \param AllocaIP     An insertion point for Alloca instructions usable in
  535.   ///                     the preheader of the loop.
  536.   /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
  537.   ///                     loop.
  538.   /// \param ChunkSize    The user-specified chunk size.
  539.   ///
  540.   /// \returns Point where to insert code after the workshare construct.
  541.   InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
  542.                                                 CanonicalLoopInfo *CLI,
  543.                                                 InsertPointTy AllocaIP,
  544.                                                 bool NeedsBarrier,
  545.                                                 Value *ChunkSize);
  546.  
  547.   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
  548.   ///
  549.   /// This takes a \p LoopInfo representing a canonical loop, such as the one
  550.   /// created by \p createCanonicalLoop and emits additional instructions to
  551.   /// turn it into a workshare loop. In particular, it calls to an OpenMP
  552.   /// runtime function in the preheader to obtain, and then in each iteration
  553.   /// to update the loop counter.
  554.   ///
  555.   /// \param DL       Debug location for instructions added for the
  556.   ///                 workshare-loop construct itself.
  557.   /// \param CLI      A descriptor of the canonical loop to workshare.
  558.   /// \param AllocaIP An insertion point for Alloca instructions usable in the
  559.   ///                 preheader of the loop.
  560.   /// \param SchedType Type of scheduling to be passed to the init function.
  561.   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  562.   ///                     the loop.
  563.   /// \param Chunk    The size of loop chunk considered as a unit when
  564.   ///                 scheduling. If \p nullptr, defaults to 1.
  565.   ///
  566.   /// \returns Point where to insert code after the workshare construct.
  567.   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
  568.                                           InsertPointTy AllocaIP,
  569.                                           omp::OMPScheduleType SchedType,
  570.                                           bool NeedsBarrier,
  571.                                           Value *Chunk = nullptr);
  572.  
  573.   /// Create alternative version of the loop to support if clause
  574.   ///
  575.   /// OpenMP if clause can require to generate second loop. This loop
  576.   /// will be executed when if clause condition is not met. createIfVersion
  577.   /// adds branch instruction to the copied loop if \p IfCond is not met.
  578.   ///
  579.   /// \param Loop       Original loop which should be versioned.
  580.   /// \param IfCond     Value which corresponds to if clause condition
  581.   /// \param VMap       Value to value map to define relation between
  582.   ///                   original and copied loop values and loop blocks.
  583.   /// \param NamePrefix Optional name prefix for if.then if.else blocks.
  584.   void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
  585.                        ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
  586.  
  587. public:
  588.   /// Modifies the canonical loop to be a workshare loop.
  589.   ///
  590.   /// This takes a \p LoopInfo representing a canonical loop, such as the one
  591.   /// created by \p createCanonicalLoop and emits additional instructions to
  592.   /// turn it into a workshare loop. In particular, it calls to an OpenMP
  593.   /// runtime function in the preheader to obtain the loop bounds to be used in
  594.   /// the current thread, updates the relevant instructions in the canonical
  595.   /// loop and calls to an OpenMP runtime finalization function after the loop.
  596.   ///
  597.   /// The concrete transformation is done by applyStaticWorkshareLoop,
  598.   /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
  599.   /// on the value of \p SchedKind and \p ChunkSize.
  600.   ///
  601.   /// \param DL       Debug location for instructions added for the
  602.   ///                 workshare-loop construct itself.
  603.   /// \param CLI      A descriptor of the canonical loop to workshare.
  604.   /// \param AllocaIP An insertion point for Alloca instructions usable in the
  605.   ///                 preheader of the loop.
  606.   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
  607.   ///                     the loop.
  608.   /// \param SchedKind Scheduling algorithm to use.
  609.   /// \param ChunkSize The chunk size for the inner loop.
  610.   /// \param HasSimdModifier Whether the simd modifier is present in the
  611.   ///                        schedule clause.
  612.   /// \param HasMonotonicModifier Whether the monotonic modifier is present in
  613.   ///                             the schedule clause.
  614.   /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
  615.   ///                                present in the schedule clause.
  616.   /// \param HasOrderedClause Whether the (parameterless) ordered clause is
  617.   ///                         present.
  618.   ///
  619.   /// \returns Point where to insert code after the workshare construct.
  620.   InsertPointTy applyWorkshareLoop(
  621.       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
  622.       bool NeedsBarrier,
  623.       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
  624.       Value *ChunkSize = nullptr, bool HasSimdModifier = false,
  625.       bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
  626.       bool HasOrderedClause = false);
  627.  
  628.   /// Tile a loop nest.
  629.   ///
  630.   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
  631.   /// \p Loops must be perfectly nested, from outermost to innermost loop
  632.   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
  633.   /// of every loop and every tile sizes must be usable in the outermost
  634.   /// loop's preheader. This implies that the loop nest is rectangular.
  635.   ///
  636.   /// Example:
  637.   /// \code
  638.   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
  639.   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
  640.   ///         body(i, j);
  641.   /// \endcode
  642.   ///
  643.   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
  644.   /// \code
  645.   ///   for (int i1 = 0; i1 < 3; ++i1)
  646.   ///     for (int j1 = 0; j1 < 2; ++j1)
  647.   ///       for (int i2 = 0; i2 < 5; ++i2)
  648.   ///         for (int j2 = 0; j2 < 7; ++j2)
  649.   ///           body(i1*5+i2, j1*7+j2);
  650.   /// \endcode
  651.   ///
  652.   /// The returned vector contains {i1,j1,i2,j2}. The loops i1 and j1 are
  653.   /// referred to as the floor, and i2 and j2 as the tiles. Tiling also
  654.   /// handles non-constant trip counts, non-constant tile sizes and trip counts
  655.   /// that are not multiples of the tile size. In the latter case the tile loop
  656.   /// of the last floor-loop iteration will have fewer iterations than specified
  657.   /// as its tile size.
  658.   ///
  659.   ///
  660.   /// \param DL        Debug location for instructions added by tiling, for
  661.   ///                  instance the floor- and tile trip count computation.
  662.   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
  663.   ///                  invalidated by this method, i.e. should not be used
  664.   ///                  after tiling.
  665.   /// \param TileSizes For each loop in \p Loops, the tile size for that
  666.   ///                  dimension.
  667.   ///
  668.   /// \returns A list of generated loops. Contains twice as many loops as the
  669.   ///          input loop nest; the first half are the floor loops and the
  670.   ///          second half are the tile loops.
  671.   std::vector<CanonicalLoopInfo *>
  672.   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
  673.             ArrayRef<Value *> TileSizes);
  674.  
  675.   /// Fully unroll a loop.
  676.   ///
  677.   /// Instead of unrolling the loop immediately (and duplicating its body
  678.   /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
  679.   /// metadata.
  680.   ///
  681.   /// \param DL   Debug location for instructions added by unrolling.
  682.   /// \param Loop The loop to unroll. The loop will be invalidated.
  683.   void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
  684.  
  685.   /// Fully or partially unroll a loop. How the loop is unrolled is determined
  686.   /// using LLVM's LoopUnrollPass.
  687.   ///
  688.   /// \param DL   Debug location for instructions added by unrolling.
  689.   /// \param Loop The loop to unroll. The loop will be invalidated.
  690.   void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
  691.  
  692.   /// Partially unroll a loop.
  693.   ///
  694.   /// The CanonicalLoopInfo of the unrolled loop for use with chained
  695.   /// loop-associated directive can be requested using \p UnrolledCLI. Not
  696.   /// needing the CanonicalLoopInfo allows more efficient code generation by
  697.   /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
  698.   /// A loop-associated directive applied to the unrolled loop needs to know the
  699.   /// new trip count which means that if using a heuristically determined unroll
  700.   /// factor (\p Factor == 0), that factor must be computed immediately. We are
  701.   /// using the same logic as the LoopUnrollPass to derive the unroll factor,
  702.   /// but which assumes that some canonicalization has taken place (e.g.
  703.   /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
  704.   /// better when the unrolled loop's CanonicalLoopInfo is not needed.
  705.   ///
  706.   /// \param DL          Debug location for instructions added by unrolling.
  707.   /// \param Loop        The loop to unroll. The loop will be invalidated.
  708.   /// \param Factor      The factor to unroll the loop by. A factor of 0
  709.   ///                    indicates that a heuristic should be used to determine
  710.   ///                    the unroll-factor.
  711.   /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
  712.   ///                    partially unrolled loop. Otherwise, uses loop metadata
  713.   ///                    to defer unrolling to the LoopUnrollPass.
  714.   void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
  715.                          CanonicalLoopInfo **UnrolledCLI);
  716.  
  717.   /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
  718.   /// is cloned. The metadata which prevents vectorization is added to
  719.   /// the cloned loop. The cloned loop is executed when \p IfCond evaluates
  720.   /// to false.
  721.   ///
  722.   /// \param Loop        The loop to simd-ize.
  723.   /// \param AlignedVars The map which contains pairs of the pointer
  724.   ///                    and its corresponding alignment.
  725.   /// \param IfCond      The value which corresponds to the if clause
  726.   ///                    condition.
  727.   /// \param Order       The enum to map order clause.
  728.   /// \param Simdlen     The Simdlen length to apply to the simd loop.
  729.   /// \param Safelen     The Safelen length to apply to the simd loop.
  730.   void applySimd(CanonicalLoopInfo *Loop,
  731.                  MapVector<Value *, Value *> AlignedVars, Value *IfCond,
  732.                  omp::OrderKind Order, ConstantInt *Simdlen,
  733.                  ConstantInt *Safelen);
  734.  
  735.   /// Generator for '#omp flush'
  736.   ///
  737.   /// \param Loc The location where the flush directive was encountered
  738.   void createFlush(const LocationDescription &Loc);
  739.  
  740.   /// Generator for '#omp taskwait'
  741.   ///
  742.   /// \param Loc The location where the taskwait directive was encountered.
  743.   void createTaskwait(const LocationDescription &Loc);
  744.  
  745.   /// Generator for '#omp taskyield'
  746.   ///
  747.   /// \param Loc The location where the taskyield directive was encountered.
  748.   void createTaskyield(const LocationDescription &Loc);
  749.  
  750.   /// Packs one OpenMP depend clause; pointers are null until explicitly set.
  751.   struct DependData {
  752.     omp::RTLDependenceKindTy DepKind = omp::RTLDependenceKindTy::DepUnknown;
  753.     Type *DepValueType = nullptr; ///< Null for a default-constructed entry.
  754.     Value *DepVal = nullptr;      ///< Null for a default-constructed entry.
  755.     explicit DependData() = default;
  756.     DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType,
  757.                Value *DepVal)
  758.         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
  759.   };
  760.  
  761.   /// Generator for `#omp task`
  762.   ///
  763.   /// \param Loc The location where the task construct was encountered.
  764.   /// \param AllocaIP The insertion point to be used for alloca instructions.
  765.   /// \param BodyGenCB Callback that will generate the region code.
  766.   /// \param Tied True if the task is tied, false if the task is untied.
  767.   /// \param Final i1 value which is `true` if the task is final, `false` if the
  768.   ///              task is not final.
  769.   /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
  770.   ///                    task is generated, and the encountering thread must
  771.   ///                    suspend the current task region until execution of
  772.   ///                    the structured block that is associated with the
  773.   ///                    generated task is completed.
  774.   /// \param Dependencies The depend clause entries that apply to the task.
  775.   InsertPointTy createTask(const LocationDescription &Loc,
  776.                            InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
  777.                            bool Tied = true, Value *Final = nullptr,
  778.                            Value *IfCondition = nullptr,
  779.                            SmallVector<DependData> Dependencies = {});
  780.  
  781.   /// Generator for the taskgroup construct
  782.   ///
  783.   /// \param Loc The location where the taskgroup construct was encountered.
  784.   /// \param AllocaIP The insertion point to be used for alloca instructions.
  785.   /// \param BodyGenCB Callback that will generate the region code.
  786.   InsertPointTy createTaskgroup(const LocationDescription &Loc,
  787.                                 InsertPointTy AllocaIP,
  788.                                 BodyGenCallbackTy BodyGenCB);
  789.  
  790.   /// Functions used to generate reductions. Such functions take two Values
  791.   /// representing LHS and RHS of the reduction, respectively, and a reference
  792.   /// to the value that is updated to refer to the reduction result.
  793.   using ReductionGenTy =
  794.       function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
  795.  
  796.   /// Functions used to generate atomic reductions. Such functions take two
  797.   /// Values representing pointers to LHS and RHS of the reduction, as well as
  798.   /// the element type of these pointers. They are expected to atomically
  799.   /// update the LHS to the reduced value.
  800.   using AtomicReductionGenTy =
  801.       function_ref<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
  802.  
  803.   /// Information about an OpenMP reduction.
  804.   struct ReductionInfo {
  805.     ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable,
  806.                   ReductionGenTy ReductionGen,
  807.                   AtomicReductionGenTy AtomicReductionGen)
  808.         : ElementType(ElementType), Variable(Variable),
  809.           PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
  810.           AtomicReductionGen(AtomicReductionGen) {
  811.       assert(cast<PointerType>(Variable->getType())
  812.           ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
  813.     }
  814.  
  815.     /// Reduction element type, must match pointee type of variable.
  816.     Type *ElementType;
  817.  
  818.     /// Reduction variable of pointer type.
  819.     Value *Variable;
  820.  
  821.     /// Thread-private partial reduction variable.
  822.     Value *PrivateVariable;
  823.  
  824.     /// Callback for generating the reduction body. The IR produced by this will
  825.     /// be used to combine two values in a thread-safe context, e.g., under
  826.     /// lock or within the same thread, and therefore need not be atomic.
  827.     ReductionGenTy ReductionGen;
  828.  
  829.     /// Callback for generating the atomic reduction body, may be null. The IR
  830.     /// produced by this will be used to atomically combine two values during
  831.     /// reduction. If null, the implementation will use the non-atomic version
  832.     /// along with the appropriate synchronization mechanisms.
  833.     AtomicReductionGenTy AtomicReductionGen;
  834.   };
  835.  
  836.   // TODO: provide atomic and non-atomic reduction generators for reduction
  837.   // operators defined by the OpenMP specification.
  838.  
  839.   /// Generator for '#omp reduction'.
  840.   ///
  841.   /// Emits the IR instructing the runtime to perform the specific kind of
  842.   /// reductions. Expects reduction variables to have been privatized and
  843.   /// initialized to reduction-neutral values separately. Emits the calls to
  844.   /// runtime functions as well as the reduction function and the basic blocks
  845.   /// performing the reduction atomically and non-atomically.
  846.   ///
  847.   /// The code emitted for the following:
  848.   ///
  849.   /// \code
  850.   ///   type var_1;
  851.   ///   type var_2;
  852.   ///   #pragma omp <directive> reduction(reduction-op:var_1,var_2)
  853.   ///   /* body */;
  854.   /// \endcode
  855.   ///
  856.   /// corresponds to the following sketch.
  857.   ///
  858.   /// \code
  859.   /// void _outlined_par() {
  860.   ///   // N is the number of different reductions.
  861.   ///   void *red_array[] = {privatized_var_1, privatized_var_2, ...};
  862.   ///   switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
  863.   ///                        _omp_reduction_func,
  864.   ///                        _gomp_critical_user.reduction.var)) {
  865.   ///   case 1: {
  866.   ///     var_1 = var_1 <reduction-op> privatized_var_1;
  867.   ///     var_2 = var_2 <reduction-op> privatized_var_2;
  868.   ///     // ...
  869.   ///     __kmpc_end_reduce(...);
  870.   ///     break;
  871.   ///   }
  872.   ///   case 2: {
  873.   ///     _Atomic<ReductionOp>(var_1, privatized_var_1);
  874.   ///     _Atomic<ReductionOp>(var_2, privatized_var_2);
  875.   ///     // ...
  876.   ///     break;
  877.   ///   }
  878.   ///   default: break;
  879.   ///   }
  880.   /// }
  881.   ///
  882.   /// void _omp_reduction_func(void **lhs, void **rhs) {
  883.   ///   *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
  884.   ///   *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
  885.   ///   // ...
  886.   /// }
  887.   /// \endcode
  888.   ///
  889.   /// \param Loc                The location where the reduction was
  890.   ///                           encountered. Must be within the associated
  891.   ///                           directive and after the last local access to the
  892.   ///                           reduction variables.
  893.   /// \param AllocaIP           An insertion point suitable for allocas usable
  894.   ///                           in reductions.
  895.   /// \param ReductionInfos     A list of info on each reduction variable.
  896.   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
  897.   InsertPointTy createReductions(const LocationDescription &Loc,
  898.                                  InsertPointTy AllocaIP,
  899.                                  ArrayRef<ReductionInfo> ReductionInfos,
  900.                                  bool IsNoWait = false);
  901.  
  902.   ///}
  903.  
  904.   /// Return Builder.saveIP(), the underlying IRBuilder's insertion point.
  905.   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
  906.  
  907.   /// Retarget Builder to \p Loc; returns whether \p Loc.IP has a block.
  908.   bool updateToLocation(const LocationDescription &Loc) {
  909.     Builder.restoreIP(Loc.IP);
  910.     Builder.SetCurrentDebugLocation(Loc.DL);
  911.     return Loc.IP.getBlock() != nullptr;
  912.   }
  913.  
  914.   /// Return the function declaration for the runtime function with \p FnID.
  915.   FunctionCallee getOrCreateRuntimeFunction(Module &M,
  916.                                             omp::RuntimeFunction FnID);
  917.  
  918.   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
  919.  
  920.   /// Return the (LLVM-IR) string describing the source location \p LocStr.
  921.   Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
  922.  
  923.   /// Return the (LLVM-IR) string describing the default source location.
  924.   Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
  925.  
  926.   /// Return the (LLVM-IR) string describing the source location identified by
  927.   /// the arguments.
  928.   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
  929.                                  unsigned Line, unsigned Column,
  930.                                  uint32_t &SrcLocStrSize);
  931.  
  932.   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
  933.   /// fallback if \p DL does not specify the function name.
  934.   Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
  935.                                  Function *F = nullptr);
  936.  
  937.   /// Return the (LLVM-IR) string describing the source location \p Loc.
  938.   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
  939.                                  uint32_t &SrcLocStrSize);
  940.  
  941.   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  942.   /// TODO: Create an enum class for the Reserve2Flags
  943.   Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
  944.                              omp::IdentFlag Flags = omp::IdentFlag(0),
  945.                              unsigned Reserve2Flags = 0);
  946.  
  947.   /// Create a hidden global flag \p Name in the module with initial value \p
  948.   /// Value.
  949.   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
  950.  
  951.   /// Create an offloading section struct used to register this global at
  952.   /// runtime.
  953.   ///
  954.   /// Type struct __tgt_offload_entry{
  955.   ///   void    *addr;      // Pointer to the offload entry info.
  956.   ///                       // (function or global)
  957.   ///   char    *name;      // Name of the function or global.
  958.   ///   size_t  size;       // Size of the entry info (0 if it is a function).
  959.   ///   int32_t flags;
  960.   ///   int32_t reserved;
  961.   /// };
  962.   ///
  963.   /// \param Addr The pointer to the global being registered.
  964.   /// \param Name The symbol name associated with the global.
  965.   /// \param Size The size in bytes of the global (0 for functions).
  966.   /// \param Flags Flags associated with the entry.
  967.   /// \param SectionName The section this entry will be placed at.
  968.   void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size,
  969.                            int32_t Flags,
  970.                            StringRef SectionName = "omp_offloading_entries");
  971.  
  972.   /// Generate control flow and cleanup for cancellation.
  973.   ///
  974.   /// \param CancelFlag Flag indicating if the cancellation is performed.
  975.   /// \param CanceledDirective The kind of directive that is canceled.
  976.   /// \param ExitCB Extra code to be generated in the exit block.
  977.   void emitCancelationCheckImpl(Value *CancelFlag,
  978.                                 omp::Directive CanceledDirective,
  979.                                 FinalizeCallbackTy ExitCB = {});
  980.  
  981.   /// Generate a target region entry call.
  982.   ///
  983.   /// \param Loc The location at which the request originated and is fulfilled.
  984.   /// \param Return Return value of the created function returned by reference.
  985.   /// \param DeviceID Identifier for the device via the 'device' clause.
  986.   /// \param NumTeams Number of teams for the region via the 'num_teams' clause
  987.   ///                 or 0 if unspecified and -1 if there is no 'teams' clause.
  988.   /// \param NumThreads Number of threads via the 'thread_limit' clause.
  989.   /// \param HostPtr Pointer to the host-side pointer of the target kernel.
  990.   /// \param KernelArgs Array of arguments to the kernel.
  991.   InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
  992.                                  Value *Ident, Value *DeviceID, Value *NumTeams,
  993.                                  Value *NumThreads, Value *HostPtr,
  994.                                  ArrayRef<Value *> KernelArgs);
  995.  
  996.   /// Generate a barrier runtime call.
  997.   ///
  998.   /// \param Loc The location at which the request originated and is fulfilled.
  999.   /// \param DK The directive which caused the barrier
  1000.   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
  1001.   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
  1002.   ///                        should be checked and acted upon.
  1003.   ///
  1004.   /// \returns The insertion point after the barrier.
  1005.   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
  1006.                                 omp::Directive DK, bool ForceSimpleCall,
  1007.                                 bool CheckCancelFlag);
  1008.  
  1009.   /// Generate a flush runtime call.
  1010.   ///
  1011.   /// \param Loc The location at which the request originated and is fulfilled.
  1012.   void emitFlush(const LocationDescription &Loc);
  1013.  
  1014.   /// The finalization stack made up of finalize callbacks currently in-flight,
  1015.   /// wrapped into FinalizationInfo objects that reference also the finalization
  1016.   /// target block and the kind of cancellable directive.
  1017.   SmallVector<FinalizationInfo, 8> FinalizationStack;
  1018.  
  1019.   /// Whether the top finalization entry is cancellable and of kind \p DK.
  1020.   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
  1021.     if (FinalizationStack.empty())
  1022.       return false;
  1023.     const auto &Top = FinalizationStack.back();
  1024.     return Top.IsCancellable && Top.DK == DK;
  1025.   }
  1026.  
  1027.   /// Generate a taskwait runtime call.
  1028.   ///
  1029.   /// \param Loc The location at which the request originated and is fulfilled.
  1030.   void emitTaskwaitImpl(const LocationDescription &Loc);
  1031.  
  1032.   /// Generate a taskyield runtime call.
  1033.   ///
  1034.   /// \param Loc The location at which the request originated and is fulfilled.
  1035.   void emitTaskyieldImpl(const LocationDescription &Loc);
  1036.  
  1037.   /// Return the current thread ID.
  1038.   ///
  1039.   /// \param Ident The ident (ident_t*) describing the query origin.
  1040.   Value *getOrCreateThreadID(Value *Ident);
  1041.  
  1042.   /// The OpenMPIRBuilder Configuration
  1043.   OpenMPIRBuilderConfig Config;
  1044.  
  1045.   /// The underlying LLVM-IR module
  1046.   Module &M;
  1047.  
  1048.   /// The LLVM-IR Builder used to create IR.
  1049.   IRBuilder<> Builder;
  1050.  
  1051.   /// Map to remember source location strings
  1052.   StringMap<Constant *> SrcLocStrMap;
  1053.  
  1054.   /// Map to remember existing ident_t*.
  1055.   DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
  1056.  
  1057.   /// Bookkeeping for one region that finalization still has to outline.
  1058.   struct OutlineInfo {
  1059.     using PostOutlineCBTy = std::function<void(Function &)>;
  1060.     PostOutlineCBTy PostOutlineCB;
  1061.     BasicBlock *EntryBB;
  1062.     BasicBlock *ExitBB;
  1063.     BasicBlock *OuterAllocaBB;
  1064.     SmallVector<Value *, 2> ExcludeArgsFromAggregate;
  1065.  
  1066.     /// Gather the blocks between EntryBB and ExitBB into both containers.
  1067.     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
  1068.                        SmallVectorImpl<BasicBlock *> &BlockVector);
  1069.  
  1070.     /// The function holding the region, i.e. the parent of EntryBB.
  1071.     Function *getFunction() const { return EntryBB->getParent(); }
  1072.   };
  1073.  
  1074.   /// Collection of regions that need to be outlined during finalization.
  1075.   SmallVector<OutlineInfo, 16> OutlineInfos;
  1076.  
  1077.   /// Collection of owned canonical loop objects that eventually need to be
  1078.   /// free'd.
  1079.   std::forward_list<CanonicalLoopInfo> LoopInfos;
  1080.  
  1081.   /// Queue region \p OI for later outlining; \p OI is moved into the list.
  1082.   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(std::move(OI)); }
  1083.  
  1084.   /// An ordered map of auto-generated variables to their unique names.
  1085.   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
  1086.   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
  1087.   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
  1088.   /// variables.
  1089.   StringMap<Constant*, BumpPtrAllocator> InternalVars;
  1090.  
  1091.   /// Create the global variable holding the offload mappings information.
  1092.   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
  1093.                                         std::string VarName);
  1094.  
  1095.   /// Create the global variable holding the offload names information.
  1096.   GlobalVariable *
  1097.   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
  1098.                         std::string VarName);
  1099.  
  1100.   struct MapperAllocas { // Allocas for mapper calls; see createMapperAllocas.
  1101.     AllocaInst *ArgsBase = nullptr;
  1102.     AllocaInst *Args = nullptr;
  1103.     AllocaInst *ArgSizes = nullptr;
  1104.   };
  1105.  
  1106.   /// Create the allocas instruction used in call to mapper functions.
  1107.   void createMapperAllocas(const LocationDescription &Loc,
  1108.                            InsertPointTy AllocaIP, unsigned NumOperands,
  1109.                            struct MapperAllocas &MapperAllocas);
  1110.  
  1111.   /// Create the call for the target mapper function.
  1112.   /// \param Loc The source location description.
  1113.   /// \param MapperFunc Function to be called.
  1114.   /// \param SrcLocInfo Source location information global.
  1115.   /// \param MaptypesArg The argument types.
  1116.   /// \param MapnamesArg The argument names.
  1117.   /// \param MapperAllocas The AllocaInst used for the call.
  1118.   /// \param DeviceID Device ID for the call.
  1119.   /// \param NumOperands Number of operands in the call.
  1120.   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
  1121.                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
  1122.                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
  1123.                       unsigned NumOperands);
  1124.  
  1125.   /// Bundle of the array values handed to the runtime library.
  1126.   struct TargetDataRTArgs {
  1127.     /// Base pointers array passed to the runtime library.
  1128.     Value *BasePointersArray = nullptr;
  1129.     /// Section pointers array passed to the runtime library.
  1130.     Value *PointersArray = nullptr;
  1131.     /// Sizes array passed to the runtime library.
  1132.     Value *SizesArray = nullptr;
  1133.     /// Map types array passed to the runtime library, used for the start of
  1134.     /// the region, or for the whole region when the region end has no
  1135.     /// separate map types.
  1136.     Value *MapTypesArray = nullptr;
  1137.     /// Map types array passed to the runtime library for the region end;
  1138.     /// stays nullptr when the region end has no separate map types.
  1139.     Value *MapTypesArrayEnd = nullptr;
  1140.     /// User-defined mappers array passed to the runtime library.
  1141.     Value *MappersArray = nullptr;
  1142.     /// Original declaration names of the mapped pointers, sent to the
  1143.     /// runtime library for debugging.
  1144.     Value *MapNamesArray = nullptr;
  1145.  
  1146.     explicit TargetDataRTArgs() {}
  1147.   };
  1148.  
  1149.   /// Struct that keeps the information that should be kept throughout
  1150.   /// a 'target data' region.
  1151.   class TargetDataInfo {
  1152.     /// Set to true if device pointer information have to be obtained.
  1153.     bool RequiresDevicePointerInfo = false;
  1154.     /// Set to true if Clang emits separate runtime calls for the beginning and
  1155.     /// end of the region.  These calls might have separate map type arrays.
  1156.     bool SeparateBeginEndCalls = false;
  1157.  
  1158.   public:
  1159.     TargetDataRTArgs RTArgs;
  1160.  
  1161.     /// Indicate whether any user-defined mapper exists.
  1162.     bool HasMapper = false;
  1163.     /// The total number of pointers passed to the runtime library.
  1164.     unsigned NumberOfPtrs = 0u;
  1165.  
  1166.     explicit TargetDataInfo() {}
  1167.     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
  1168.                             bool SeparateBeginEndCalls)
  1169.         : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
  1170.           SeparateBeginEndCalls(SeparateBeginEndCalls) {}
  1171.     /// Clear information about the data arrays.
  1172.     void clearArrayInfo() {
  1173.       RTArgs = TargetDataRTArgs();
  1174.       HasMapper = false;
  1175.       NumberOfPtrs = 0u;
  1176.     }
  1177.     /// Return true if the current target data information has valid arrays.
  1178.     bool isValid() {
  1179.       return RTArgs.BasePointersArray && RTArgs.PointersArray &&
  1180.              RTArgs.SizesArray && RTArgs.MapTypesArray &&
  1181.              (!HasMapper || RTArgs.MappersArray) && NumberOfPtrs;
  1182.     }
  1183.     bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
  1184.     bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
  1185.   };
  1186.  
  1187.   /// Emit the arguments to be passed to the runtime library based on the
  1188.   /// arrays of base pointers, pointers, sizes, map types, and mappers.  If
  1189.   /// ForEndCall, emit map types to be passed for the end of the region instead
  1190.   /// of the beginning.
  1191.   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
  1192.                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
  1193.                                     OpenMPIRBuilder::TargetDataInfo &Info,
  1194.                                     bool EmitDebug = false,
  1195.                                     bool ForEndCall = false);
  1196.  
  1197.   /// Creates offloading entry for the provided entry ID \a ID, address \a
  1198.   /// Addr, size \a Size, and flags \a Flags.
  1199.   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
  1200.                           int32_t Flags, GlobalValue::LinkageTypes);
  1201.  
  1202.   /// The kind of errors that can occur when emitting the offload entries and
  1203.   /// metadata.
  1204.   enum EmitMetadataErrorKind {
  1205.     EMIT_MD_TARGET_REGION_ERROR,
  1206.     EMIT_MD_DECLARE_TARGET_ERROR,
  1207.     EMIT_MD_GLOBAL_VAR_LINK_ERROR
  1208.   };
  1209.  
  1210.   /// Callback function type
  1211.   using EmitMetadataErrorReportFunctionTy =
  1212.       std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
  1213.  
  1214.   // Emit the offloading entries and metadata so that the device codegen side
  1215.   // can easily figure out what to emit. The produced metadata looks like
  1216.   // this:
  1217.   //
  1218.   // !omp_offload.info = !{!1, ...}
  1219.   //
  1220.   // We only generate metadata for functions that contain target regions.
  1221.   void createOffloadEntriesAndInfoMetadata(
  1222.       OffloadEntriesInfoManager &OffloadEntriesInfoManager,
  1223.       EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
  1224.  
  1225. public:
  1226.   /// Generator for __kmpc_copyprivate
  1227.   ///
  1228.   /// \param Loc The source location description.
  1229.   /// \param BufSize Number of elements in the buffer.
  1230.   /// \param CpyBuf List of pointers to data to be copied.
  1231.   /// \param CpyFn function to call for copying data.
  1232.   /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
  1233.   ///
  1234.   /// \return The insertion position *after* the CopyPrivate call.
  1235.  
  1236.   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
  1237.                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
  1238.                                   llvm::Value *CpyFn, llvm::Value *DidIt);
  1239.  
  1240.   /// Generator for '#omp single'
  1241.   ///
  1242.   /// \param Loc The source location description.
  1243.   /// \param BodyGenCB Callback that will generate the region code.
  1244.   /// \param FiniCB Callback to finalize variable copies.
  1245.   /// \param IsNowait If false, a barrier is emitted.
  1246.   /// \param DidIt Local variable used as a flag to indicate 'single' thread
  1247.   ///
  1248.   /// \returns The insertion position *after* the single call.
  1249.   InsertPointTy createSingle(const LocationDescription &Loc,
  1250.                              BodyGenCallbackTy BodyGenCB,
  1251.                              FinalizeCallbackTy FiniCB, bool IsNowait,
  1252.                              llvm::Value *DidIt);
  1253.  
  1254.   /// Generator for '#omp master'
  1255.   ///
  1256.   /// \param Loc The insert and source location description.
  1257.   /// \param BodyGenCB Callback that will generate the region code.
  1258.   /// \param FiniCB Callback to finalize variable copies.
  1259.   ///
  1260.   /// \returns The insertion position *after* the master.
  1261.   InsertPointTy createMaster(const LocationDescription &Loc,
  1262.                              BodyGenCallbackTy BodyGenCB,
  1263.                              FinalizeCallbackTy FiniCB);
  1264.  
  1265.   /// Generator for '#omp masked'
  1266.   ///
  1267.   /// \param Loc The insert and source location description.
  1268.   /// \param BodyGenCB Callback that will generate the region code.
  1269.   /// \param FiniCB Callback to finalize variable copies.
  1270.   ///
  1271.   /// \returns The insertion position *after* the masked.
  1272.   InsertPointTy createMasked(const LocationDescription &Loc,
  1273.                              BodyGenCallbackTy BodyGenCB,
  1274.                              FinalizeCallbackTy FiniCB, Value *Filter);
  1275.  
  1276.   /// Generator for '#omp critical'
  1277.   ///
  1278.   /// \param Loc The insert and source location description.
  1279.   /// \param BodyGenCB Callback that will generate the region body code.
  1280.   /// \param FiniCB Callback to finalize variable copies.
  1281.   /// \param CriticalName name of the lock used by the critical directive
  1282.   /// \param HintInst Hint Instruction for hint clause associated with critical
  1283.   ///
  1284.   /// \returns The insertion position *after* the critical.
  1285.   InsertPointTy createCritical(const LocationDescription &Loc,
  1286.                                BodyGenCallbackTy BodyGenCB,
  1287.                                FinalizeCallbackTy FiniCB,
  1288.                                StringRef CriticalName, Value *HintInst);
  1289.  
  1290.   /// Generator for '#omp ordered depend (source | sink)'
  1291.   ///
  1292.   /// \param Loc The insert and source location description.
  1293.   /// \param AllocaIP The insertion point to be used for alloca instructions.
  1294.   /// \param NumLoops The number of loops in depend clause.
  1295.   /// \param StoreValues The value will be stored in vector address.
  1296.   /// \param Name The name of alloca instruction.
  1297.   /// \param IsDependSource If true, depend source; otherwise, depend sink.
  1298.   ///
  1299.   /// \return The insertion position *after* the ordered.
  1300.   InsertPointTy createOrderedDepend(const LocationDescription &Loc,
  1301.                                     InsertPointTy AllocaIP, unsigned NumLoops,
  1302.                                     ArrayRef<llvm::Value *> StoreValues,
  1303.                                     const Twine &Name, bool IsDependSource);
  1304.  
  1305.   /// Generator for '#omp ordered [threads | simd]'
  1306.   ///
  1307.   /// \param Loc The insert and source location description.
  1308.   /// \param BodyGenCB Callback that will generate the region code.
  1309.   /// \param FiniCB Callback to finalize variable copies.
  1310.   /// \param IsThreads If true, with threads clause or without clause;
  1311.   /// otherwise, with simd clause;
  1312.   ///
  1313.   /// \returns The insertion position *after* the ordered.
  1314.   InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
  1315.                                          BodyGenCallbackTy BodyGenCB,
  1316.                                          FinalizeCallbackTy FiniCB,
  1317.                                          bool IsThreads);
  1318.  
  1319.   /// Generator for '#omp sections'
  1320.   ///
  1321.   /// \param Loc The insert and source location description.
  1322.   /// \param AllocaIP The insertion points to be used for alloca instructions.
  1323.   /// \param SectionCBs Callbacks that will generate body of each section.
  1324.   /// \param PrivCB Callback to copy a given variable (think copy constructor).
  1325.   /// \param FiniCB Callback to finalize variable copies.
  1326.   /// \param IsCancellable Flag to indicate a cancellable parallel region.
  1327.   /// \param IsNowait If true, barrier - to ensure all sections are executed
  1328.   /// before moving forward will not be generated.
  1329.   /// \returns The insertion position *after* the sections.
  1330.   InsertPointTy createSections(const LocationDescription &Loc,
  1331.                                InsertPointTy AllocaIP,
  1332.                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
  1333.                                PrivatizeCallbackTy PrivCB,
  1334.                                FinalizeCallbackTy FiniCB, bool IsCancellable,
  1335.                                bool IsNowait);
  1336.  
  1337.   /// Generator for '#omp section'
  1338.   ///
  1339.   /// \param Loc The insert and source location description.
  1340.   /// \param BodyGenCB Callback that will generate the region body code.
  1341.   /// \param FiniCB Callback to finalize variable copies.
  1342.   /// \returns The insertion position *after* the section.
  1343.   InsertPointTy createSection(const LocationDescription &Loc,
  1344.                               BodyGenCallbackTy BodyGenCB,
  1345.                               FinalizeCallbackTy FiniCB);
  1346.  
  1347.   /// Generate conditional branch and relevant BasicBlocks through which private
  1348.   /// threads copy the 'copyin' variables from Master copy to threadprivate
  1349.   /// copies.
  1350.   ///
  1351.   /// \param IP insertion block for copyin conditional
  1352.   /// \param MasterAddr a pointer to the master variable
  1353.   /// \param PrivateAddr a pointer to the threadprivate variable
  1354.   /// \param IntPtrTy Pointer size type
  1355.   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
  1356.   ///                    and copy.in.end block
  1357.   ///
  1358.   /// \returns The insertion point where the copying operation is to be emitted.
  1359.   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
  1360.                                          Value *PrivateAddr,
  1361.                                          llvm::IntegerType *IntPtrTy,
  1362.                                          bool BranchtoEnd = true);
  1363.  
  1364.   /// Create a runtime call for kmpc_Alloc
  1365.   ///
  1366.   /// \param Loc The insert and source location description.
  1367.   /// \param Size Size of allocated memory space
  1368.   /// \param Allocator Allocator information instruction
  1369.   /// \param Name Name of call Instruction for OMP_alloc
  1370.   ///
  1371.   /// \returns CallInst to the OMP_Alloc call
  1372.   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
  1373.                            Value *Allocator, std::string Name = "");
  1374.  
  1375.   /// Create a runtime call for kmpc_free
  1376.   ///
  1377.   /// \param Loc The insert and source location description.
  1378.   /// \param Addr Address of memory space to be freed
  1379.   /// \param Allocator Allocator information instruction
  1380.   /// \param Name Name of call Instruction for OMP_Free
  1381.   ///
  1382.   /// \returns CallInst to the OMP_Free call
  1383.   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
  1384.                           Value *Allocator, std::string Name = "");
  1385.  
  1386.   /// Create a runtime call for kmpc_threadprivate_cached
  1387.   ///
  1388.   /// \param Loc The insert and source location description.
  1389.   /// \param Pointer pointer to data to be cached
  1390.   /// \param Size size of data to be cached
  1391.   /// \param Name Name of call Instruction for callinst
  1392.   ///
  1393.   /// \returns CallInst to the thread private cache call.
  1394.   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
  1395.                                       llvm::Value *Pointer,
  1396.                                       llvm::ConstantInt *Size,
  1397.                                       const llvm::Twine &Name = Twine(""));
  1398.  
  1399.   /// Create a runtime call for __tgt_interop_init
  1400.   ///
  1401.   /// \param Loc The insert and source location description.
  1402.   /// \param InteropVar variable to be allocated
  1403.   /// \param InteropType type of interop operation
  1404.   /// \param Device device to which offloading will occur
  1405.   /// \param NumDependences number of dependence variables
  1406.   /// \param DependenceAddress pointer to dependence variables
  1407.   /// \param HaveNowaitClause does nowait clause exist
  1408.   ///
  1409.   /// \returns CallInst to the __tgt_interop_init call
  1410.   CallInst *createOMPInteropInit(const LocationDescription &Loc,
  1411.                                  Value *InteropVar,
  1412.                                  omp::OMPInteropType InteropType, Value *Device,
  1413.                                  Value *NumDependences,
  1414.                                  Value *DependenceAddress,
  1415.                                  bool HaveNowaitClause);
  1416.  
  1417.   /// Create a runtime call for __tgt_interop_destroy
  1418.   ///
  1419.   /// \param Loc The insert and source location description.
  1420.   /// \param InteropVar variable to be allocated
  1421.   /// \param Device device to which offloading will occur
  1422.   /// \param NumDependences number of dependence variables
  1423.   /// \param DependenceAddress pointer to dependence variables
  1424.   /// \param HaveNowaitClause does nowait clause exist
  1425.   ///
  1426.   /// \returns CallInst to the __tgt_interop_destroy call
  1427.   CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
  1428.                                     Value *InteropVar, Value *Device,
  1429.                                     Value *NumDependences,
  1430.                                     Value *DependenceAddress,
  1431.                                     bool HaveNowaitClause);
  1432.  
  1433.   /// Create a runtime call for __tgt_interop_use
  1434.   ///
  1435.   /// \param Loc The insert and source location description.
  1436.   /// \param InteropVar variable to be allocated
  1437.   /// \param Device device to which offloading will occur
  1438.   /// \param NumDependences number of dependence variables
  1439.   /// \param DependenceAddress pointer to dependence variables
  1440.   /// \param HaveNowaitClause does nowait clause exist
  1441.   ///
  1442.   /// \returns CallInst to the __tgt_interop_use call
  1443.   CallInst *createOMPInteropUse(const LocationDescription &Loc,
  1444.                                 Value *InteropVar, Value *Device,
  1445.                                 Value *NumDependences, Value *DependenceAddress,
  1446.                                 bool HaveNowaitClause);
  1447.  
  1448.   /// The `omp target` interface
  1449.   ///
  1450.   /// For more information about the usage of this interface,
  1451.   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
  1452.   ///
  1453.   ///{
  1454.  
  1455.   /// Create a runtime call for kmpc_target_init
  1456.   ///
  1457.   /// \param Loc The insert and source location description.
  1458.   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  1459.   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
  1460.  
  1461.   /// Create a runtime call for kmpc_target_deinit
  1462.   ///
  1463.   /// \param Loc The insert and source location description.
  1464.   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
  1465.   void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
  1466.  
  1467.   ///}
  1468.  
  1469. private:
  1470.   // Sets the function attributes expected for the outlined function
  1471.   void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
  1472.                                                  int32_t NumTeams,
  1473.                                                  int32_t NumThreads);
  1474.  
  1475.   // Creates the function ID/Address for the given outlined function.
  1476.   // In the case of an embedded device function the address of the function is
  1477.   // used, in the case of a non-offload function a constant is created.
  1478.   Constant *createOutlinedFunctionID(Function *OutlinedFn,
  1479.                                      StringRef EntryFnIDName);
  1480.  
  1481.   // Creates the region entry address for the outlined function
  1482.   Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
  1483.                                         StringRef EntryFnName);
  1484.  
  1485. public:
  1486.   /// Functions used to generate a function with the given name.
  1487.   using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
  1488.  
  1489.   /// Create a unique name for the entry function using the source location
  1490.   /// information of the current target region. The name will be something like:
  1491.   ///
  1492.   /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
  1493.   ///
  1494.   /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  1495.   /// mangled name of the function that encloses the target region and BB is the
  1496.   /// line number of the target region. CC is a count added when more than one
  1497.   /// region is located at the same location.
  1498.   ///
  1499.   /// If this target outline function is not an offload entry, we don't need to
  1500.   /// register it. This may happen if it is guarded by an if clause that is
  1501.   /// false at compile time, or no target archs have been specified.
  1502.   ///
  1503.   /// The created target region ID is used by the runtime library to identify
  1504.   /// the current target region, so it only has to be unique and not
  1505.   /// necessarily point to anything. It could be the pointer to the outlined
  1506.   /// function that implements the target region, but we aren't using that so
  1507.   /// that the compiler doesn't need to keep that, and could therefore inline
  1508.   /// the host function if proven worthwhile during optimization. On the other
  1509.   /// hand, if emitting code for the device, the ID has to be the function
  1510.   /// address so that it can be retrieved from the offloading entry and launched
  1511.   /// by the runtime library. We also mark the outlined function to have
  1512.   /// external linkage in case we are emitting code for the device, because
  1513.   /// these functions will be entry points to the device.
  1514.   /// \param InfoManager The info manager keeping track of the offload entries
  1515.   /// \param EntryInfo The entry information about the function
  1516.   /// \param GenerateFunctionCallback The callback function to generate the code
  1517.   /// \param NumTeams Default number of teams
  1518.   /// \param NumThreads Default number of threads
  1519.   /// \param IsOffloadEntry Whether the outlined function is an offload entry
  1520.   /// \param OutlinedFn Pointer to the outlined function
  1521.   /// \param OutlinedFnID The ID of the outlined function
  1522.   void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
  1523.                                 TargetRegionEntryInfo &EntryInfo,
  1524.                                 FunctionGenCallback &GenerateFunctionCallback,
  1525.                                 int32_t NumTeams, int32_t NumThreads,
  1526.                                 bool IsOffloadEntry, Function *&OutlinedFn,
  1527.                                 Constant *&OutlinedFnID);
  1528.  
  1529.   /// Registers the given function and sets up the attributes of the function.
  1530.   /// Returns the FunctionID.
  1531.   ///
  1532.   /// \param InfoManager The info manager keeping track of the offload entries
  1533.   /// \param EntryInfo The entry information about the function
  1534.   /// \param OutlinedFunction Pointer to the outlined function
  1535.   /// \param EntryFnName Name of the outlined function
  1536.   /// \param EntryFnIDName Name of the ID to be created
  1537.   /// \param NumTeams Default number of teams
  1538.   /// \param NumThreads Default number of threads
  1539.   Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
  1540.                                          TargetRegionEntryInfo &EntryInfo,
  1541.                                          Function *OutlinedFunction,
  1542.                                          StringRef EntryFnName,
  1543.                                          StringRef EntryFnIDName,
  1544.                                          int32_t NumTeams, int32_t NumThreads);
  1545.  
  1546.   /// Declarations for LLVM-IR types (simple, array, function and structure) are
  1547.   /// generated below. Their names are defined and used in OpenMPKinds.def. Here
  1548.   /// we provide the declarations, the initializeTypes function will provide the
  1549.   /// values.
  1550.   ///
  1551.   ///{
  1552. #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
  1553. #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  1554.   ArrayType *VarName##Ty = nullptr;                                            \
  1555.   PointerType *VarName##PtrTy = nullptr;
  1556. #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  1557.   FunctionType *VarName = nullptr;                                             \
  1558.   PointerType *VarName##Ptr = nullptr;
  1559. #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
  1560.   StructType *VarName = nullptr;                                               \
  1561.   PointerType *VarName##Ptr = nullptr;
  1562. #include "llvm/Frontend/OpenMP/OMPKinds.def"
  1563.  
  1564.   ///}
  1565.  
  1566. private:
  1567.   /// Create all simple and struct types exposed by the runtime and remember
  1568.   /// the llvm::PointerTypes of them for easy access later.
  1569.   void initializeTypes(Module &M);
  1570.  
  1571.   /// Common interface for generating entry calls for OMP Directives.
  1572.   /// if the directive has a region/body, It will set the insertion
  1573.   /// point to the body
  1574.   ///
  1575.   /// \param OMPD Directive to generate entry blocks for
  1576.   /// \param EntryCall Call to the entry OMP Runtime Function
  1577.   /// \param ExitBB block where the region ends.
  1578.   /// \param Conditional indicate if the entry call result will be used
  1579.   ///        to evaluate a conditional of whether a thread will execute
  1580.   ///        body code or not.
  1581.   ///
  1582.   /// \return The insertion position in exit block
  1583.   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
  1584.                                          BasicBlock *ExitBB,
  1585.                                          bool Conditional = false);
  1586.  
  1587.   /// Common interface to finalize the region
  1588.   ///
  1589.   /// \param OMPD Directive to generate exiting code for
  1590.   /// \param FinIP Insertion point for emitting Finalization code and exit call
  1591.   /// \param ExitCall Call to the ending OMP Runtime Function
  1592.   /// \param HasFinalize indicate if the directive will require finalization
  1593.   ///         and has a finalization callback in the stack that
  1594.   ///        should be called.
  1595.   ///
  1596.   /// \return The insertion position in exit block
  1597.   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
  1598.                                         InsertPointTy FinIP,
  1599.                                         Instruction *ExitCall,
  1600.                                         bool HasFinalize = true);
  1601.  
  1602.   /// Common Interface to generate OMP inlined regions
  1603.   ///
  1604.   /// \param OMPD Directive to generate inlined region for
  1605.   /// \param EntryCall Call to the entry OMP Runtime Function
  1606.   /// \param ExitCall Call to the ending OMP Runtime Function
  1607.   /// \param BodyGenCB Body code generation callback.
  1608.   /// \param FiniCB Finalization Callback. Will be called when finalizing region
  1609.   /// \param Conditional indicate if the entry call result will be used
  1610.   ///        to evaluate a conditional of whether a thread will execute
  1611.   ///        body code or not.
  1612.   /// \param HasFinalize indicate if the directive will require finalization
  1613.   ///        and has a finalization callback in the stack that
  1614.   ///        should be called.
  1615.   /// \param IsCancellable if HasFinalize is set to true, indicate if the
  1616.   ///        directive should be cancellable.
  1617.   /// \return The insertion point after the region
  1618.  
  1619.   InsertPointTy
  1620.   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
  1621.                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
  1622.                        FinalizeCallbackTy FiniCB, bool Conditional = false,
  1623.                        bool HasFinalize = true, bool IsCancellable = false);
  1624.  
  1625.   /// Get the platform-specific name separator.
  1626.   /// \param Parts different parts of the final name that needs separation
  1627.   /// \param FirstSeparator First separator used between the initial two
  1628.   ///        parts of the name.
  1629.   /// \param Separator separator used between all of the rest consecutive
  1630.   ///        parts of the name
  1631.   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
  1632.                                            StringRef FirstSeparator,
  1633.                                            StringRef Separator);
  1634.  
  1635.   /// Returns corresponding lock object for the specified critical region
  1636.   /// name. If the lock object does not exist it is created, otherwise the
  1637.   /// reference to the existing copy is returned.
  1638.   /// \param CriticalName Name of the critical region.
  1639.   ///
  1640.   Value *getOMPCriticalRegionLock(StringRef CriticalName);
  1641.  
  1642.   /// Callback type for Atomic Expression update
  1643.   /// ex:
  1644.   /// \code{.cpp}
  1645.   /// unsigned x = 0;
  1646.   /// #pragma omp atomic update
  1647.   /// x = Expr(x_old);  //Expr() is any legal operation
  1648.   /// \endcode
  1649.   ///
  1650.   /// \param XOld the value of the atomic memory address to use for update
  1651.   /// \param IRB reference to the IRBuilder to use
  1652.   ///
  1653.   /// \returns Value to update X to.
  1654.   using AtomicUpdateCallbackTy =
  1655.       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
  1656.  
  1657. private:
  1658.   enum AtomicKind { Read, Write, Update, Capture, Compare };
  1659.  
  1660.   /// Determine whether to emit flush or not
  1661.   ///
  1662.   /// \param Loc    The insert and source location description.
  1663.   /// \param AO     The required atomic ordering
  1664.   /// \param AK     The OpenMP atomic operation kind used.
  1665.   ///
  1666.   /// \returns whether a flush was emitted or not
  1667.   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
  1668.                                     AtomicOrdering AO, AtomicKind AK);
  1669.  
  1670.   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
  1671.   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
  1672.   /// Only Scalar data types.
  1673.   ///
  1674.   /// \param AllocaIP     The insertion point to be used for alloca
  1675.   ///                   instructions.
  1676.   /// \param X                      The target atomic pointer to be updated
  1677.   /// \param XElemTy    The element type of the atomic pointer.
  1678.   /// \param Expr                   The value to update X with.
  1679.   /// \param AO                     Atomic ordering of the generated atomic
  1680.   ///                   instructions.
  1681.   /// \param RMWOp                The binary operation used for update. If
  1682.   ///                   operation is not supported by atomicRMW,
  1683.   ///                   or belong to {FADD, FSUB, BAD_BINOP}.
  1684.   ///                   Then a `cmpExch` based  atomic will be generated.
  1685.   /// \param UpdateOp   Code generator for complex expressions that cannot be
  1686.   ///                   expressed through atomicrmw instruction.
  1687.   /// \param VolatileX       true if \a X volatile?
  1688.   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  1689.   ///                     update expression, false otherwise.
  1690.   ///                     (e.g. true for X = X BinOp Expr)
  1691.   ///
  1692.   /// \returns A pair of the old value of X before the update, and the value
  1693.   ///          used for the update.
  1694.   std::pair<Value *, Value *>
  1695.   emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
  1696.                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1697.                    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
  1698.                    bool IsXBinopExpr);
  1699.  
  1700.   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
  1701.   ///
  1702.   /// \returns The instruction
  1703.   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
  1704.                                 AtomicRMWInst::BinOp RMWOp);
  1705.  
  1706. public:
  1707.   /// A struct to pack relevant information while generating atomic Ops.
  1708.   struct AtomicOpValue {
  1709.     Value *Var = nullptr;   // presumably the pointer operand - TODO confirm
  1710.     Type *ElemTy = nullptr; // presumably the type Var points to - TODO confirm
  1711.     bool IsSigned = false;
  1712.     bool IsVolatile = false;
  1713.   };
  1714.  
  1715.   /// Emit atomic Read for : V = X --- Only Scalar data types.
  1716.   ///
  1717.   /// \param Loc    The insert and source location description.
  1718.   /// \param X                  The target pointer to be atomically read
  1719.   /// \param V                  Memory address where to store atomically read
  1720.   ///                                       value
  1721.   /// \param AO                 Atomic ordering of the generated atomic
  1722.   ///                                       instructions.
  1723.   ///
  1724.   /// \return Insertion point after generated atomic read IR.
  1725.   InsertPointTy createAtomicRead(const LocationDescription &Loc,
  1726.                                  AtomicOpValue &X, AtomicOpValue &V,
  1727.                                  AtomicOrdering AO);
  1728.  
  1729.   /// Emit atomic write for : X = Expr --- Only Scalar data types.
  1730.   ///
  1731.   /// \param Loc    The insert and source location description.
  1732.   /// \param X                  The target pointer to be atomically written to
  1733.   /// \param Expr               The value to store.
  1734.   /// \param AO                 Atomic ordering of the generated atomic
  1735.   ///               instructions.
  1736.   ///
  1737.   /// \return Insertion point after generated atomic Write IR.
  1738.   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
  1739.                                   AtomicOpValue &X, Value *Expr,
  1740.                                   AtomicOrdering AO);
  1741.  
  1742.   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X.
  1743.   /// For complex operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X).
  1744.   /// Only scalar data types.
  1745.   ///
  1746.   /// \param Loc      The insert and source location description.
  1747.   /// \param AllocaIP The insertion point to be used for alloca instructions.
  1748.   /// \param X        The target atomic pointer to be updated.
  1749.   /// \param Expr     The value to update X with.
  1750.   /// \param AO       Atomic ordering of the generated atomic instructions.
  1751.   /// \param RMWOp    The binary operation used for update. If the operation
  1752.   ///                 is not supported by atomicRMW, or belongs to
  1753.   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
  1754.   ///                 atomic will be generated.
  1755.   /// \param UpdateOp Code generator for complex expressions that cannot be
  1756.   ///                 expressed through atomicrmw instruction.
  1757.   /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
  1758.   ///                     update expression, false otherwise.
  1759.   ///                     (e.g. true for X = X BinOp Expr)
  1760.   ///
  1761.   /// \return Insertion point after generated atomic update IR.
  1762.   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
  1763.                                    InsertPointTy AllocaIP, AtomicOpValue &X,
  1764.                                    Value *Expr, AtomicOrdering AO,
  1765.                                    AtomicRMWInst::BinOp RMWOp,
  1766.                                    AtomicUpdateCallbackTy &UpdateOp,
  1767.                                    bool IsXBinopExpr);
  1768.  
  1769.   /// Emit atomic capture for constructs: --- Only scalar data types
  1770.   /// V = X; X = X BinOp Expr ,
  1771.   /// X = X BinOp Expr; V = X,
  1772.   /// V = X; X = Expr BinOp X,
  1773.   /// X = Expr BinOp X; V = X,
  1774.   /// V = X; X = UpdateOp(X),
  1775.   /// X = UpdateOp(X); V = X,
  1776.   ///
  1777.   /// \param Loc        The insert and source location description.
  1778.   /// \param AllocaIP   The insertion point to be used for alloca instructions.
  1779.   /// \param X          The target atomic pointer to be updated.
  1780.   /// \param V          Memory address where to store captured value.
  1781.   /// \param Expr       The value to update X with.
  1782.   /// \param AO         Atomic ordering of the generated atomic instructions.
  1783.   /// \param RMWOp      The binary operation used for update. If the
  1784.   ///                   operation is not supported by atomicRMW, or belongs to
  1785.   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
  1786.   ///                   atomic will be generated.
  1787.   /// \param UpdateOp   Code generator for complex expressions that cannot be
  1788.   ///                   expressed through atomicrmw instruction.
  1789.   /// \param UpdateExpr true if X is an in place update of the form
  1790.   ///                   X = X BinOp Expr or X = Expr BinOp X
  1791.   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
  1792.   ///                        'v', not an updated one.
  1793.   /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
  1794.   ///                     update expression, false otherwise.
  1795.   ///                     (e.g. true for X = X BinOp Expr)
  1796.   ///
  1797.   /// \return Insertion point after generated atomic capture IR.
  1798.   InsertPointTy
  1799.   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
  1800.                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
  1801.                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
  1802.                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
  1803.                       bool IsPostfixUpdate, bool IsXBinopExpr);
  1804.  
  1805.   /// Emit atomic compare for constructs: --- Only scalar data types
  1806.   /// cond-expr-stmt:
  1807.   /// x = x ordop expr ? expr : x;
  1808.   /// x = expr ordop x ? expr : x;
  1809.   /// x = x == e ? d : x;
  1810.   /// x = e == x ? d : x; (this one is not in the spec)
  1811.   /// cond-update-stmt:
  1812.   /// if (x ordop expr) { x = expr; }
  1813.   /// if (expr ordop x) { x = expr; }
  1814.   /// if (x == e) { x = d; }
  1815.   /// if (e == x) { x = d; } (this one is not in the spec)
  1816.   /// conditional-update-capture-atomic:
  1817.   /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
  1818.   /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
  1819.   /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  1820.   ///                                         IsFailOnly=true)
  1821.   /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
  1822.   /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
  1823.   ///                                                IsFailOnly=true)
  1824.   ///
  1825.   /// \param Loc          The insert and source location description.
  1826.   /// \param X            The target atomic pointer to be updated.
  1827.   /// \param V            Memory address where to store captured value (for
  1828.   ///                     compare capture only).
  1829.   /// \param R            Memory address where to store comparison result
  1830.   ///                     (for compare capture with '==' only).
  1831.   /// \param E            The expected value ('e') for forms that use an
  1832.   ///                     equality comparison or an expression ('expr') for
  1833.   ///                     forms that use 'ordop' (logically an atomic maximum or
  1834.   ///                     minimum).
  1835.   /// \param D            The desired value for forms that use an equality
  1836.   ///                     comparison. For forms that use 'ordop', it should be
  1837.   ///                     \p nullptr.
  1838.   /// \param AO           Atomic ordering of the generated atomic instructions.
  1839.   /// \param Op           Atomic compare operation. It can only be ==, <, or >.
  1840.   /// \param IsXBinopExpr True if the conditional statement is in the form where
  1841.   ///                     x is on LHS. It only matters for < or >.
  1842.   /// \param IsPostfixUpdate  True if original value of 'x' must be stored in
  1843.   ///                         'v', not an updated one (for compare capture
  1844.   ///                         only).
  1845.   /// \param IsFailOnly   True if the original value of 'x' is stored to 'v'
  1846.   ///                     only when the comparison fails. This is only valid for
  1847.   ///                     the case the comparison is '=='.
  1848.   ///
  1849.   /// \return Insertion point after generated atomic compare IR.
  1850.   InsertPointTy
  1851.   createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X,
  1852.                       AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D,
  1853.                       AtomicOrdering AO, omp::OMPAtomicCompareOp Op,
  1854.                       bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
  1855.  
  1856.   /// Create the control flow structure of a canonical OpenMP loop.
  1857.   ///
  1858.   /// The emitted loop will be disconnected, i.e. no edge to the loop's
  1859.   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
  1860.   /// IRBuilder location is not preserved.
  1861.   ///
  1862.   /// \param DL        DebugLoc used for the instructions in the skeleton.
  1863.   /// \param TripCount Value to be used for the trip count.
  1864.   /// \param F         Function in which to insert the BasicBlocks.
  1865.   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
  1866.   ///                         typically the body itself.
  1867.   /// \param PostInsertBefore Where to insert BBs that execute after the body.
  1868.   /// \param Name      Base name used to derive BasicBlock
  1869.   ///                  and instruction names; empty by default.
  1870.   ///
  1871.   /// \returns The CanonicalLoopInfo that represents the emitted loop.
  1872.   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
  1873.                                         Function *F,
  1874.                                         BasicBlock *PreInsertBefore,
  1875.                                         BasicBlock *PostInsertBefore,
  1876.                                         const Twine &Name = {});
  1877.   /// OMP offload info metadata name string ("omp_offload.info").
  1878.   const std::string ompOffloadInfoName = "omp_offload.info";
  1879.  
  1880.   /// Loads all the offload entries information from the host IR
  1881.   /// metadata. This function is only meant to be used with device code
  1882.   /// generation.
  1883.   ///
  1884.   /// \param M         Module to load Metadata info from. Module passed may be
  1885.   /// loaded from bitcode file, i.e., different from OpenMPIRBuilder::M module.
  1886.   /// \param OffloadEntriesInfoManager Initialize Offload Entry information.
  1887.   void
  1888.   loadOffloadInfoMetadata(Module &M,
  1889.                           OffloadEntriesInfoManager &OffloadEntriesInfoManager);
  1890.  
  1891.   /// Gets the internal global variable with the specified Name if it already
  1892.   /// exists, or creates it otherwise. The created variable has linkage
  1893.   /// CommonLinkage by default and is initialized by null value.
  1894.   /// \param Ty Type of the global variable. If the variable already exists,
  1895.   /// its type must be the same.
  1896.   /// \param Name Name of the variable.
  1897.   GlobalVariable *getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
  1898.                                               unsigned AddressSpace = 0);
  1899. };
  1900.  
  1901. /// Data structure to contain the information needed to uniquely identify
  1902. /// a target entry.
  1903. struct TargetRegionEntryInfo {
  1904.   std::string ParentName;
  1905.   unsigned DeviceID;
  1906.   unsigned FileID;
  1907.   unsigned Line;
  1908.   unsigned Count;
  1909.  
  1910.   TargetRegionEntryInfo()
  1911.       : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {}
  1912.   TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
  1913.                         unsigned FileID, unsigned Line, unsigned Count = 0)
  1914.       : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
  1915.         Count(Count) {}
  1916.  
  1917.   static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
  1918.                                          StringRef ParentName,
  1919.                                          unsigned DeviceID, unsigned FileID,
  1920.                                          unsigned Line, unsigned Count);
  1921.  
  1922.   bool operator<(const TargetRegionEntryInfo RHS) const {
  1923.     return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
  1924.            std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
  1925.                            RHS.Count);
  1926.   }
  1927. };
  1928.  
  1929. /// Class that manages information about offload code regions and data
  1930. class OffloadEntriesInfoManager {
  1931.   /// Number of entries registered so far.
  1932.   OpenMPIRBuilderConfig Config;
  1933.   unsigned OffloadingEntriesNum = 0;
  1934.  
  1935. public:
  1936.   void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
  1937.  
  1938.   /// Base class of the entries info.
  1939.   class OffloadEntryInfo {
  1940.   public:
  1941.     /// Kind of a given entry.
  1942.     enum OffloadingEntryInfoKinds : unsigned {
  1943.       /// Entry is a target region.
  1944.       OffloadingEntryInfoTargetRegion = 0,
  1945.       /// Entry is a declare target variable.
  1946.       OffloadingEntryInfoDeviceGlobalVar = 1,
  1947.       /// Invalid entry info.
  1948.       OffloadingEntryInfoInvalid = ~0u
  1949.     };
  1950.  
  1951.   protected:
  1952.     OffloadEntryInfo() = delete;
  1953.     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
  1954.     explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
  1955.                               uint32_t Flags)
  1956.         : Flags(Flags), Order(Order), Kind(Kind) {}
  1957.     ~OffloadEntryInfo() = default;
  1958.  
  1959.   public:
  1960.     bool isValid() const { return Order != ~0u; }
  1961.     unsigned getOrder() const { return Order; }
  1962.     OffloadingEntryInfoKinds getKind() const { return Kind; }
  1963.     uint32_t getFlags() const { return Flags; }
  1964.     void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
  1965.     Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
  1966.     void setAddress(Constant *V) {
  1967.       assert(!Addr.pointsToAliveValue() && "Address has been set before!");
  1968.       Addr = V;
  1969.     }
  1970.     static bool classof(const OffloadEntryInfo *Info) { return true; }
  1971.  
  1972.   private:
  1973.     /// Address of the entity that has to be mapped for offloading.
  1974.     WeakTrackingVH Addr;
  1975.  
  1976.     /// Flags associated with the device global.
  1977.     uint32_t Flags = 0u;
  1978.  
  1979.     /// Order this entry was emitted.
  1980.     unsigned Order = ~0u;
  1981.  
  1982.     OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
  1983.   };
  1984.  
  1985.   /// Return true if a there are no entries defined.
  1986.   bool empty() const;
  1987.   /// Return number of entries defined so far.
  1988.   unsigned size() const { return OffloadingEntriesNum; }
  1989.  
  1990.   OffloadEntriesInfoManager() : Config() {}
  1991.  
  1992.   //
  1993.   // Target region entries related.
  1994.   //
  1995.  
  1996.   /// Kind of the target registry entry.
  1997.   enum OMPTargetRegionEntryKind : uint32_t {
  1998.     /// Mark the entry as target region.
  1999.     OMPTargetRegionEntryTargetRegion = 0x0,
  2000.     /// Mark the entry as a global constructor.
  2001.     OMPTargetRegionEntryCtor = 0x02,
  2002.     /// Mark the entry as a global destructor.
  2003.     OMPTargetRegionEntryDtor = 0x04,
  2004.   };
  2005.  
  2006.   /// Target region entries info.
  2007.   class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
  2008.     /// Address that can be used as the ID of the entry.
  2009.     Constant *ID = nullptr;
  2010.  
  2011.   public:
  2012.     OffloadEntryInfoTargetRegion()
  2013.         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
  2014.     explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
  2015.                                           Constant *ID,
  2016.                                           OMPTargetRegionEntryKind Flags)
  2017.         : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
  2018.           ID(ID) {
  2019.       setAddress(Addr);
  2020.     }
  2021.  
  2022.     Constant *getID() const { return ID; }
  2023.     void setID(Constant *V) {
  2024.       assert(!ID && "ID has been set before!");
  2025.       ID = V;
  2026.     }
  2027.     static bool classof(const OffloadEntryInfo *Info) {
  2028.       return Info->getKind() == OffloadingEntryInfoTargetRegion;
  2029.     }
  2030.   };
  2031.  
  2032.   /// Initialize target region entry.
  2033.   /// This is ONLY needed for DEVICE compilation.
  2034.   void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
  2035.                                        unsigned Order);
  2036.   /// Register target region entry.
  2037.   void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
  2038.                                      Constant *Addr, Constant *ID,
  2039.                                      OMPTargetRegionEntryKind Flags);
  2040.   /// Return true if a target region entry with the provided information
  2041.   /// exists.
  2042.   bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
  2043.                                 bool IgnoreAddressId = false) const;
  2044.  
  2045.   // Return the Name based on \a EntryInfo using the next available Count.
  2046.   void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
  2047.                                   const TargetRegionEntryInfo &EntryInfo);
  2048.  
  2049.   /// brief Applies action \a Action on all registered entries.
  2050.   typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
  2051.                             const OffloadEntryInfoTargetRegion &)>
  2052.       OffloadTargetRegionEntryInfoActTy;
  2053.   void
  2054.   actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
  2055.  
  2056.   //
  2057.   // Device global variable entries related.
  2058.   //
  2059.  
  2060.   /// Kind of the global variable entry..
  2061.   enum OMPTargetGlobalVarEntryKind : uint32_t {
  2062.     /// Mark the entry as a to declare target.
  2063.     OMPTargetGlobalVarEntryTo = 0x0,
  2064.     /// Mark the entry as a to declare target link.
  2065.     OMPTargetGlobalVarEntryLink = 0x1,
  2066.   };
  2067.  
  2068.   /// Device global variable entries info.
  2069.   class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
  2070.     /// Type of the global variable.
  2071.     int64_t VarSize;
  2072.     GlobalValue::LinkageTypes Linkage;
  2073.  
  2074.   public:
  2075.     OffloadEntryInfoDeviceGlobalVar()
  2076.         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
  2077.     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
  2078.                                              OMPTargetGlobalVarEntryKind Flags)
  2079.         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
  2080.     explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
  2081.                                              int64_t VarSize,
  2082.                                              OMPTargetGlobalVarEntryKind Flags,
  2083.                                              GlobalValue::LinkageTypes Linkage)
  2084.         : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
  2085.           VarSize(VarSize), Linkage(Linkage) {
  2086.       setAddress(Addr);
  2087.     }
  2088.  
  2089.     int64_t getVarSize() const { return VarSize; }
  2090.     void setVarSize(int64_t Size) { VarSize = Size; }
  2091.     GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
  2092.     void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
  2093.     static bool classof(const OffloadEntryInfo *Info) {
  2094.       return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
  2095.     }
  2096.   };
  2097.  
  2098.   /// Initialize device global variable entry.
  2099.   /// This is ONLY used for DEVICE compilation.
  2100.   void initializeDeviceGlobalVarEntryInfo(StringRef Name,
  2101.                                           OMPTargetGlobalVarEntryKind Flags,
  2102.                                           unsigned Order);
  2103.  
  2104.   /// Register device global variable entry.
  2105.   void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
  2106.                                         int64_t VarSize,
  2107.                                         OMPTargetGlobalVarEntryKind Flags,
  2108.                                         GlobalValue::LinkageTypes Linkage);
  2109.   /// Checks if the variable with the given name has been registered already.
  2110.   bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
  2111.     return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
  2112.   }
  2113.   /// Applies action \a Action on all registered entries.
  2114.   typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
  2115.       OffloadDeviceGlobalVarEntryInfoActTy;
  2116.   void actOnDeviceGlobalVarEntriesInfo(
  2117.       const OffloadDeviceGlobalVarEntryInfoActTy &Action);
  2118.  
  2119. private:
  2120.   /// Return the count of entries at a particular source location.
  2121.   unsigned
  2122.   getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
  2123.  
  2124.   /// Update the count of entries at a particular source location.
  2125.   void
  2126.   incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
  2127.  
  2128.   static TargetRegionEntryInfo
  2129.   getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
  2130.     return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
  2131.                                  EntryInfo.FileID, EntryInfo.Line, 0);
  2132.   }
  2133.  
  2134.   // Count of entries at a location.
  2135.   std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
  2136.  
  2137.   // Storage for target region entries kind.
  2138.   typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
  2139.       OffloadEntriesTargetRegionTy;
  2140.   OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
  2141.   /// Storage for device global variable entries kind. The storage is to be
  2142.   /// indexed by mangled name.
  2143.   typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
  2144.       OffloadEntriesDeviceGlobalVarTy;
  2145.   OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
  2146. };
  2147.  
  2148. /// Class to represent the control flow structure of an OpenMP canonical loop.
  2149. ///
  2150. /// The control-flow structure is standardized for easy consumption by
  2151. /// directives associated with loops. For instance, the worksharing-loop
  2152. /// construct may change this control flow such that each loop iteration is
  2153. /// executed on only one thread. The constraints of a canonical loop in brief
  2154. /// are:
  2155. ///
  2156. ///  * The number of loop iterations must have been computed before entering the
  2157. ///    loop.
  2158. ///
  2159. ///  * Has an (unsigned) logical induction variable that starts at zero and
  2160. ///    increments by one.
  2161. ///
  2162. ///  * The loop's CFG itself has no side-effects. The OpenMP specification
  2163. ///    itself allows side-effects, but the order in which they happen, including
  2164. ///    how often or whether at all, is unspecified. We expect that the frontend
  2165. ///    will emit those side-effect instructions somewhere (e.g. before the loop)
  2166. ///    such that the CanonicalLoopInfo itself can be side-effect free.
  2167. ///
  2168. /// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
  2169. /// execution of a loop body that satisfies these constraints. It does NOT
  2170. /// represent arbitrary SESE regions that happen to contain a loop. Do not use
  2171. /// CanonicalLoopInfo for such purposes.
  2172. ///
  2173. /// The control flow can be described as follows:
  2174. ///
  2175. ///     Preheader
  2176. ///        |
  2177. ///  /-> Header
  2178. ///  |     |
  2179. ///  |    Cond---\
  2180. ///  |     |     |
  2181. ///  |    Body   |
  2182. ///  |    | |    |
  2183. ///  |   <...>   |
  2184. ///  |    | |    |
  2185. ///   \--Latch   |
  2186. ///              |
  2187. ///             Exit
  2188. ///              |
  2189. ///            After
  2190. ///
  2191. /// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
  2192. /// including) and end at AfterIP (at the After's first instruction, excluding).
  2193. /// That is, instructions in the Preheader and After blocks (except the
  2194. /// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
  2195. /// side-effects. Typically, the Preheader is used to compute the loop's trip
  2196. /// count. The instructions from BodyIP (at the Body block's first instruction,
  2197. /// excluding) until the Latch are also considered outside CanonicalLoopInfo's
  2198. /// control and thus can have side-effects. The body block is the single entry
  2199. /// point into the loop body, which may contain arbitrary control flow as long
  2200. /// as all control paths eventually branch to the Latch block.
  2201. ///
  2202. /// TODO: Consider adding another standardized BasicBlock between Body CFG and
  2203. /// Latch to guarantee that there is only a single edge to the latch. It would
  2204. /// make loop transformations easier by not needing to consider multiple
  2205. /// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
  2206. /// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
  2207. /// executes after each body iteration.
  2208. ///
  2209. /// There must be no loop-carried dependencies through llvm::Values. This is
  2210. /// equivalent to that the Latch has no PHINode and the Header's only PHINode is
  2211. /// for the induction variable.
  2212. ///
  2213. /// All code in Header, Cond, Latch and Exit (plus the terminator of the
  2214. /// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
  2215. /// by assertOK(). They are expected to not be modified unless explicitly
  2216. /// modifying the CanonicalLoopInfo through a method that applies an OpenMP
  2217. /// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
  2218. /// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
  2219. /// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
  2220. /// anymore as its underlying control flow may not exist anymore.
  2221. /// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
  2222. /// may also return a new CanonicalLoopInfo that can be passed to other
  2223. /// loop-associated construct implementing methods. These loop-transforming
  2224. /// methods may either create a new CanonicalLoopInfo usually using
  2225. /// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
  2226. /// modify one of the input CanonicalLoopInfo and return it as representing the
  2227. /// modified loop. What is done is an implementation detail of
  2228. /// transformation-implementing method and callers should always assume that the
  2229. /// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
  2230. /// Returned CanonicalLoopInfo have the same structure and guarantees as the one
  2231. /// created by createCanonicalLoop, such that transforming methods do not have
  2232. /// to special case where the CanonicalLoopInfo originated from.
  2233. ///
  2234. /// Generally, methods consuming CanonicalLoopInfo do not need an
  2235. /// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
  2236. /// CanonicalLoopInfo to insert new or modify existing instructions. Unless
  2237. /// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
  2238. /// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
  2239. /// any InsertPoint in the Preheader, After or Block can still be used after
  2240. /// calling such a method.
  2241. ///
  2242. /// TODO: Provide mechanisms for exception handling and cancellation points.
  2243. ///
  2244. /// Defined outside OpenMPIRBuilder because nested classes cannot be
  2245. /// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
  2246. class CanonicalLoopInfo {
  2247.   friend class OpenMPIRBuilder;
  2248.  
  2249. private:
  2250.   BasicBlock *Header = nullptr; ///< Returned by getHeader(); null if invalid.
  2251.   BasicBlock *Cond = nullptr;   ///< Returned by getCond().
  2252.   BasicBlock *Latch = nullptr;  ///< Returned by getLatch().
  2253.   BasicBlock *Exit = nullptr;   ///< Returned by getExit().
  2254.  
  2255.   /// Add the control blocks of this loop to \p BBs.
  2256.   ///
  2257.   /// This does not include any block from the body, including the one returned
  2258.   /// by getBody().
  2259.   ///
  2260.   /// FIXME: This currently includes the Preheader and After blocks even though
  2261.   /// their content is (mostly) not under CanonicalLoopInfo's control.
  2262.   /// Re-evaluate whether this makes sense.
  2263.   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
  2264.  
  2265.   /// Sets the number of loop iterations to \p TripCount. This value must be
  2266.   /// valid in the condition block (i.e., defined in the preheader) and is
  2267.   /// interpreted as an unsigned integer.
  2268.   void setTripCount(Value *TripCount);
  2269.  
  2270.   /// Replace all uses of the canonical induction variable in the loop body with
  2271.   /// a new one.
  2272.   ///
  2273.   /// The intended use case is to update the induction variable for an updated
  2274.   /// iteration space such that it can stay normalized in the 0...tripcount-1
  2275.   /// range.
  2276.   ///
  2277.   /// The \p Updater is called with the (presumably updated) current normalized
  2278.   /// induction variable and is expected to return the value that uses of the
  2279.   /// pre-updated induction values should use instead, typically dependent on
  2280.   /// the new induction variable. This is a lambda (instead of e.g. just passing
  2281.   /// the new value) to be able to distinguish the uses of the pre-updated
  2282.   /// induction variable and uses of the induction variable to compute the
  2283.   /// updated induction variable value.
  2284.   void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
  2285.  
  2286. public:
  2287.   /// Tells whether this object currently represents the IR of a loop. A false
  2288.   /// result means it may have been consumed by a loop transformation or was
  2289.   /// never initialized; do not use the object in that case.
  2290.   bool isValid() const { return Header != nullptr; }
  2291.  
  2292.   /// The preheader ensures that there is only a single edge entering the loop.
  2293.   /// Code that must be executed before any loop iteration can be emitted here,
  2294.   /// such as computing the loop trip count and begin lifetime markers. Code in
  2295.   /// the preheader is not considered part of the canonical loop.
  2296.   BasicBlock *getPreheader() const;
  2297.  
  2298.   /// The header is the entry block of each iteration. In the canonical control
  2299.   /// flow, it only contains the PHINode for the induction variable.
  2300.   BasicBlock *getHeader() const {
  2301.     assert(isValid() && "Requires a valid canonical loop");
  2302.     return Header;
  2303.   }
  2304.  
  2305.   /// The condition block computes whether there is another loop iteration. If
  2306.   /// yes, it branches to the body; otherwise to the exit block.
  2307.   BasicBlock *getCond() const {
  2308.     assert(isValid() && "Requires a valid canonical loop");
  2309.     return Cond;
  2310.   }
  2311.  
  2312.   /// Single entry block of a loop iteration, not under CanonicalLoopInfo's
  2313.   /// control. May hold arbitrary control flow that eventually reaches \p Latch.
  2314.   BasicBlock *getBody() const {
  2315.     assert(isValid() && "Requires a valid canonical loop");
  2316.     auto *CondBranch = cast<BranchInst>(Cond->getTerminator());
  2317.     return CondBranch->getSuccessor(0);
  2318.   }
  2319.  
  2320.   /// Reaching the latch indicates the end of the loop body code. In the
  2321.   /// canonical control flow, the latch only contains the increment of the
  2322.   /// induction variable.
  2323.   BasicBlock *getLatch() const {
  2324.     assert(isValid() && "Requires a valid canonical loop");
  2325.     return Latch;
  2326.   }
  2327.  
  2328.   /// Reaching the exit block indicates that no more iterations are executed.
  2329.   BasicBlock *getExit() const {
  2330.     assert(isValid() && "Requires a valid canonical loop");
  2331.     return Exit;
  2332.   }
  2333.  
  2334.   /// The after block is intended for clean-up code such as lifetime end
  2335.   /// markers. It is separate from the exit block to ensure, analogous to the
  2336.   /// preheader, that it has just a single entry edge and is free from PHI
  2337.   /// nodes should there be multiple loop exits (such as from break
  2338.   /// statements/cancellations). Returned as the single successor of Exit.
  2339.   BasicBlock *getAfter() const {
  2340.     assert(isValid() && "Requires a valid canonical loop");
  2341.     return Exit->getSingleSuccessor();
  2342.   }
  2343.  
  2344.   /// Returns the llvm::Value containing the number of loop iterations. It must
  2345.   /// be valid in the preheader and always interpreted as an unsigned integer of
  2346.   /// any bit-width.
  2347.   Value *getTripCount() const {
  2348.     assert(isValid() && "Requires a valid canonical loop");
  2349.     Instruction *CmpI = &Cond->front();
  2350.     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
  2351.     return CmpI->getOperand(1);
  2352.   }
  2353.  
  2354.   /// Returns the instruction representing the current logical induction
  2355.   /// variable. Always unsigned, always starting at 0 with an increment of one.
  2356.   Instruction *getIndVar() const {
  2357.     assert(isValid() && "Requires a valid canonical loop");
  2358.     Instruction *IndVarPHI = &Header->front();
  2359.     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
  2360.     return IndVarPHI;
  2361.   }
  2362.  
  2363.   /// Return the type of the induction variable (and the trip count).
  2364.   Type *getIndVarType() const {
  2365.     assert(isValid() && "Requires a valid canonical loop");
  2366.     return getIndVar()->getType();
  2367.   }
  2368.  
  2369.   /// Return the insertion point for user code before the loop.
  2370.   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
  2371.     assert(isValid() && "Requires a valid canonical loop");
  2372.     BasicBlock *Preheader = getPreheader();
  2373.     return {Preheader, std::prev(Preheader->end())};
  2374.   };
  2375.  
  2376.   /// Return the insertion point for user code in the body.
  2377.   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
  2378.     assert(isValid() && "Requires a valid canonical loop");
  2379.     BasicBlock *Body = getBody();
  2380.     return {Body, Body->begin()};
  2381.   };
  2382.  
  2383.   /// Return the insertion point for user code after the loop.
  2384.   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
  2385.     assert(isValid() && "Requires a valid canonical loop");
  2386.     BasicBlock *After = getAfter();
  2387.     return {After, After->begin()};
  2388.   };
  2389.  
  2390.   Function *getFunction() const {
  2391.     assert(isValid() && "Requires a valid canonical loop");
  2392.     return Header->getParent();
  2393.   }
  2394.  
  /// Consistency self-check.
  ///
  /// Only declared here; the checks themselves are in the out-of-line
  /// definition. Takes no arguments, returns nothing and is a const member.
  void assertOK() const;
  2397.  
  /// Invalidate this loop. That is, the underlying IR does not fulfill the
  /// requirements of an OpenMP canonical loop anymore.
  ///
  /// NOTE(review): presumably isValid() reports false afterwards, in which
  /// case the accessors of this class, which assert isValid(), must no longer
  /// be called on this object -- confirm against the out-of-line definition.
  void invalidate();
  2401. };
  2402.  
  2403. } // end namespace llvm
  2404.  
  2405. #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
  2406.