  1. //===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the BlockGenerator and VectorBlockGenerator classes, which
  10. // generate sequential code and vectorized code for a polyhedral statement,
  11. // respectively.
  12. //
  13. //===----------------------------------------------------------------------===//
  14.  
  15. #ifndef POLLY_BLOCK_GENERATORS_H
  16. #define POLLY_BLOCK_GENERATORS_H
  17.  
  18. #include "polly/CodeGen/IRBuilder.h"
  19. #include "polly/Support/ScopHelper.h"
  20. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  21. #include "isl/isl-noexceptions.h"
  22.  
  23. namespace polly {
  24. using llvm::AllocaInst;
  25. using llvm::ArrayRef;
  26. using llvm::AssertingVH;
  27. using llvm::BasicBlock;
  28. using llvm::BinaryOperator;
  29. using llvm::CmpInst;
  30. using llvm::DataLayout;
  31. using llvm::DenseMap;
  32. using llvm::DominatorTree;
  33. using llvm::Function;
  34. using llvm::Instruction;
  35. using llvm::LoadInst;
  36. using llvm::Loop;
  37. using llvm::LoopInfo;
  38. using llvm::LoopToScevMapT;
  39. using llvm::MapVector;
  40. using llvm::PHINode;
  41. using llvm::ScalarEvolution;
  42. using llvm::SetVector;
  43. using llvm::SmallVector;
  44. using llvm::StoreInst;
  45. using llvm::StringRef;
  46. using llvm::Type;
  47. using llvm::UnaryInstruction;
  48. using llvm::Value;
  49.  
  50. class MemoryAccess;
  51. class ScopArrayInfo;
  52. class IslExprBuilder;
  53.  
  54. /// Generate a new basic block for a polyhedral statement.
  55. class BlockGenerator {
  56. public:
  57.   typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;
  58.  
  59.   /// Map types to resolve scalar dependences.
  60.   ///
  61.   ///@{
  62.   using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;
  63.  
  64.   /// Simple vector of instructions to store escape users.
  65.   using EscapeUserVectorTy = SmallVector<Instruction *, 4>;
  66.  
  67.   /// Map type to resolve escaping users for scalar instructions.
  68.   ///
  69.   /// @see The EscapeMap member.
  70.   using EscapeUsersAllocaMapTy =
  71.       MapVector<Instruction *,
  72.                 std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;
  73.  
  74.   ///@}
  75.  
  76.   /// Create a generator for basic blocks.
  77.   ///
  78.   /// @param Builder     The LLVM-IR Builder used to generate the statement. The
  79.   ///                    code is generated at the location, the Builder points
  80.   ///                    to.
  81.   /// @param LI          The loop info for the current function
  82.   /// @param SE          The scalar evolution info for the current function
  83.   /// @param DT          The dominator tree of this function.
  84.   /// @param ScalarMap   Map from scalars to their demoted location.
  85.   /// @param EscapeMap   Map from scalars to their escape users and locations.
  86.   /// @param GlobalMap   A mapping from llvm::Values used in the original scop
  87.   ///                    region to a new set of llvm::Values. Each reference to
  88.   ///                    an original value appearing in this mapping is replaced
  89.   ///                    with the new value it is mapped to.
  90.   /// @param ExprBuilder An expression builder to generate new access functions.
  91.   /// @param StartBlock  The first basic block after the RTC.
  92.   BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
  93.                  DominatorTree &DT, AllocaMapTy &ScalarMap,
  94.                  EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
  95.                  IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);
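  // A rough usage sketch, illustrative only: the analyses, maps, and the
  // ScopStmt are assumed to be provided by the surrounding code-generation
  // driver (all setup names below are hypothetical).
  //
  //   polly::BlockGenerator BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap,
  //                                  GlobalMap, &ExprBuilder, StartBlock);
  //   // Emit a copy of each statement the generated AST visits.
  //   BlockGen.copyStmt(Stmt, LTS, NewAccesses);
  //   // Once all statements are emitted, wire up the demoted scalars.
  //   BlockGen.finalizeSCoP(S);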
  96.  
  97.   /// Copy the basic block.
  98.   ///
  99.   /// This copies the entire basic block and updates references to old values
  100.   /// with references to new values, as defined by GlobalMap.
  101.   ///
  102.   /// @param Stmt        The block statement to code generate.
  103.   /// @param LTS         A map from old loops to new induction variables as
  104.   ///                    SCEVs.
  105.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  106.   ///                    which may contain new access expressions for certain
  107.   ///                    memory accesses.
  108.   void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
  109.                 isl_id_to_ast_expr *NewAccesses);
  110.  
  111.   /// Remove a ScopArrayInfo's allocation from the ScalarMap.
  112.   ///
  113.   /// This function allows removing values from the ScalarMap. This is useful
  114.   /// if the corresponding alloca instruction will be deleted (or moved into
  115.   /// another module), as otherwise the underlying AssertingVH would trigger
  116.   /// because we would still be keeping a reference to this
  117.   /// scalar.
  118.   ///
  119.   /// @param Array The array for which the alloca was generated.
  120.   void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
  121.  
  122.   /// Return the alloca for @p Access.
  123.   ///
  124.   /// If no alloca was mapped for @p Access a new one is created.
  125.   ///
  126.   /// @param Access    The memory access for which to generate the alloca.
  127.   ///
  128.   /// @returns The alloca for @p Access or a replacement value taken from
  129.   ///          GlobalMap.
  130.   Value *getOrCreateAlloca(const MemoryAccess &Access);
  131.  
  132.   /// Return the alloca for @p Array.
  133.   ///
  134.   /// If no alloca was mapped for @p Array a new one is created.
  135.   ///
  136.   /// @param Array The array for which to generate the alloca.
  137.   ///
  138.   /// @returns The alloca for @p Array or a replacement value taken from
  139.   ///          GlobalMap.
  140.   Value *getOrCreateAlloca(const ScopArrayInfo *Array);
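  // The created allocas correspond to the demoted scalars described further
  // below and follow the naming convention documented for ScalarMap;
  // conceptually (types and names are illustrative only):
  //
  //   %x.s2a    = alloca double   ; MemoryKind::Value slot for %x
  //   %x.phiops = alloca double   ; MemoryKind::PHI slot for a PHI named %x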
  141.  
  142.   /// Finalize the code generation for the SCoP @p S.
  143.   ///
  144.   /// This will initialize and finalize the scalar variables we demoted during
  145.   /// the code generation.
  146.   ///
  147.   /// @see createScalarInitialization(Scop &)
  148.   /// @see createScalarFinalization(Region &)
  149.   void finalizeSCoP(Scop &S);
  150.  
  151.   /// An empty destructor
  152.   virtual ~BlockGenerator() {}
  153.  
  154.   BlockGenerator(const BlockGenerator &) = default;
  155.  
  156. protected:
  157.   PollyIRBuilder &Builder;
  158.   LoopInfo &LI;
  159.   ScalarEvolution &SE;
  160.   IslExprBuilder *ExprBuilder;
  161.  
  162.   /// The dominator tree of this function.
  163.   DominatorTree &DT;
  164.  
  165.   /// The entry block of the current function.
  166.   BasicBlock *EntryBB;
  167.  
  168.   /// Map to resolve scalar dependences for PHI operands and scalars.
  169.   ///
  170.   /// When translating code that contains inter-block scalar dependences
  171.   /// (including the use of data-carrying PHI
  172.   /// nodes), we do not directly regenerate in-register SSA code, but instead
  173.   /// allocate some stack memory through which these scalar values are passed.
  174.   /// Only a later pass of -mem2reg will then (re)introduce in-register
  175.   /// computations.
  176.   ///
  177.   /// To keep track of the memory location(s) used to store the data computed by
  178.   /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
  179.   /// given ScopArrayInfo to the chunk of stack-allocated memory that is
  180.   /// used for code generation.
  181.   ///
  182.   /// Up to two different ScopArrayInfo objects are associated with each
  183.   /// llvm::Value:
  184.   ///
  185.   /// MemoryType::Value objects are used for normal scalar dependences that go
  186.   /// from a scalar definition to its use. Such dependences are lowered by
  187.   /// directly writing the value an instruction computes into the corresponding
  188.   /// chunk of memory and reading it back from this chunk of memory right before
  189.   /// every use of this original scalar value. The memory allocations for
  190.   /// MemoryType::Value objects end with '.s2a'.
  191.   ///
  192.   /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
  193.   /// nodes. For each PHI node we introduce, besides the Array of type
  194.   /// MemoryType::Value, a second chunk of memory into which we write at the end
  195.   /// of each basic block preceding the PHI instruction the value passed
  196.   /// through this basic block. At the place where the PHI node is executed, we
  197.   /// replace the PHI node with a load from the corresponding MemoryType::PHI
  198.   /// memory location. The memory allocations for MemoryType::PHI end with
  199.   /// '.phiops'.
  200.   ///
  201.   /// Example:
  202.   ///
  203.   ///                              Input C Code
  204.   ///                              ============
  205.   ///
  206.   ///                 S1:      x1 = ...
  207.   ///                          for (i=0...N) {
  208.   ///                 S2:           x2 = phi(x1, add)
  209.   ///                 S3:           add = x2 + 42;
  210.   ///                          }
  211.   ///                 S4:      print(x1)
  212.   ///                          print(x2)
  213.   ///                          print(add)
  214.   ///
  215.   ///
  216.   ///        Unmodified IR                         IR After expansion
  217.   ///        =============                         ==================
  218.   ///
  219.   /// S1:   x1 = ...                     S1:    x1 = ...
  220.   ///                                           x1.s2a = s1
  221.   ///                                           x2.phiops = s1
  222.   ///        |                                    |
  223.   ///        |   <--<--<--<--<                    |   <--<--<--<--<
  224.   ///        | /              \                   | /              \     .
  225.   ///        V V               \                  V V               \    .
  226.   /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
  227.   ///                           |               x2.s2a = x2          |
  228.   ///                           |                                    |
  229.   /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
  230.   ///                           |               add.s2a = add        |
  231.   ///                           |               x2.phiops = add      |
  232.   ///        | \               /                  | \               /
  233.   ///        |  \             /                   |  \             /
  234.   ///        |   >-->-->-->-->                    |   >-->-->-->-->
  235.   ///        V                                    V
  236.   ///
  237.   ///                                    S4:    x1 = x1.s2a
  238.   /// S4:  ... = x1                             ... = x1
  239.   ///                                           x2 = x2.s2a
  240.   ///      ... = x2                             ... = x2
  241.   ///                                           add = add.s2a
  242.   ///      ... = add                            ... = add
  243.   ///
  244.   ///      ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
  245.   ///                    add:Value -> add.s2a, x2:PHI -> x2.phiops }
  246.   ///
  247.   ///  ??? Why does a PHI-node require two memory chunks ???
  248.   ///
  249.   ///  One may wonder why a PHI node requires two memory chunks and not just
  250.   ///  all data is stored in a single location. The following example tries
  251.   ///  to store all data in .s2a and drops the .phiops location:
  252.   ///
  253.   ///      S1:    x1 = ...
  254.   ///             x1.s2a = s1
  255.   ///             x2.s2a = s1             // use .s2a instead of .phiops
  256.   ///               |
  257.   ///               |   <--<--<--<--<
  258.   ///               | /              \    .
  259.   ///               V V               \   .
  260.   ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
  261.   ///                                  |  // from .s2a
  262.   ///                                  |
  263.   ///             x2.s2a = x2          |  // store into .s2a as normal
  264.   ///                                  |
  265.   ///      S3:    add = x2 + 42        |
  266.   ///             add.s2a = add        |
  267.   ///             x2.s2a = add         |  // use s2a instead of .phiops
  268.   ///               | \               /   // !!! This is wrong, as x2.s2a now
  269.   ///               |   >-->-->-->-->     // contains add instead of x2.
  270.   ///               V
  271.   ///
  272.   ///      S4:    x1 = x1.s2a
  273.   ///             ... = x1
  274.   ///             x2 = x2.s2a             // !!! We now read 'add' instead of
  275.   ///             ... = x2                // 'x2'
  276.   ///             add = add.s2a
  277.   ///             ... = add
  278.   ///
  279.   ///  As visible in the example, the SSA value of the PHI node may still be
  280.   ///  needed _after_ the basic block, which could conceptually branch to the
  281.   ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
  282.   ///  single memory location is not enough to code-generate a PHI node.
  283.   ///
  284.   /// Memory locations used for the special PHI node modeling.
  285.   AllocaMapTy &ScalarMap;
  286.  
  287.   /// Map from instructions to their escape users as well as the alloca.
  288.   EscapeUsersAllocaMapTy &EscapeMap;
  289.  
  290.   /// A map from llvm::Values referenced in the old code to a new set of
  291.   ///        llvm::Values, which is used to replace these old values during
  292.   ///        code generation.
  293.   ValueMapT &GlobalMap;
  294.  
  295.   /// The first basic block after the RTC.
  296.   BasicBlock *StartBlock;
  297.  
  298.   /// Split @p BB to create a new one we can use to clone @p BB in.
  299.   BasicBlock *splitBB(BasicBlock *BB);
  300.  
  301.   /// Copy the given basic block.
  302.   ///
  303.   /// @param Stmt      The statement to code generate.
  304.   /// @param BB        The basic block to code generate.
  305.   /// @param BBMap     A mapping from old values to their new values in this
  306.   ///                  block.
  307.   /// @param LTS         A map from old loops to new induction variables as
  308.   ///                    SCEVs.
  309.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  310.   ///                    which may contain new access expressions for certain
  311.   ///                    memory accesses.
  312.   ///
  313.   /// @returns The copy of the basic block.
  314.   BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
  315.                      LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
  316.  
  317.   /// Copy the given basic block.
  318.   ///
  319.   /// @param Stmt      The statement to code generate.
  320.   /// @param BB        The basic block to code generate.
  321.   /// @param BBCopy    The new basic block to generate code in.
  322.   /// @param BBMap     A mapping from old values to their new values in this
  323.   ///                  block.
  324.   /// @param LTS         A map from old loops to new induction variables as
  325.   ///                    SCEVs.
  326.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  327.   ///                    which may contain new access expressions for certain
  328.   ///                    memory accesses.
  329.   void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
  330.               ValueMapT &BBMap, LoopToScevMapT &LTS,
  331.               isl_id_to_ast_expr *NewAccesses);
  332.  
  333.   /// Generate reload of scalars demoted to memory and needed by @p Stmt.
  334.   ///
  335.   /// @param Stmt  The statement we generate code for.
  336.   /// @param LTS   A mapping from loops' virtual canonical induction
  337.   ///              variables to their new values.
  338.   /// @param BBMap A mapping from old values to their new values in this block.
  339.   /// @param NewAccesses A map from memory access ids to new ast expressions.
  340.   void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
  341.                            ValueMapT &BBMap,
  342.                            __isl_keep isl_id_to_ast_expr *NewAccesses);
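  // In the notation of the large ScalarMap example further down, this emits
  // the reloads at the beginning of the copied block, e.g. (illustrative
  // types and names only):
  //
  //   %x2 = load double, double* %x2.phiops   ; reload of a PHI value
  //   %x1 = load double, double* %x1.s2a      ; reload of a plain scalar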
  343.  
  344.   /// When statement tracing is enabled, build the print instructions for
  345.   /// printing the current statement instance.
  346.   ///
  347.   /// The printed output looks like:
  348.   ///
  349.   ///     Stmt1(0)
  350.   ///
  351.   /// If printing of scalars is enabled, it also appends the value of each
  352.   /// scalar to the line:
  353.   ///
  354.   ///     Stmt1(0) %i=1 %sum=5
  355.   ///
  356.   /// @param Stmt  The statement we generate code for.
  357.   /// @param LTS   A mapping from loops' virtual canonical induction
  358.   ///              variables to their new values.
  359.   /// @param BBMap A mapping from old values to their new values in this block.
  360.   void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
  361.                               ValueMapT &BBMap);
  362.  
  363.   /// Generate instructions that compute whether one instance of @p Subdomain is
  364.   /// executed.
  365.   ///
  366.   /// @param Stmt      The statement we generate code for.
  367.   /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
  368.   ///                  @p Stmt's domain are ignored.
  369.   ///
  370.   /// @return An expression of type i1, generated into the current builder
  371.   ///         position, that evaluates to 1 if the executed instance is part of
  372.   ///         @p Subdomain.
  373.   Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);
  374.  
  375.   /// Generate code that executes in a subset of @p Stmt's domain.
  376.   ///
  377.   /// @param Stmt        The statement we generate code for.
  378.   /// @param Subdomain   The condition for some code to be executed.
  379.   /// @param Subject     A name for the code that is executed
  380.   ///                    conditionally. Used to name new basic blocks and
  381.   ///                    instructions.
  382.   /// @param GenThenFunc Callback which generates the code to be executed
  383.   ///                    when the currently executed instance is in @p Subdomain. The
  384.   ///                    IRBuilder's position is moved to within the block that
  385.   ///                    executes conditionally for this callback.
  386.   void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
  387.                                     StringRef Subject,
  388.                                     const std::function<void()> &GenThenFunc);
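  // A hedged usage sketch (values are hypothetical): the callback takes no
  // arguments and emits code at the builder's current position, which is
  // temporarily moved into the conditionally executed block.
  //
  //   generateConditionalExecution(Stmt, WriteDomain, "polly.partial",
  //                                [&]() {
  //                                  // Runs only for instances inside
  //                                  // WriteDomain.
  //                                  Builder.CreateStore(NewVal, Address);
  //                                });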
  389.  
  390.   /// Generate the scalar stores for the given statement.
  391.   ///
  392.   /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
  393.   /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  394.   /// be demoted to memory.
  395.   ///
  396.   /// @param Stmt  The statement we generate code for.
  397.   /// @param LTS   A mapping from loops' virtual canonical induction
  398.   ///              variables to their new values
  399.   ///              (for values recalculated in the new ScoP, but not
  400.   ///               within this basic block)
  401.   /// @param BBMap A mapping from old values to their new values in this block.
  402.   /// @param NewAccesses A map from memory access ids to new ast expressions.
  403.   virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
  404.                                     ValueMapT &BBMap,
  405.                                     __isl_keep isl_id_to_ast_expr *NewAccesses);
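  // Mirroring the ScalarMap example above, the emitted stores look roughly
  // like the following (illustrative types and names only):
  //
  //   store double %p_add, double* %add.s2a    ; value written for later uses
  //   store double %p_add, double* %x2.phiops  ; value fed into the PHI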
  406.  
  407.   /// Handle users of @p Array outside the SCoP.
  408.   ///
  409.   /// @param S         The current SCoP.
  410.   /// @param Array     The ScopArrayInfo to handle.
  411.   void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
  412.  
  413.   /// Find scalar statements that have outside users.
  414.   ///
  415.   /// We register these scalar values to later update subsequent scalar uses of
  416.   /// these values to either use the newly computed value from within the scop
  417.   /// (if the scop was executed) or the unchanged original code (if the run-time
  418.   /// check failed).
  419.   ///
  420.   /// @param S The scop for which to find the outside users.
  421.   void findOutsideUsers(Scop &S);
  422.  
  423.   /// Initialize the memory of demoted scalars.
  424.   ///
  425.   /// @param S The scop for which to generate the scalar initializers.
  426.   void createScalarInitialization(Scop &S);
  427.  
  428.   /// Create exit PHI node merges for PHI nodes with more than two edges
  429.   ///        from inside the scop.
  430.   ///
  431.   /// For scops which have a PHI node in the exit block that has more than two
  432.   /// incoming edges from inside the scop region, we require some special
  433.   /// handling to understand which of the possible values will be passed to the
  434.   /// PHI node from inside the optimized version of the scop. To do so ScopInfo
  435.   /// models the possible incoming values as write accesses of the ScopStmts.
  436.   ///
  437.   /// This function creates corresponding code to reload the computed outgoing
  438.   /// value from the stack slot it has been stored into and to pass it on to the
  439.   /// PHI node in the original exit block.
  440.   ///
  441.   /// @param S The scop for which to generate the exiting PHI nodes.
  442.   void createExitPHINodeMerges(Scop &S);
  443.  
  444.   /// Promote the values of demoted scalars after the SCoP.
  445.   ///
  446.   /// If a scalar value was used outside the SCoP we need to promote the value
  447.   /// stored in the memory cell allocated for that scalar and combine it with
  448.   /// the original value in the non-optimized SCoP.
  449.   void createScalarFinalization(Scop &S);
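  // Conceptually, for each escaping scalar %x this emits a merge where the
  // optimized and the original code paths join again (block and value names
  // below are illustrative, not the exact ones produced):
  //
  //   merge.block:
  //     %x.merge = phi double [ %x.final_reload, %polly.exiting ],
  //                           [ %x, %original.exiting ]
  //     ; uses of %x after the SCoP are rewritten to use %x.merge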
  450.  
  451.   /// Try to synthesize a new value
  452.   ///
  453.   /// Given an old value, we try to synthesize it in a new context from its
  454.   /// original SCEV expression. We start from the original SCEV expression,
  455.   /// then replace outdated parameter and loop references, and finally
  456.   /// expand it to code that computes this updated expression.
  457.   ///
  458.   /// @param Stmt      The statement to code generate
  459.   /// @param Old       The old Value
  460.   /// @param BBMap     A mapping from old values to their new values
  461.   ///                  (for values recalculated within this basic block)
  462.   /// @param LTS       A mapping from loops' virtual canonical induction
  463.   ///                  variables to their new values
  464.   ///                  (for values recalculated in the new ScoP, but not
  465.   ///                   within this basic block)
  466.   /// @param L         The loop that surrounded the instruction that referenced
  467.   ///                  this value in the original code. This loop is used to
  468.   ///                  evaluate the scalar evolution at the right scope.
  469.   ///
  470.   /// @returns  o A newly synthesized value.
  471.   ///           o NULL, if synthesizing the value failed.
  472.   Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
  473.                                LoopToScevMapT &LTS, Loop *L) const;
  474.  
  475.   /// Get the new version of a value.
  476.   ///
  477.   /// Given an old value, we first check if a new version of this value is
  478.   /// available in the BBMap or GlobalMap. In case it is not and the value can
  479.   /// be recomputed using SCEV, we do so. If we cannot recompute a value
  480.   /// using SCEV, but we understand that the value is constant within the scop,
  481.   /// we return the old value.  If the value can still not be derived, this
  482.   /// function will assert.
  483.   ///
  484.   /// @param Stmt      The statement to code generate.
  485.   /// @param Old       The old Value.
  486.   /// @param BBMap     A mapping from old values to their new values
  487.   ///                  (for values recalculated within this basic block).
  488.   /// @param LTS       A mapping from loops' virtual canonical induction
  489.   ///                  variables to their new values
  490.   ///                  (for values recalculated in the new ScoP, but not
  491.   ///                   within this basic block).
  492.   /// @param L         The loop that surrounded the instruction that referenced
  493.   ///                  this value in the original code. This loop is used to
  494.   ///                  evaluate the scalar evolution at the right scope.
  495.   ///
  496.   /// @returns  o The old value, if it is still valid.
  497.   ///           o The new value, if available.
  498.   ///           o NULL, if no value is found.
  499.   Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
  500.                      LoopToScevMapT &LTS, Loop *L) const;
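  // The lookup order described above, as a simplified pseudo-code sketch (the
  // actual logic, including the SCEV synthesis details, lives in
  // BlockGenerators.cpp):
  //
  //   // (1) Reuse an already available copy; the precise BBMap/GlobalMap
  //   //     interaction is simplified here.
  //   if (Value *New = BBMap.lookup(Old))     return New;
  //   if (Value *New = GlobalMap.lookup(Old)) return New;
  //   // (2) Otherwise try to recompute the value from its SCEV expression.
  //   if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
  //     return New;
  //   // (3) Values that are constant within the scop can be used unchanged.
  //   if (/* Old is invariant within the scop */) return Old;
  //   llvm_unreachable("Unexpected scalar dependence in region!");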
  501.  
  502.   void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
  503.                       LoopToScevMapT &LTS);
  504.  
  505.   /// Get the innermost loop that surrounds the statement @p Stmt.
  506.   Loop *getLoopForStmt(const ScopStmt &Stmt) const;
  507.  
  508.   /// Generate the operand address
  509.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  510.   ///                    which may contain new access expressions for certain
  511.   ///                    memory accesses.
  512.   Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
  513.                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
  514.                                   isl_id_to_ast_expr *NewAccesses);
  515.  
  516.   /// Generate the operand address.
  517.   ///
  518.   /// @param Stmt         The statement to generate code for.
  519.   /// @param L            The innermost loop that surrounds the statement.
  520.   /// @param Pointer      If the access expression is not changed (i.e., not found
  521.   ///                     in @p LTS), use this Pointer from the original code
  522.   ///                     instead.
  523.   /// @param BBMap        A mapping from old values to their new values.
  524.   /// @param LTS          A mapping from loops' virtual canonical induction
  525.   ///                     variables to their new values.
  526.   /// @param NewAccesses  Ahead-of-time generated access expressions.
  527.   /// @param Id           Identifier of the MemoryAccess to generate.
  528.   /// @param ExpectedType The type the returned value should have.
  529.   ///
  530.   /// @return The generated address.
  531.   Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
  532.                                   ValueMapT &BBMap, LoopToScevMapT &LTS,
  533.                                   isl_id_to_ast_expr *NewAccesses,
  534.                                   __isl_take isl_id *Id, Type *ExpectedType);
  535.  
  536.   /// Generate the pointer value that is accessed by @p Access.
  537.   ///
  538.   /// For write accesses, generate the target address. For read accesses,
  539.   /// generate the source address.
  540.   /// The access can be either an array access or a scalar access. In the first
  541.   /// case, the returned address will point to an element of that array. In
  542.   /// the scalar case, an alloca is used.
  543.   /// If a new AccessRelation is set for the MemoryAccess, the new relation will
  544.   /// be used.
  545.   ///
  546.   /// @param Access      The access to generate a pointer for.
  547.   /// @param L           The innermost loop that surrounds the statement.
  548.   /// @param LTS         A mapping from loops' virtual canonical induction
  549.   ///                    variables to their new values.
  550.   /// @param BBMap       A mapping from old values to their new values.
  551.   /// @param NewAccesses A map from memory access ids to new ast expressions.
  552.   ///
  553.   /// @return The generated address.
  554.   Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
  555.                             ValueMapT &BBMap,
  556.                             __isl_keep isl_id_to_ast_expr *NewAccesses);
  557.  
  558.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  559.   ///                    which may contain new access expressions for certain
  560.   ///                    memory accesses.
  561.   Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
  562.                            LoopToScevMapT &LTS,
  563.                            isl_id_to_ast_expr *NewAccesses);
  564.  
  565.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  566.   ///                    which may contain new access expressions for certain
  567.   ///                    memory accesses.
  568.   void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
  569.                           LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
  570.  
  571.   /// Copy a single PHI instruction.
  572.   ///
  573.   /// The implementation in the BlockGenerator is trivial; however, it allows
  574.   /// subclasses to handle PHIs differently.
  575.   virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
  576.                                   LoopToScevMapT &) {}
  577.  
  578.   /// Copy a single Instruction.
  579.   ///
  580.   /// This copies a single Instruction and updates references to old values
  581.   /// with references to new values, as defined by GlobalMap and BBMap.
  582.   ///
  583.   /// @param Stmt        The statement to code generate.
  584.   /// @param Inst        The instruction to copy.
  585.   /// @param BBMap       A mapping from old values to their new values
  586.   ///                    (for values recalculated within this basic block).
  587.   /// @param GlobalMap   A mapping from old values to their new values
  588.   ///                    (for values recalculated in the new ScoP, but not
  589.   ///                    within this basic block).
  590.   /// @param LTS         A mapping from loops' virtual canonical induction
  591.   ///                    variables to their new values
  592.   ///                    (for values recalculated in the new ScoP, but not
  593.   ///                     within this basic block).
  594.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  595.   ///                    which may contain new access expressions for certain
  596.   ///                    memory accesses.
  597.   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
  598.                        LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
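  // For an ordinary (non-memory, non-synthesizable) instruction, the copy step
  // boils down to roughly the following simplified sketch of copyInstScalar
  // (special cases and the naming of the copy are omitted):
  //
  //   Instruction *NewInst = Inst->clone();
  //   // Remap every operand to its new value in the generated code.
  //   for (Use &Op : NewInst->operands())
  //     Op.set(getNewValue(Stmt, Op.get(), BBMap, LTS, getLoopForStmt(Stmt)));
  //   Builder.Insert(NewInst);
  //   BBMap[Inst] = NewInst;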
  599.  
  600.   /// Helper to determine if @p Inst can be synthesized in @p Stmt.
  601.   ///
  602.   /// @returns true iff @p Inst can be synthesized in @p Stmt.
  603.   bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);
  604.  
  605.   /// Remove dead instructions generated for BB.
  606.   ///
  607.   /// @param BB The basic block for which code has been generated.
  608.   /// @param BBMap A local map from old to new instructions.
  609.   void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);
  610.  
  611.   /// Invalidate the scalar evolution expressions for a scop.
  612.   ///
  613.   /// This function invalidates the scalar evolution results for all
  614.   /// instructions that are part of a given scop, and the loops
  615.   /// surrounding the users of merge blocks. This is necessary to ensure that
  616.   /// later scops do not obtain scalar evolution expressions that reference
  617.   /// values that earlier dominated the later scop, but have been moved into the
  618.   /// conditional part of an earlier scop and consequently no longer
  619.   /// dominate the later scop.
  620.   ///
  621.   /// @param S The scop to invalidate.
  622.   void invalidateScalarEvolution(Scop &S);
  623. };
  624.  
  625. /// Generate a new vector basic block for a polyhedral statement.
  626. ///
  627. /// The only public function exposed is generate().
  628. class VectorBlockGenerator final : BlockGenerator {
  629. public:
  630.   /// Generate a new vector basic block for a ScoPStmt.
  631.   ///
  632.   /// This code generation is similar to the normal, scalar code generation,
  633.   /// except that each instruction is code generated for several vector lanes
  634.   /// at a time. If possible, instructions are issued as actual vector
  635.   /// instructions, but e.g. for address calculation instructions we currently
  636.   /// generate scalar instructions for each vector lane.
  637.   ///
  638.   /// @param BlockGen    A block generator object used as parent.
  639.   /// @param Stmt        The statement to code generate.
  640.   /// @param VLTS        A mapping from loops' virtual canonical induction
  641.   ///                    variables to their new values
  642.   ///                    (for values recalculated in the new ScoP, but not
  643.   ///                     within this basic block), one for each lane.
  644.   /// @param Schedule    A map from the statement to a schedule where the
  645.   ///                    innermost dimension is the dimension of the innermost
  646.   ///                    loop containing the statement.
  647.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  648.   ///                    which may contain new access expressions for certain
  649.   ///                    memory accesses.
  650.   static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
  651.                        std::vector<LoopToScevMapT> &VLTS,
  652.                        __isl_keep isl_map *Schedule,
  653.                        __isl_keep isl_id_to_ast_expr *NewAccesses) {
  654.     VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
  655.     Generator.copyStmt(Stmt, NewAccesses);
  656.   }
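  // A hypothetical call site (names assumed): the driver prepares one
  // LoopToScevMapT per vector lane before invoking generate().
  //
  //   std::vector<LoopToScevMapT> VLTS(VectorWidth);
  //   // ... fill VLTS[Lane] with the per-lane induction variable SCEVs ...
  //   VectorBlockGenerator::generate(BlockGen, Stmt, VLTS, Schedule,
  //                                  NewAccesses);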
  657.  
  658. private:
  659.   // This is a vector of loop->scev maps.  The first map is used for the first
  660.   // vector lane, ...
  661.   // Each map contains information about Instructions in the old ScoP, which
  662.   // are recalculated in the new SCoP. When copying the basic block, we replace
  663.   // all references to the old instructions with their recalculated values.
  664.   //
  665.   // For example, when the code generator produces this AST:
  666.   //
  667.   //   for (int c1 = 0; c1 <= 1023; c1 += 1)
  668.   //     for (int c2 = 0; c2 <= 1023; c2 += VF)
  669.   //       for (int lane = 0; lane <= VF - 1; lane += 1)
  670.   //         Stmt(c2 + lane + 3, c1);
  671.   //
  672.   // VLTS[lane] contains a map:
  673.   //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
  674.   //   "inner loop in the old loop nest" -> SCEV("c1").
  675.   std::vector<LoopToScevMapT> &VLTS;
  676.  
  677.   // A map from the statement to a schedule where the innermost dimension is the
  678.   // dimension of the innermost loop containing the statement.
  679.   isl_map *Schedule;
  680.  
  681.   VectorBlockGenerator(BlockGenerator &BlockGen,
  682.                        std::vector<LoopToScevMapT> &VLTS,
  683.                        __isl_keep isl_map *Schedule);
  684.  
  685.   int getVectorWidth();
  686.  
  687.   Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
  688.                         VectorValueMapT &ScalarMaps, Loop *L);
  689.  
  690.   /// Load a vector from a set of adjacent scalars
  691.   ///
  692.   /// In case a set of scalars is known to be next to each other in memory,
  693.   /// create a vector load that loads those scalars
  694.   ///
  695.   /// %vector_ptr = bitcast double* %p to <4 x double>*
  696.   /// %vec_full = load <4 x double>* %vector_ptr
  697.   ///
  698.   /// @param Stmt           The statement to code generate.
  699.   /// @param NegativeStride This is used to indicate a -1 stride. In such
  700.   ///                       a case we load from the end of the accessed range
  701.   ///                       and shuffle the elements in reverse order into the
  702.   ///                       vector (see the sketch after this declaration). By
  703.   ///                       default a positive (+1) stride is assumed.
  704.   ///
  705.   /// @param NewAccesses    A map from memory access ids to new ast
  706.   ///                       expressions, which may contain new access
  707.   ///                       expressions for certain memory accesses.
  708.   Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
  709.                                VectorValueMapT &ScalarMaps,
  710.                                __isl_keep isl_id_to_ast_expr *NewAccesses,
  711.                                bool NegativeStride);
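  // The NegativeStride case mentioned above can be pictured roughly as follows
  // (illustrative IR only, mirroring the stride-one example; %p_low points at
  // the lowest-addressed element of the accessed range):
  //
  //   %vector_ptr = bitcast double* %p_low to <4 x double>*
  //   %vec_full   = load <4 x double>* %vector_ptr
  //   %vec_rev    = shufflevector <4 x double> %vec_full, <4 x double> undef,
  //                               <4 x i32> <i32 3, i32 2, i32 1, i32 0>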
  712.  
  713.   /// Load a vector initialized from a single scalar in memory
  714.   ///
  715.   /// In case all elements of a vector are initialized to the same
  716.   /// scalar value, this value is loaded and shuffled into all elements
  717.   /// of the vector.
  718.   ///
  719.   /// %splat_one = load <1 x double>* %p
  720.   /// %splat = shufflevector <1 x double> %splat_one, <1 x
  721.   ///       double> %splat_one, <4 x i32> zeroinitializer
  722.   ///
  723.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  724.   ///                    which may contain new access expressions for certain
  725.   ///                    memory accesses.
  726.   Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
  727.                                 ValueMapT &BBMap,
  728.                                 __isl_keep isl_id_to_ast_expr *NewAccesses);
  729.  
  730.   /// Load a vector from scalars distributed in memory
  731.   ///
  732.   /// In case some scalars are distributed randomly in memory, create a vector
  733.   /// by loading each scalar and by inserting one after the other into the
  734.   /// vector.
  735.   ///
  736.   /// %scalar_1 = load double* %p_1
  737.   /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
  738.   /// %scalar_2 = load double* %p_2
  739.   /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_2, i32 1
  740.   ///
  741.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  742.   ///                    which may contain new access expressions for certain
  743.   ///                    memory accesses.
  744.   Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
  745.                                    VectorValueMapT &ScalarMaps,
  746.                                    __isl_keep isl_id_to_ast_expr *NewAccesses);
  747.  
  748.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  749.   ///                    which may contain new access expressions for certain
  750.   ///                    memory accesses.
  751.   void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
  752.                     VectorValueMapT &ScalarMaps,
  753.                     __isl_keep isl_id_to_ast_expr *NewAccesses);
  754.  
  755.   void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
  756.                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
  757.  
  758.   void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
  759.                       ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);
  760.  
  761.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  762.   ///                    which may contain new access expressions for certain
  763.   ///                    memory accesses.
  764.   void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
  765.                  VectorValueMapT &ScalarMaps,
  766.                  __isl_keep isl_id_to_ast_expr *NewAccesses);
  767.  
  768.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  769.   ///                    which may contain new access expressions for certain
  770.   ///                    memory accesses.
  771.   void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
  772.                           ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
  773.                           __isl_keep isl_id_to_ast_expr *NewAccesses);
  774.  
  775.   bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
  776.                            VectorValueMapT &ScalarMaps);
  777.  
  778.   bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
  779.  
  780.   /// Generate vector loads for scalars.
  781.   ///
  782.   /// @param Stmt           The scop statement for which to generate the loads.
  783.   /// @param VectorBlockMap A map that will be updated to relate the original
  784.   ///                       values with the newly generated vector loads.
  785.   void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);
  786.  
  787.   /// Verify absence of scalar stores.
  788.   ///
  789.   /// @param Stmt The scop statement to check for scalar stores.
  790.   void verifyNoScalarStores(ScopStmt &Stmt);
  791.  
  792.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  793.   ///                    which may contain new access expressions for certain
  794.   ///                    memory accesses.
  795.   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
  796.                        VectorValueMapT &ScalarMaps,
  797.                        __isl_keep isl_id_to_ast_expr *NewAccesses);
  798.  
  799.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  800.   ///                    which may contain new access expressions for certain
  801.   ///                    memory accesses.
  802.   void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
  803. };
  804.  
  805. /// Generator for new versions of polyhedral region statements.
  806. class RegionGenerator final : BlockGenerator {
  807. public:
  808.   /// Create a generator for regions.
  809.   ///
  810.   /// @param BlockGen A generator for basic blocks.
  811.   RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}
  812.  
  813.   virtual ~RegionGenerator() {}
  814.  
  815.   /// Copy the region statement @p Stmt.
  816.   ///
  817.   /// This copies the entire region represented by @p Stmt and updates
  818.   /// references to old values with references to new values, as defined by
  819.   /// GlobalMap.
  820.   ///
  821.   /// @param Stmt      The statement to code generate.
  822.   /// @param LTS       A map from old loops to new induction variables as SCEVs.
  823.   void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
  824.                 __isl_keep isl_id_to_ast_expr *IdToAstExp);
  825.  
  826. private:
  827.   /// A map from an old basic block to the first new block in the region that
  828.   /// was created to model it.
  829.   DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;
  830.  
  831.   /// A map from an old basic block to the last new block in the region that
  832.   /// was created to model it.
  833.   DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;
  834.  
  835.   /// The "BBMaps" for the whole region (one for each block). In case a basic
  836.   /// block is code generated to multiple basic blocks (e.g., for partial
  837.   /// writes), the start block (see StartBlockMap) is used as the index into RegionMaps.
  838.   DenseMap<BasicBlock *, ValueMapT> RegionMaps;
  839.  
  840.   /// Mapping to remember PHI nodes that still need incoming values.
  841.   using PHINodePairTy = std::pair<PHINode *, PHINode *>;
  842.   DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;
  843.  
  844.   /// Repair the dominance tree after we created a copy block for @p BB.
  845.   ///
  846.   /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
  847.   BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
  848.  
  849.   /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
  850.   ///
  851.   /// PHI nodes, which may have (multiple) edges that enter from outside the
  852.   /// non-affine subregion and even from outside the scop, are code generated as
  853.   /// follows:
  854.   ///
  855.   /// # Original
  856.   ///
  857.   ///   Region: %A -> %exit
  858.   ///   NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
  859.   ///
  860.   ///     pre:
  861.   ///       %val = add i64 1, 1
  862.   ///
  863.   ///     A:
  864.   ///      br label %nonaffB
  865.   ///
  866.   ///     nonaffB:
  867.   ///       %phi = phi i64 [%val, %A], [%valC, %nonaffC], [%valD, %D]
  868.   ///       %cmp = <nonaff>
  869.   ///       br i1 %cmp, label %C, label %nonaffC
  870.   ///
  871.   ///     nonaffC:
  872.   ///       %valC = add i64 1, 1
  873.   ///       br i1 undef, label %D, label %nonaffB
  874.   ///
  875.   ///     D:
  876.   ///       %valD = ...
  877.   ///       %exit_cond = <loopexit>
  878.   ///       br i1 %exit_cond, label %nonaffB, label %exit
  879.   ///
  880.   ///     exit:
  881.   ///       ...
  882.   ///
  883.   ///  - %A and %D enter from outside the non-affine region.
  884.   ///  - %nonaffC enters from within the non-affine region.
  885.   ///
  886.   ///  # New
  887.   ///
  888.   ///    polly.A:
  889.   ///       store i64 %val, i64* %phi.phiops
  890.   ///       br label %polly.nonaffB.entry
  891.   ///
  892.   ///    polly.nonaffB.entry:
  893.   ///       %phi.phiops.reload = load i64, i64* %phi.phiops
  894.   ///       br label %nonaffB
  895.   ///
  896.   ///    polly.nonaffB:
  897.   ///       %polly.phi = phi i64 [%phi.phiops.reload, %polly.nonaffB.entry],
  898.   ///                            [%p.valC, %polly.nonaffC]
  899.   ///
  900.   ///    polly.nonaffC:
  901.   ///       %p.valC = add i64 1, 1
  902.   ///       br i1 undef, label %polly.D, label %polly.nonaffB
  903.   ///
  904.   ///    polly.D:
  905.   ///        %p.valD = ...
  906.   ///        store i64 %p.valD, i64* %phi.phiops
  907.   ///        %p.exit_cond = <loopexit>
  908.   ///        br i1 %p.exit_cond, label %polly.nonaffB, label %exit
  909.   ///
  910.   /// Values that enter the PHI from outside the non-affine region are stored
  911.   /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
  912.   /// reloaded in %polly.nonaffB.entry, a basic block generated before the
  913.   /// actual non-affine region.
  914.   ///
  915.   /// When generating the PHI node of the non-affine region in %polly.nonaffB,
  916.   /// incoming edges from outside the region are combined into a single branch
  917.   /// from %polly.nonaffB.entry which has as incoming value the value reloaded
  918.   /// from the %phi.phiops stack slot. Incoming edges from within the region
  919.   /// refer to the copied instructions (%p.valC) and basic blocks
  920.   /// (%polly.nonaffC) of the non-affine region.
  921.   ///
  922.   /// @param Stmt       The statement to code generate.
  923.   /// @param PHI        The original PHI we copy.
  924.   /// @param PHICopy    The copy of @p PHI.
  925.   /// @param IncomingBB An incoming block of @p PHI.
  926.   /// @param LTS        A map from old loops to new induction variables as
  927.   /// SCEVs.
  928.   void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
  929.                        BasicBlock *IncomingBB, LoopToScevMapT &LTS);
  930.  
  931.   /// Create a PHI that combines the incoming values from all incoming blocks
  932.   /// that are in the subregion.
  933.   ///
  934.   /// PHIs in the subregion's exit block can have incoming edges from within and
  935.   /// outside the subregion. This function combines the incoming values from
  936.   /// within the subregion to appear as if there is only one incoming edge from
  937.   /// the subregion (an additional exit block is created by RegionGenerator).
  938.   /// This is to avoid that a value is written to the .phiops location without
  939.   /// control leaving the subregion, which can happen because the exiting block
  940.   /// has an edge back into the subregion.
  941.   ///
  942.   /// @param MA    The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
  943.   ///              the subregion's exit block.
  944.   /// @param LTS   Virtual induction variable mapping.
  945.   /// @param BBMap A mapping from old values to their new values in this block.
  946.   /// @param L     Loop surrounding this region statement.
  947.   ///
  948.   /// @returns The constructed PHI node.
  949.   PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
  950.                         Loop *L);
  951.  
  952.   /// Return the new value of a scalar write, creating a PHINode if
  953.   /// necessary.
  954.   ///
  955.   /// @param MA    A scalar WRITE MemoryAccess.
  956.   /// @param LTS   Virtual induction variable mapping.
  957.   /// @param BBMap A mapping from old values to their new values in this block.
  958.   ///
  959.   /// @returns The effective value of @p MA's written value when leaving the
  960.   ///          subregion.
  961.   /// @see buildExitPHI
  962.   Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
  963.  
  964.   /// Generate the scalar stores for the given statement.
  965.   ///
  966.   /// After the statement @p Stmt was copied all inner-SCoP scalar dependences
  967.   /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  968.   /// be demoted to memory.
  969.   ///
  970.   /// @param Stmt  The statement we generate code for.
  971.   /// @param LTS   A mapping from loops' virtual canonical induction variables to
  972.   ///              their new values (for values recalculated in the new ScoP,
  973.   ///              but not within this basic block).
  974.   /// @param BBMap A mapping from old values to their new values in this block.
  975.   /// @param NewAccesses A map from memory access ids to new ast expressions,
  976.   ///              which may contain new access expressions for certain accesses.
  977.   void
  978.   generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
  979.                        __isl_keep isl_id_to_ast_expr *NewAccesses) override;
  980.  
  981.   /// Copy a single PHI instruction.
  982.   ///
  983.   /// This copies a single PHI instruction and updates references to old values
  984.   /// with references to new values, as defined by GlobalMap and BBMap.
  985.   ///
  986.   /// @param Stmt      The statement to code generate.
  987.   /// @param PHI       The PHI instruction to copy.
  988.   /// @param BBMap     A mapping from old values to their new values
  989.   ///                  (for values recalculated within this basic block).
  990.   /// @param LTS       A map from old loops to new induction variables as SCEVs.
  991.   void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst, ValueMapT &BBMap,
  992.                           LoopToScevMapT &LTS) override;
  993. };
  994. } // namespace polly
  995. #endif
  996.