//===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the BlockGenerator and VectorBlockGenerator classes, which
// generate sequential code and vectorized code for a polyhedral statement,
// respectively.
//
//===----------------------------------------------------------------------===//

#ifndef POLLY_BLOCK_GENERATORS_H
#define POLLY_BLOCK_GENERATORS_H

#include "polly/CodeGen/IRBuilder.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "isl/isl-noexceptions.h"

namespace polly {
using llvm::AllocaInst;
using llvm::ArrayRef;
using llvm::AssertingVH;
using llvm::BasicBlock;
using llvm::BinaryOperator;
using llvm::CmpInst;
using llvm::DataLayout;
using llvm::DenseMap;
using llvm::DominatorTree;
using llvm::Function;
using llvm::Instruction;
using llvm::LoadInst;
using llvm::Loop;
using llvm::LoopInfo;
using llvm::LoopToScevMapT;
using llvm::MapVector;
using llvm::PHINode;
using llvm::ScalarEvolution;
using llvm::SetVector;
using llvm::SmallVector;
using llvm::StoreInst;
using llvm::StringRef;
using llvm::Type;
using llvm::UnaryInstruction;
using llvm::Value;

class MemoryAccess;
class ScopArrayInfo;
class IslExprBuilder;

/// Generate a new basic block for a polyhedral statement.
class BlockGenerator {
public:
  typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT;

  /// Map types to resolve scalar dependences.
  ///
  ///@{
  using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>;

  /// Simple vector of instructions to store escape users.
  using EscapeUserVectorTy = SmallVector<Instruction *, 4>;

  /// Map type to resolve escaping users for scalar instructions.
  ///
  /// @see The EscapeMap member.
  using EscapeUsersAllocaMapTy =
      MapVector<Instruction *,
                std::pair<AssertingVH<Value>, EscapeUserVectorTy>>;

  ///@}

  /// Create a generator for basic blocks.
  ///
  /// @param Builder     The LLVM-IR Builder used to generate the statement. The
  ///                    code is generated at the location the Builder points
  ///                    to.
  /// @param LI          The loop info for the current function.
  /// @param SE          The scalar evolution info for the current function.
  /// @param DT          The dominator tree of this function.
  /// @param ScalarMap   Map from scalars to their demoted location.
  /// @param EscapeMap   Map from scalars to their escape users and locations.
  /// @param GlobalMap   A mapping from llvm::Values used in the original scop
  ///                    region to a new set of llvm::Values. Each reference to
  ///                    an original value appearing in this mapping is replaced
  ///                    with the new value it is mapped to.
  /// @param ExprBuilder An expression builder to generate new access functions.
  /// @param StartBlock  The first basic block after the RTC.
  BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE,
                 DominatorTree &DT, AllocaMapTy &ScalarMap,
                 EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap,
                 IslExprBuilder *ExprBuilder, BasicBlock *StartBlock);

  /// Copy the basic block.
  ///
  /// This copies the entire basic block and updates references to old values
  /// with references to new values, as defined by GlobalMap.
  ///
  /// @param Stmt        The block statement to code generate.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                isl_id_to_ast_expr *NewAccesses);
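
  // For illustration only: a rough sketch of how a code-generation driver
  // might use this class. The analyses and the maps (ScalarMap, EscapeMap,
  // GlobalMap, LTS, NewAccesses) are assumed to be set up by the caller;
  // this is a hedged example, not a verbatim excerpt of Polly's driver code.
  //
  //   BlockGenerator BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap,
  //                           GlobalMap, &ExprBuilder, StartBlock);
  //   BlockGen.copyStmt(Stmt, LTS, NewAccesses); // once per generated instance
  //   BlockGen.finalizeSCoP(S);                  // after all statements are copied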

  /// Remove a ScopArrayInfo's allocation from the ScalarMap.
  ///
  /// This function allows removing values from the ScalarMap. This is useful
  /// if the corresponding alloca instruction will be deleted (or moved into
  /// another module), as without removing these values the underlying
  /// AssertingVH will trigger because we still keep a reference to this
  /// scalar.
  ///
  /// @param Array The array for which the alloca was generated.
  void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); }
 
  /// Return the alloca for @p Access.
  ///
  /// If no alloca was mapped for @p Access a new one is created.
  ///
  /// @param Access    The memory access for which to generate the alloca.
  ///
  /// @returns The alloca for @p Access or a replacement value taken from
  ///          GlobalMap.
  Value *getOrCreateAlloca(const MemoryAccess &Access);

  /// Return the alloca for @p Array.
  ///
  /// If no alloca was mapped for @p Array a new one is created.
  ///
  /// @param Array The array for which to generate the alloca.
  ///
  /// @returns The alloca for @p Array or a replacement value taken from
  ///          GlobalMap.
  Value *getOrCreateAlloca(const ScopArrayInfo *Array);

  /// Finalize the code generation for the SCoP @p S.
  ///
  /// This will initialize and finalize the scalar variables we demoted during
  /// the code generation.
  ///
  /// @see createScalarInitialization(Scop &)
  /// @see createScalarFinalization(Region &)
  void finalizeSCoP(Scop &S);

  /// An empty destructor
  virtual ~BlockGenerator() {}

  BlockGenerator(const BlockGenerator &) = default;

protected:
  PollyIRBuilder &Builder;
  LoopInfo &LI;
  ScalarEvolution &SE;
  IslExprBuilder *ExprBuilder;

  /// The dominator tree of this function.
  DominatorTree &DT;

  /// The entry block of the current function.
  BasicBlock *EntryBB;

  /// Map to resolve scalar dependences for PHI operands and scalars.
  ///
  /// When translating code that contains inter-block scalar dependences
  /// (including the use of data carrying PHI nodes), we do not directly
  /// regenerate in-register SSA code, but instead allocate some stack memory
  /// through which these scalar values are passed. Only a later pass of
  /// -mem2reg will then (re)introduce in-register computations.
  ///
  /// To keep track of the memory location(s) used to store the data computed by
  /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a
  /// given ScopArrayInfo to the chunk of stack allocated memory that is
  /// used for code generation.
  ///
  /// Up to two different ScopArrayInfo objects are associated with each
  /// llvm::Value:
  ///
  /// MemoryType::Value objects are used for normal scalar dependences that go
  /// from a scalar definition to its use. Such dependences are lowered by
  /// directly writing the value an instruction computes into the corresponding
  /// chunk of memory and reading it back from this chunk of memory right before
  /// every use of this original scalar value. The memory allocations for
  /// MemoryType::Value objects end with '.s2a'.
  ///
  /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI
  /// nodes. For each PHI node we introduce, besides the Array of type
  /// MemoryType::Value, a second chunk of memory into which we write at the end
  /// of each basic block preceding the PHI instruction the value passed
  /// through this basic block. At the place where the PHI node is executed, we
  /// replace the PHI node with a load from the corresponding MemoryType::PHI
  /// memory location. The memory allocations for MemoryType::PHI end with
  /// '.phiops'.
  ///
  /// Example:
  ///
  ///                              Input C Code
  ///                              ============
  ///
  ///                 S1:      x1 = ...
  ///                          for (i=0...N) {
  ///                 S2:           x2 = phi(x1, add)
  ///                 S3:           add = x2 + 42;
  ///                          }
  ///                 S4:      print(x1)
  ///                          print(x2)
  ///                          print(add)
  ///
  ///
  ///        Unmodified IR                         IR After expansion
  ///        =============                         ==================
  ///
  /// S1:   x1 = ...                     S1:    x1 = ...
  ///                                           x1.s2a = s1
  ///                                           x2.phiops = s1
  ///        |                                    |
  ///        |   <--<--<--<--<                    |   <--<--<--<--<
  ///        | /              \                   | /              \     .
  ///        V V               \                  V V               \    .
  /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
  ///                           |               x2.s2a = x2          |
  ///                           |                                    |
  /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
  ///                           |               add.s2a = add        |
  ///                           |               x2.phiops = add      |
  ///        | \               /                  | \               /
  ///        |  \             /                   |  \             /
  ///        |   >-->-->-->-->                    |   >-->-->-->-->
  ///        V                                    V
  ///
  ///                                    S4:    x1 = x1.s2a
  /// S4:  ... = x1                             ... = x1
  ///                                           x2 = x2.s2a
  ///      ... = x2                             ... = x2
  ///                                           add = add.s2a
  ///      ... = add                            ... = add
  ///
  ///      ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a,
  ///                    add:Value -> add.s2a, x2:PHI -> x2.phiops }
  ///
  ///  ??? Why does a PHI-node require two memory chunks ???
  ///
  ///  One may wonder why a PHI node requires two memory chunks rather than
  ///  storing all data in a single location. The following example tries
  ///  to store all data in .s2a and drops the .phiops location:
  ///
  ///      S1:    x1 = ...
  ///             x1.s2a = s1
  ///             x2.s2a = s1             // use .s2a instead of .phiops
  ///               |
  ///               |   <--<--<--<--<
  ///               | /              \    .
  ///               V V               \   .
  ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
  ///                                  |  // from .s2a
  ///                                  |
  ///             x2.s2a = x2          |  // store into .s2a as normal
  ///                                  |
  ///      S3:    add = x2 + 42        |
  ///             add.s2a = add        |
  ///             x2.s2a = add         |  // use s2a instead of .phiops
  ///               | \               /   // !!! This is wrong, as x2.s2a now
  ///               |   >-->-->-->-->     // contains add instead of x2.
  ///               V
  ///
  ///      S4:    x1 = x1.s2a
  ///             ... = x1
  ///             x2 = x2.s2a             // !!! We now read 'add' instead of
  ///             ... = x2                // 'x2'
  ///             add = add.s2a
  ///             ... = add
  ///
  ///  As visible in the example, the SSA value of the PHI node may still be
  ///  needed _after_ the basic block, which could conceptually branch to the
  ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
  ///  single memory location is not enough to code-generate a PHI node.
  ///
  /// Memory locations used for the special PHI node modeling.
  AllocaMapTy &ScalarMap;
 
  /// Map from instructions to their escape users as well as the alloca.
  EscapeUsersAllocaMapTy &EscapeMap;

  /// A map from llvm::Values referenced in the old code to a new set of
  /// llvm::Values, which is used to replace these old values during
  /// code generation.
  ValueMapT &GlobalMap;

  /// The first basic block after the RTC.
  BasicBlock *StartBlock;

  /// Split @p BB to create a new one we can use to clone @p BB in.
  BasicBlock *splitBB(BasicBlock *BB);

  /// Copy the given basic block.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param BB          The basic block to code generate.
  /// @param BBMap       A mapping from old values to their new values in this
  ///                    block.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  ///
  /// @returns The copy of the basic block.
  BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap,
                     LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Copy the given basic block.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param BB          The basic block to code generate.
  /// @param BBCopy      The new basic block to generate code in.
  /// @param BBMap       A mapping from old values to their new values in this
  ///                    block.
  /// @param LTS         A map from old loops to new induction variables as
  ///                    SCEVs.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy,
              ValueMapT &BBMap, LoopToScevMapT &LTS,
              isl_id_to_ast_expr *NewAccesses);
 
  /// Generate reload of scalars demoted to memory and needed by @p Stmt.
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values.
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT &LTS,
                           ValueMapT &BBMap,
                           __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// When statement tracing is enabled, build the print instructions for
  /// printing the current statement instance.
  ///
  /// The printed output looks like:
  ///
  ///     Stmt1(0)
  ///
  /// If printing of scalars is enabled, it also appends the value of each
  /// scalar to the line:
  ///
  ///     Stmt1(0) %i=1 %sum=5
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values.
  /// @param BBMap A mapping from old values to their new values in this block.
  void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS,
                              ValueMapT &BBMap);

  /// Generate instructions that compute whether one instance of @p Subdomain
  /// is executed.
  ///
  /// @param Stmt      The statement we generate code for.
  /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in
  ///                  @p Stmt's domain are ignored.
  ///
  /// @return An expression of type i1, generated into the current builder
  ///         position, that evaluates to 1 if the executed instance is part of
  ///         @p Subdomain.
  Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain);

  /// Generate code that executes in a subset of @p Stmt's domain.
  ///
  /// @param Stmt        The statement we generate code for.
  /// @param Subdomain   The condition for some code to be executed.
  /// @param Subject     A name for the code that is executed
  ///                    conditionally. Used to name new basic blocks and
  ///                    instructions.
  /// @param GenThenFunc Callback which generates the code to be executed
  ///                    when the current executed instance is in @p Subdomain.
  ///                    The IRBuilder's position is moved to within the block
  ///                    that executes conditionally for this callback.
  void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain,
                                    StringRef Subject,
                                    const std::function<void()> &GenThenFunc);
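
  // For illustration only: a hedged sketch of a call site for this helper.
  // The names Stmt and AccessDomain stand in for whatever statement and
  // isl::set are available at the point of use; they are not part of this
  // interface.
  //
  //   generateConditionalExecution(Stmt, AccessDomain, "polly.partial", [&]() {
  //     // Emitted only for instances inside AccessDomain; the Builder has
  //     // already been positioned inside the conditionally executed block.
  //   });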

  /// Generate the scalar stores for the given statement.
  ///
  /// After the statement @p Stmt was copied, all inner-SCoP scalar dependences
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  /// be demoted to memory.
  ///
  /// @param Stmt  The statement we generate code for.
  /// @param LTS   A mapping from loops' virtual canonical induction
  ///              variables to their new values
  ///              (for values recalculated in the new ScoP, but not
  ///               within this basic block)
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                    ValueMapT &BBMap,
                                    __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// Handle users of @p Array outside the SCoP.
  ///
  /// @param S         The current SCoP.
  /// @param Array     The ScopArrayInfo to handle.
  void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array);
 
  /// Find scalar statements that have outside users.
  ///
  /// We register these scalar values to later update subsequent scalar uses of
  /// these values to either use the newly computed value from within the scop
  /// (if the scop was executed) or the unchanged original code (if the run-time
  /// check failed).
  ///
  /// @param S The scop for which to find the outside users.
  void findOutsideUsers(Scop &S);

  /// Initialize the memory of demoted scalars.
  ///
  /// @param S The scop for which to generate the scalar initializers.
  void createScalarInitialization(Scop &S);

  /// Create exit PHI node merges for PHI nodes with more than two edges
  /// from inside the scop.
  ///
  /// For scops which have a PHI node in the exit block that has more than two
  /// incoming edges from inside the scop region, we require some special
  /// handling to understand which of the possible values will be passed to the
  /// PHI node from inside the optimized version of the scop. To do so, ScopInfo
  /// models the possible incoming values as write accesses of the ScopStmts.
  ///
  /// This function creates corresponding code to reload the computed outgoing
  /// value from the stack slot it has been stored into and to pass it on to the
  /// PHI node in the original exit block.
  ///
  /// @param S The scop for which to generate the exiting PHI nodes.
  void createExitPHINodeMerges(Scop &S);

  /// Promote the values of demoted scalars after the SCoP.
  ///
  /// If a scalar value was used outside the SCoP we need to promote the value
  /// stored in the memory cell allocated for that scalar and combine it with
  /// the original value in the non-optimized SCoP.
  void createScalarFinalization(Scop &S);
 
  /// Try to synthesize a new value.
  ///
  /// Given an old value, we try to synthesize it in a new context from its
  /// original SCEV expression. We start from the original SCEV expression,
  /// then replace outdated parameter and loop references, and finally
  /// expand it to code that computes this updated expression.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param Old       The old Value.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block)
  /// @param LTS       A mapping from loops' virtual canonical induction
  ///                  variables to their new values
  ///                  (for values recalculated in the new ScoP, but not
  ///                   within this basic block)
  /// @param L         The loop that surrounded the instruction that referenced
  ///                  this value in the original code. This loop is used to
  ///                  evaluate the scalar evolution at the right scope.
  ///
  /// @returns  o A newly synthesized value.
  ///           o NULL, if synthesizing the value failed.
  Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                               LoopToScevMapT &LTS, Loop *L) const;

  /// Get the new version of a value.
  ///
  /// Given an old value, we first check if a new version of this value is
  /// available in the BBMap or GlobalMap. In case it is not and the value can
  /// be recomputed using SCEV, we do so. If we cannot recompute a value
  /// using SCEV, but we understand that the value is constant within the scop,
  /// we return the old value. If the value still cannot be derived, this
  /// function will assert.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param Old       The old Value.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block).
  /// @param LTS       A mapping from loops' virtual canonical induction
  ///                  variables to their new values
  ///                  (for values recalculated in the new ScoP, but not
  ///                   within this basic block).
  /// @param L         The loop that surrounded the instruction that referenced
  ///                  this value in the original code. This loop is used to
  ///                  evaluate the scalar evolution at the right scope.
  ///
  /// @returns  o The old value, if it is still valid.
  ///           o The new value, if available.
  ///           o NULL, if no value is found.
  Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
                     LoopToScevMapT &LTS, Loop *L) const;
 
  void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
                      LoopToScevMapT &LTS);

  /// Get the innermost loop that surrounds the statement @p Stmt.
  Loop *getLoopForStmt(const ScopStmt &Stmt) const;

  /// Generate the operand address.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst,
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
                                  isl_id_to_ast_expr *NewAccesses);

  /// Generate the operand address.
  ///
  /// @param Stmt         The statement to generate code for.
  /// @param L            The innermost loop that surrounds the statement.
  /// @param Pointer      If the access expression is not changed (i.e., not
  ///                     found in @p LTS), use this Pointer from the original
  ///                     code instead.
  /// @param BBMap        A mapping from old values to their new values.
  /// @param LTS          A mapping from loops' virtual canonical induction
  ///                     variables to their new values.
  /// @param NewAccesses  Ahead-of-time generated access expressions.
  /// @param Id           Identifier of the MemoryAccess to generate.
  /// @param ExpectedType The type the returned value should have.
  ///
  /// @return The generated address.
  Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer,
                                  ValueMapT &BBMap, LoopToScevMapT &LTS,
                                  isl_id_to_ast_expr *NewAccesses,
                                  __isl_take isl_id *Id, Type *ExpectedType);

  /// Generate the pointer value that is accessed by @p Access.
  ///
  /// For write accesses, generate the target address. For read accesses,
  /// generate the source address.
  /// The access can be either an array access or a scalar access. In the first
  /// case, the returned address will point to an element of that array. In
  /// the scalar case, an alloca is used.
  /// If a new AccessRelation is set for the MemoryAccess, the new relation will
  /// be used.
  ///
  /// @param Access      The access to generate a pointer for.
  /// @param L           The innermost loop that surrounds the statement.
  /// @param LTS         A mapping from loops' virtual canonical induction
  ///                    variables to their new values.
  /// @param BBMap       A mapping from old values to their new values.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  ///
  /// @return The generated address.
  Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT &LTS,
                            ValueMapT &BBMap,
                            __isl_keep isl_id_to_ast_expr *NewAccesses);
 
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap,
                           LoopToScevMapT &LTS,
                           isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap,
                          LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Copy a single PHI instruction.
  ///
  /// The implementation in the BlockGenerator is trivial; however, it allows
  /// subclasses to handle PHIs differently.
  virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &,
                                  LoopToScevMapT &) {}
 
  /// Copy a single Instruction.
  ///
  /// This copies a single Instruction and updates references to old values
  /// with references to new values, as defined by GlobalMap and BBMap.
  ///
  /// @param Stmt        The statement to code generate.
  /// @param Inst        The instruction to copy.
  /// @param BBMap       A mapping from old values to their new values
  ///                    (for values recalculated within this basic block).
  /// @param LTS         A mapping from loops' virtual canonical induction
  ///                    variables to their new values
  ///                    (for values recalculated in the new ScoP, but not
  ///                     within this basic block).
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);

  /// Helper to determine if @p Inst can be synthesized in @p Stmt.
  ///
  /// @returns false, iff @p Inst can be synthesized in @p Stmt.
  bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst);

  /// Remove dead instructions generated for BB.
  ///
  /// @param BB    The basic block for which code has been generated.
  /// @param BBMap A local map from old to new instructions.
  void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap);

  /// Invalidate the scalar evolution expressions for a scop.
  ///
  /// This function invalidates the scalar evolution results for all
  /// instructions that are part of a given scop, and the loops
  /// surrounding the users of merge blocks. This is necessary to ensure that
  /// later scops do not obtain scalar evolution expressions that reference
  /// values that earlier dominated the later scop, but have been moved into the
  /// conditional part of an earlier scop and consequently no longer
  /// dominate the later scop.
  ///
  /// @param S The scop to invalidate.
  void invalidateScalarEvolution(Scop &S);
};
 
/// Generate a new vector basic block for a polyhedral statement.
///
/// The only public function exposed is generate().
class VectorBlockGenerator final : BlockGenerator {
public:
  /// Generate a new vector basic block for a ScopStmt.
  ///
  /// This code generation is similar to the normal, scalar code generation,
  /// except that each instruction is code generated for several vector lanes
  /// at a time. If possible, instructions are issued as actual vector
  /// instructions, but e.g. for address calculation instructions we currently
  /// generate scalar instructions for each vector lane.
  ///
  /// @param BlockGen    A block generator object used as parent.
  /// @param Stmt        The statement to code generate.
  /// @param VLTS        A mapping from loops' virtual canonical induction
  ///                    variables to their new values
  ///                    (for values recalculated in the new ScoP, but not
  ///                     within this basic block), one for each lane.
  /// @param Schedule    A map from the statement to a schedule where the
  ///                    innermost dimension is the dimension of the innermost
  ///                    loop containing the statement.
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt,
                       std::vector<LoopToScevMapT> &VLTS,
                       __isl_keep isl_map *Schedule,
                       __isl_keep isl_id_to_ast_expr *NewAccesses) {
    VectorBlockGenerator Generator(BlockGen, VLTS, Schedule);
    Generator.copyStmt(Stmt, NewAccesses);
  }
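
  // For illustration only: a rough sketch of a possible call site, assuming
  // the caller already owns a BlockGenerator and has prepared one
  // LoopToScevMapT per vector lane plus the per-statement schedule map. This
  // is a hedged example, not a verbatim excerpt of Polly's vector codegen.
  //
  //   std::vector<LoopToScevMapT> VLTS(VectorWidth);
  //   // ... fill VLTS[Lane] for every lane 0 .. VectorWidth-1 ...
  //   VectorBlockGenerator::generate(BlockGen, Stmt, VLTS, Schedule,
  //                                  NewAccesses);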

private:
  // This is a vector of loop->scev maps.  The first map is used for the first
  // vector lane, ...
  // Each map contains information about Instructions in the old ScoP, which
  // are recalculated in the new SCoP. When copying the basic block, we replace
  // all references to the old instructions with their recalculated values.
  //
  // For example, when the code generator produces this AST:
  //
  //   for (int c1 = 0; c1 <= 1023; c1 += 1)
  //     for (int c2 = 0; c2 <= 1023; c2 += VF)
  //       for (int lane = 0; lane <= VF; lane += 1)
  //         Stmt(c2 + lane + 3, c1);
  //
  // VLTS[lane] contains a map:
  //   "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"),
  //   "inner loop in the old loop nest" -> SCEV("c1").
  std::vector<LoopToScevMapT> &VLTS;

  // A map from the statement to a schedule where the innermost dimension is the
  // dimension of the innermost loop containing the statement.
  isl_map *Schedule;

  VectorBlockGenerator(BlockGenerator &BlockGen,
                       std::vector<LoopToScevMapT> &VLTS,
                       __isl_keep isl_map *Schedule);

  int getVectorWidth();

  Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
                        VectorValueMapT &ScalarMaps, Loop *L);
 
  /// Load a vector from a set of adjacent scalars.
  ///
  /// In case a set of scalars is known to be next to each other in memory,
  /// create a vector load that loads those scalars:
  ///
  /// %vector_ptr = bitcast double* %p to <4 x double>*
  /// %vec_full = load <4 x double>* %vector_ptr
  ///
  /// @param Stmt           The statement to code generate.
  /// @param NegativeStride This is used to indicate a -1 stride. In such
  ///                       a case we load the end of a base address and
  ///                       shuffle the accesses in reverse order into the
  ///                       vector. By default we would do only positive
  ///                       strides.
  ///
  /// @param NewAccesses    A map from memory access ids to new ast
  ///                       expressions, which may contain new access
  ///                       expressions for certain memory accesses.
  Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load,
                               VectorValueMapT &ScalarMaps,
                               __isl_keep isl_id_to_ast_expr *NewAccesses,
                               bool NegativeStride);

  /// Load a vector initialized from a single scalar in memory.
  ///
  /// In case all elements of a vector are initialized to the same
  /// scalar value, this value is loaded and shuffled into all elements
  /// of the vector:
  ///
  /// %splat_one = load <1 x double>* %p
  /// %splat = shufflevector <1 x double> %splat_one, <1 x
  ///       double> %splat_one, <4 x i32> zeroinitializer
  ///
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load,
                                ValueMapT &BBMap,
                                __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// Load a vector from scalars distributed in memory.
  ///
  /// In case some scalars are distributed randomly in memory, create a vector
  /// by loading each scalar and inserting one after the other into the
  /// vector:
  ///
  /// %scalar_1 = load double* %p_1
  /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
  /// %scalar_2 = load double* %p_2
  /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_2, i32 1
  ///
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load,
                                   VectorValueMapT &ScalarMaps,
                                   __isl_keep isl_id_to_ast_expr *NewAccesses);
 
  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap,
                    VectorValueMapT &ScalarMaps,
                    __isl_keep isl_id_to_ast_expr *NewAccesses);

  void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst,
                     ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);

  void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst,
                      ValueMapT &VectorMap, VectorValueMapT &ScalarMaps);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap,
                 VectorValueMapT &ScalarMaps,
                 __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst,
                          ValueMapT &VectorMap, VectorValueMapT &ScalarMaps,
                          __isl_keep isl_id_to_ast_expr *NewAccesses);

  bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
                           VectorValueMapT &ScalarMaps);

  bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);

  /// Generate vector loads for scalars.
  ///
  /// @param Stmt           The scop statement for which to generate the loads.
  /// @param VectorBlockMap A map that will be updated to relate the original
  ///                       values with the newly generated vector loads.
  void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap);

  /// Verify absence of scalar stores.
  ///
  /// @param Stmt The scop statement to check for scalar stores.
  void verifyNoScalarStores(ScopStmt &Stmt);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap,
                       VectorValueMapT &ScalarMaps,
                       __isl_keep isl_id_to_ast_expr *NewAccesses);

  /// @param NewAccesses A map from memory access ids to new ast expressions,
  ///                    which may contain new access expressions for certain
  ///                    memory accesses.
  void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses);
};
 
/// Generator for new versions of polyhedral region statements.
class RegionGenerator final : BlockGenerator {
public:
  /// Create a generator for regions.
  ///
  /// @param BlockGen A generator for basic blocks.
  RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {}

  virtual ~RegionGenerator() {}

  /// Copy the region statement @p Stmt.
  ///
  /// This copies the entire region represented by @p Stmt and updates
  /// references to old values with references to new values, as defined by
  /// GlobalMap.
  ///
  /// @param Stmt       The statement to code generate.
  /// @param LTS        A map from old loops to new induction variables as
  ///                   SCEVs.
  /// @param IdToAstExp A map from memory access ids to new ast expressions.
  void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
                __isl_keep isl_id_to_ast_expr *IdToAstExp);
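
  // For illustration only: a driver typically picks between the block and the
  // region generator depending on the kind of statement it is emitting. A
  // hedged sketch (BlockGen, RegionGen, LTS and NewAccesses are assumed to
  // exist at the call site):
  //
  //   if (Stmt.isBlockStmt())
  //     BlockGen.copyStmt(Stmt, LTS, NewAccesses);
  //   else
  //     RegionGen.copyStmt(Stmt, LTS, NewAccesses);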

private:
  /// A map from old blocks to the first new block in the region that was
  /// created to model the old basic block.
  DenseMap<BasicBlock *, BasicBlock *> StartBlockMap;

  /// A map from old blocks to the last new block in the region that was
  /// created to model the old basic block.
  DenseMap<BasicBlock *, BasicBlock *> EndBlockMap;

  /// The "BBMaps" for the whole region (one for each block). In case a basic
  /// block is code generated to multiple basic blocks (e.g., for partial
  /// writes), the start block is used as the index into RegionMaps.
  DenseMap<BasicBlock *, ValueMapT> RegionMaps;

  /// Mapping to remember PHI nodes that still need incoming values.
  using PHINodePairTy = std::pair<PHINode *, PHINode *>;
  DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap;

  /// Repair the dominance tree after we created a copy block for @p BB.
  ///
  /// @returns The immediate dominator in the DT for @p BBCopy if in the region.
  BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy);
 
  /// Add the new operand from the copy of @p IncomingBB to @p PHICopy.
  ///
  /// PHI nodes, which may have (multiple) edges that enter from outside the
  /// non-affine subregion and even from outside the scop, are code generated as
  /// follows:
  ///
  /// # Original
  ///
  ///   Region: %A -> %exit
  ///   NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC)
  ///
  ///     pre:
  ///       %val = add i64 1, 1
  ///
  ///     A:
  ///      br label %nonaff
  ///
  ///     nonaffB:
  ///       %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D]
  ///       %cmp = <nonaff>
  ///       br i1 %cmp, label %C, label %nonaffC
  ///
  ///     nonaffC:
  ///       %valC = add i64 1, 1
  ///       br i1 undef, label %D, label %nonaffB
  ///
  ///     D:
  ///       %valD = ...
  ///       %exit_cond = <loopexit>
  ///       br i1 %exit_cond, label %nonaffB, label %exit
  ///
  ///     exit:
  ///       ...
  ///
  ///  - %start and %C enter from outside the non-affine region.
  ///  - %nonaffC enters from within the non-affine region.
  ///
  ///  # New
  ///
  ///    polly.A:
  ///       store i64 %val, i64* %phi.phiops
  ///       br label %polly.nonaffA.entry
  ///
  ///    polly.nonaffB.entry:
  ///       %phi.phiops.reload = load i64, i64* %phi.phiops
  ///       br label %nonaffB
  ///
  ///    polly.nonaffB:
  ///       %polly.phi = [%phi.phiops.reload, %nonaffB.entry],
  ///                    [%p.valC, %polly.nonaffC]
  ///
  ///    polly.nonaffC:
  ///       %p.valC = add i64 1, 1
  ///       br i1 undef, label %polly.D, label %polly.nonaffB
  ///
  ///    polly.D:
  ///        %p.valD = ...
  ///        store i64 %p.valD, i64* %phi.phiops
  ///        %p.exit_cond = <loopexit>
  ///        br i1 %p.exit_cond, label %polly.nonaffB, label %exit
  ///
  /// Values that enter the PHI from outside the non-affine region are stored
  /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and
  /// reloaded in %polly.nonaffB.entry, a basic block generated before the
  /// actual non-affine region.
  ///
  /// When generating the PHI node of the non-affine region in %polly.nonaffB,
  /// incoming edges from outside the region are combined into a single branch
  /// from %polly.nonaffB.entry which has as incoming value the value reloaded
  /// from the %phi.phiops stack slot. Incoming edges from within the region
  /// refer to the copied instructions (%p.valC) and basic blocks
  /// (%polly.nonaffC) of the non-affine region.
  ///
  /// @param Stmt       The statement to code generate.
  /// @param PHI        The original PHI we copy.
  /// @param PHICopy    The copy of @p PHI.
  /// @param IncomingBB An incoming block of @p PHI.
  /// @param LTS        A map from old loops to new induction variables as
  ///                   SCEVs.
  void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy,
                       BasicBlock *IncomingBB, LoopToScevMapT &LTS);
 
  /// Create a PHI that combines the incoming values from all incoming blocks
  /// that are in the subregion.
  ///
  /// PHIs in the subregion's exit block can have incoming edges from within and
  /// outside the subregion. This function combines the incoming values from
  /// within the subregion to appear as if there is only one incoming edge from
  /// the subregion (an additional exit block is created by RegionGenerator).
  /// This avoids writing a value to the .phiops location without leaving the
  /// subregion, which could otherwise happen because the exiting block has an
  /// edge back into the subregion.
  ///
  /// @param MA    The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in
  ///              the subregion's exit block.
  /// @param LTS   Virtual induction variable mapping.
  /// @param BBMap A mapping from old values to their new values in this block.
  /// @param L     Loop surrounding this region statement.
  ///
  /// @returns The constructed PHI node.
  PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap,
                        Loop *L);

  /// Return the new value of a scalar write, creating a PHINode if
  /// necessary.
  ///
  /// @param MA    A scalar WRITE MemoryAccess.
  /// @param LTS   Virtual induction variable mapping.
  /// @param BBMap A mapping from old values to their new values in this block.
  ///
  /// @returns The effective value of @p MA's written value when leaving the
  ///          subregion.
  /// @see buildExitPHI
  Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT &LTS, ValueMapT &BBMap);
 
  /// Generate the scalar stores for the given statement.
  ///
  /// After the statement @p Stmt was copied, all inner-SCoP scalar dependences
  /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to
  /// be demoted to memory.
  ///
  /// @param Stmt        The statement we generate code for.
  /// @param LTS         A mapping from loops' virtual canonical induction
  ///                    variables to their new values (for values recalculated
  ///                    in the new ScoP, but not within this basic block).
  /// @param BBMap       A mapping from old values to their new values in this
  ///                    block.
  /// @param NewAccesses A map from memory access ids to new ast expressions.
  void
  generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS, ValueMapT &BBMap,
                       __isl_keep isl_id_to_ast_expr *NewAccesses) override;

  /// Copy a single PHI instruction.
  ///
  /// This copies a single PHI instruction and updates references to old values
  /// with references to new values, as defined by GlobalMap and BBMap.
  ///
  /// @param Stmt      The statement to code generate.
  /// @param PHI       The PHI instruction to copy.
  /// @param BBMap     A mapping from old values to their new values
  ///                  (for values recalculated within this basic block).
  /// @param LTS       A map from old loops to new induction variables as SCEVs.
  void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst, ValueMapT &BBMap,
                          LoopToScevMapT &LTS) override;
};
} // namespace polly
#endif