Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===-BlockGenerators.h - Helper to generate code for statements-*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This file defines the BlockGenerator and VectorBlockGenerator classes, which |
||
| 10 | // generate sequential code and vectorized code for a polyhedral statement, |
||
| 11 | // respectively. |
||
| 12 | // |
||
| 13 | //===----------------------------------------------------------------------===// |
||
| 14 | |||
| 15 | #ifndef POLLY_BLOCK_GENERATORS_H |
||
| 16 | #define POLLY_BLOCK_GENERATORS_H |
||
| 17 | |||
| 18 | #include "polly/CodeGen/IRBuilder.h" |
||
| 19 | #include "polly/Support/ScopHelper.h" |
||
| 20 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
||
| 21 | #include "isl/isl-noexceptions.h" |
||
| 22 | |||
| 23 | namespace polly { |
||
| 24 | using llvm::AllocaInst; |
||
| 25 | using llvm::ArrayRef; |
||
| 26 | using llvm::AssertingVH; |
||
| 27 | using llvm::BasicBlock; |
||
| 28 | using llvm::BinaryOperator; |
||
| 29 | using llvm::CmpInst; |
||
| 30 | using llvm::DataLayout; |
||
| 31 | using llvm::DenseMap; |
||
| 32 | using llvm::DominatorTree; |
||
| 33 | using llvm::Function; |
||
| 34 | using llvm::Instruction; |
||
| 35 | using llvm::LoadInst; |
||
| 36 | using llvm::Loop; |
||
| 37 | using llvm::LoopInfo; |
||
| 38 | using llvm::LoopToScevMapT; |
||
| 39 | using llvm::MapVector; |
||
| 40 | using llvm::PHINode; |
||
| 41 | using llvm::ScalarEvolution; |
||
| 42 | using llvm::SetVector; |
||
| 43 | using llvm::SmallVector; |
||
| 44 | using llvm::StoreInst; |
||
| 45 | using llvm::StringRef; |
||
| 46 | using llvm::Type; |
||
| 47 | using llvm::UnaryInstruction; |
||
| 48 | using llvm::Value; |
||
| 49 | |||
| 50 | class MemoryAccess; |
||
| 51 | class ScopArrayInfo; |
||
| 52 | class IslExprBuilder; |
||
| 53 | |||
| 54 | /// Generate a new basic block for a polyhedral statement. |
||
| 55 | class BlockGenerator { |
||
| 56 | public: |
||
| 57 | typedef llvm::SmallVector<ValueMapT, 8> VectorValueMapT; |
||
| 58 | |||
| 59 | /// Map types to resolve scalar dependences. |
||
| 60 | /// |
||
| 61 | ///@{ |
||
| 62 | using AllocaMapTy = DenseMap<const ScopArrayInfo *, AssertingVH<AllocaInst>>; |
||
| 63 | |||
| 64 | /// Simple vector of instructions to store escape users. |
||
| 65 | using EscapeUserVectorTy = SmallVector<Instruction *, 4>; |
||
| 66 | |||
| 67 | /// Map type to resolve escaping users for scalar instructions. |
||
| 68 | /// |
||
| 69 | /// @see The EscapeMap member. |
||
| 70 | using EscapeUsersAllocaMapTy = |
||
| 71 | MapVector<Instruction *, |
||
| 72 | std::pair<AssertingVH<Value>, EscapeUserVectorTy>>; |
||
| 73 | |||
| 74 | ///@} |
||
| 75 | |||
| 76 | /// Create a generator for basic blocks. |
||
| 77 | /// |
||
| 78 | /// @param Builder The LLVM-IR Builder used to generate the statement. The |
||
| 79 | /// code is generated at the location, the Builder points |
||
| 80 | /// to. |
||
| 81 | /// @param LI The loop info for the current function |
||
| 82 | /// @param SE The scalar evolution info for the current function |
||
| 83 | /// @param DT The dominator tree of this function. |
||
| 84 | /// @param ScalarMap Map from scalars to their demoted location. |
||
| 85 | /// @param EscapeMap Map from scalars to their escape users and locations. |
||
| 86 | /// @param GlobalMap A mapping from llvm::Values used in the original scop |
||
| 87 | /// region to a new set of llvm::Values. Each reference to |
||
| 88 | /// an original value appearing in this mapping is replaced |
||
| 89 | /// with the new value it is mapped to. |
||
| 90 | /// @param ExprBuilder An expression builder to generate new access functions. |
||
| 91 | /// @param StartBlock The first basic block after the RTC. |
||
| 92 | BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, |
||
| 93 | DominatorTree &DT, AllocaMapTy &ScalarMap, |
||
| 94 | EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, |
||
| 95 | IslExprBuilder *ExprBuilder, BasicBlock *StartBlock); |
||
| 96 | |||
| 97 | /// Copy the basic block. |
||
| 98 | /// |
||
| 99 | /// This copies the entire basic block and updates references to old values |
||
| 100 | /// with references to new values, as defined by GlobalMap. |
||
| 101 | /// |
||
| 102 | /// @param Stmt The block statement to code generate. |
||
| 103 | /// @param LTS A map from old loops to new induction variables as |
||
| 104 | /// SCEVs. |
||
| 105 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 106 | /// which may contain new access expressions for certain |
||
| 107 | /// memory accesses. |
||
| 108 | void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, |
||
| 109 | isl_id_to_ast_expr *NewAccesses); |
||
| 110 | |||
| 111 | /// Remove a ScopArrayInfo's allocation from the ScalarMap. |
||
| 112 | /// |
||
| 113 | /// This function allows to remove values from the ScalarMap. This is useful |
||
| 114 | /// if the corresponding alloca instruction will be deleted (or moved into |
||
| 115 | /// another module), as without removing these values the underlying |
||
| 116 | /// AssertingVH will trigger due to us still keeping reference to this |
||
| 117 | /// scalar. |
||
| 118 | /// |
||
| 119 | /// @param Array The array for which the alloca was generated. |
||
| 120 | void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); } |
||
| 121 | |||
| 122 | /// Return the alloca for @p Access. |
||
| 123 | /// |
||
| 124 | /// If no alloca was mapped for @p Access a new one is created. |
||
| 125 | /// |
||
| 126 | /// @param Access The memory access for which to generate the alloca. |
||
| 127 | /// |
||
| 128 | /// @returns The alloca for @p Access or a replacement value taken from |
||
| 129 | /// GlobalMap. |
||
| 130 | Value *getOrCreateAlloca(const MemoryAccess &Access); |
||
| 131 | |||
| 132 | /// Return the alloca for @p Array. |
||
| 133 | /// |
||
| 134 | /// If no alloca was mapped for @p Array a new one is created. |
||
| 135 | /// |
||
| 136 | /// @param Array The array for which to generate the alloca. |
||
| 137 | /// |
||
| 138 | /// @returns The alloca for @p Array or a replacement value taken from |
||
| 139 | /// GlobalMap. |
||
| 140 | Value *getOrCreateAlloca(const ScopArrayInfo *Array); |
||
| 141 | |||
| 142 | /// Finalize the code generation for the SCoP @p S. |
||
| 143 | /// |
||
| 144 | /// This will initialize and finalize the scalar variables we demoted during |
||
| 145 | /// the code generation. |
||
| 146 | /// |
||
| 147 | /// @see createScalarInitialization(Scop &) |
||
| 148 | /// @see createScalarFinalization(Region &) |
||
| 149 | void finalizeSCoP(Scop &S); |
||
| 150 | |||
| 151 | /// An empty destructor |
||
| 152 | virtual ~BlockGenerator() {} |
||
| 153 | |||
| 154 | BlockGenerator(const BlockGenerator &) = default; |
||
| 155 | |||
| 156 | protected: |
||
| 157 | PollyIRBuilder &Builder; |
||
| 158 | LoopInfo &LI; |
||
| 159 | ScalarEvolution &SE; |
||
| 160 | IslExprBuilder *ExprBuilder; |
||
| 161 | |||
| 162 | /// The dominator tree of this function. |
||
| 163 | DominatorTree &DT; |
||
| 164 | |||
| 165 | /// The entry block of the current function. |
||
| 166 | BasicBlock *EntryBB; |
||
| 167 | |||
| 168 | /// Map to resolve scalar dependences for PHI operands and scalars. |
||
| 169 | /// |
||
| 170 | /// When translating code that contains scalar dependences as they result from |
||
| 171 | /// inter-block scalar dependences (including the use of data carrying PHI |
||
| 172 | /// nodes), we do not directly regenerate in-register SSA code, but instead |
||
| 173 | /// allocate some stack memory through which these scalar values are passed. |
||
| 174 | /// Only a later pass of -mem2reg will then (re)introduce in-register |
||
| 175 | /// computations. |
||
| 176 | /// |
||
| 177 | /// To keep track of the memory location(s) used to store the data computed by |
||
| 178 | /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a |
||
| 179 | /// given ScopArrayInfo to the junk of stack allocated memory, that is |
||
| 180 | /// used for code generation. |
||
| 181 | /// |
||
| 182 | /// Up to two different ScopArrayInfo objects are associated with each |
||
| 183 | /// llvm::Value: |
||
| 184 | /// |
||
| 185 | /// MemoryType::Value objects are used for normal scalar dependences that go |
||
| 186 | /// from a scalar definition to its use. Such dependences are lowered by |
||
| 187 | /// directly writing the value an instruction computes into the corresponding |
||
| 188 | /// chunk of memory and reading it back from this chunk of memory right before |
||
| 189 | /// every use of this original scalar value. The memory allocations for |
||
| 190 | /// MemoryType::Value objects end with '.s2a'. |
||
| 191 | /// |
||
| 192 | /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI |
||
| 193 | /// nodes. For each PHI nodes we introduce, besides the Array of type |
||
| 194 | /// MemoryType::Value, a second chunk of memory into which we write at the end |
||
| 195 | /// of each basic block preceding the PHI instruction the value passed |
||
| 196 | /// through this basic block. At the place where the PHI node is executed, we |
||
| 197 | /// replace the PHI node with a load from the corresponding MemoryType::PHI |
||
| 198 | /// memory location. The memory allocations for MemoryType::PHI end with |
||
| 199 | /// '.phiops'. |
||
| 200 | /// |
||
| 201 | /// Example: |
||
| 202 | /// |
||
| 203 | /// Input C Code |
||
| 204 | /// ============ |
||
| 205 | /// |
||
| 206 | /// S1: x1 = ... |
||
| 207 | /// for (i=0...N) { |
||
| 208 | /// S2: x2 = phi(x1, add) |
||
| 209 | /// S3: add = x2 + 42; |
||
| 210 | /// } |
||
| 211 | /// S4: print(x1) |
||
| 212 | /// print(x2) |
||
| 213 | /// print(add) |
||
| 214 | /// |
||
| 215 | /// |
||
| 216 | /// Unmodified IR IR After expansion |
||
| 217 | /// ============= ================== |
||
| 218 | /// |
||
| 219 | /// S1: x1 = ... S1: x1 = ... |
||
| 220 | /// x1.s2a = s1 |
||
| 221 | /// x2.phiops = s1 |
||
| 222 | /// | | |
||
| 223 | /// | <--<--<--<--< | <--<--<--<--< |
||
| 224 | /// | / \ | / \ . |
||
| 225 | /// V V \ V V \ . |
||
| 226 | /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops | |
||
| 227 | /// | x2.s2a = x2 | |
||
| 228 | /// | | |
||
| 229 | /// S3: add = x2 + 42 | S3: add = x2 + 42 | |
||
| 230 | /// | add.s2a = add | |
||
| 231 | /// | x2.phiops = add | |
||
| 232 | /// | \ / | \ / |
||
| 233 | /// | \ / | \ / |
||
| 234 | /// | >-->-->-->--> | >-->-->-->--> |
||
| 235 | /// V V |
||
| 236 | /// |
||
| 237 | /// S4: x1 = x1.s2a |
||
| 238 | /// S4: ... = x1 ... = x1 |
||
| 239 | /// x2 = x2.s2a |
||
| 240 | /// ... = x2 ... = x2 |
||
| 241 | /// add = add.s2a |
||
| 242 | /// ... = add ... = add |
||
| 243 | /// |
||
| 244 | /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a, |
||
| 245 | /// add:Value -> add.s2a, x2:PHI -> x2.phiops } |
||
| 246 | /// |
||
| 247 | /// ??? Why does a PHI-node require two memory chunks ??? |
||
| 248 | /// |
||
| 249 | /// One may wonder why a PHI node requires two memory chunks and not just |
||
| 250 | /// all data is stored in a single location. The following example tries |
||
| 251 | /// to store all data in .s2a and drops the .phiops location: |
||
| 252 | /// |
||
| 253 | /// S1: x1 = ... |
||
| 254 | /// x1.s2a = s1 |
||
| 255 | /// x2.s2a = s1 // use .s2a instead of .phiops |
||
| 256 | /// | |
||
| 257 | /// | <--<--<--<--< |
||
| 258 | /// | / \ . |
||
| 259 | /// V V \ . |
||
| 260 | /// S2: x2 = x2.s2a | // value is same as above, but read |
||
| 261 | /// | // from .s2a |
||
| 262 | /// | |
||
| 263 | /// x2.s2a = x2 | // store into .s2a as normal |
||
| 264 | /// | |
||
| 265 | /// S3: add = x2 + 42 | |
||
| 266 | /// add.s2a = add | |
||
| 267 | /// x2.s2a = add | // use s2a instead of .phiops |
||
| 268 | /// | \ / // !!! This is wrong, as x2.s2a now |
||
| 269 | /// | >-->-->-->--> // contains add instead of x2. |
||
| 270 | /// V |
||
| 271 | /// |
||
| 272 | /// S4: x1 = x1.s2a |
||
| 273 | /// ... = x1 |
||
| 274 | /// x2 = x2.s2a // !!! We now read 'add' instead of |
||
| 275 | /// ... = x2 // 'x2' |
||
| 276 | /// add = add.s2a |
||
| 277 | /// ... = add |
||
| 278 | /// |
||
| 279 | /// As visible in the example, the SSA value of the PHI node may still be |
||
| 280 | /// needed _after_ the basic block, which could conceptually branch to the |
||
| 281 | /// PHI node, has been run and has overwritten the PHI's old value. Hence, a |
||
| 282 | /// single memory location is not enough to code-generate a PHI node. |
||
| 283 | /// |
||
| 284 | /// Memory locations used for the special PHI node modeling. |
||
| 285 | AllocaMapTy &ScalarMap; |
||
| 286 | |||
| 287 | /// Map from instructions to their escape users as well as the alloca. |
||
| 288 | EscapeUsersAllocaMapTy &EscapeMap; |
||
| 289 | |||
| 290 | /// A map from llvm::Values referenced in the old code to a new set of |
||
| 291 | /// llvm::Values, which is used to replace these old values during |
||
| 292 | /// code generation. |
||
| 293 | ValueMapT &GlobalMap; |
||
| 294 | |||
| 295 | /// The first basic block after the RTC. |
||
| 296 | BasicBlock *StartBlock; |
||
| 297 | |||
| 298 | /// Split @p BB to create a new one we can use to clone @p BB in. |
||
| 299 | BasicBlock *splitBB(BasicBlock *BB); |
||
| 300 | |||
| 301 | /// Copy the given basic block. |
||
| 302 | /// |
||
| 303 | /// @param Stmt The statement to code generate. |
||
| 304 | /// @param BB The basic block to code generate. |
||
| 305 | /// @param BBMap A mapping from old values to their new values in this |
||
| 306 | /// block. |
||
| 307 | /// @param LTS A map from old loops to new induction variables as |
||
| 308 | /// SCEVs. |
||
| 309 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 310 | /// which may contain new access expressions for certain |
||
| 311 | /// memory accesses. |
||
| 312 | /// |
||
| 313 | /// @returns The copy of the basic block. |
||
| 314 | BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, |
||
| 315 | LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); |
||
| 316 | |||
| 317 | /// Copy the given basic block. |
||
| 318 | /// |
||
| 319 | /// @param Stmt The statement to code generate. |
||
| 320 | /// @param BB The basic block to code generate. |
||
| 321 | /// @param BBCopy The new basic block to generate code in. |
||
| 322 | /// @param BBMap A mapping from old values to their new values in this |
||
| 323 | /// block. |
||
| 324 | /// @param LTS A map from old loops to new induction variables as |
||
| 325 | /// SCEVs. |
||
| 326 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 327 | /// which may contain new access expressions for certain |
||
| 328 | /// memory accesses. |
||
| 329 | void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy, |
||
| 330 | ValueMapT &BBMap, LoopToScevMapT <S, |
||
| 331 | isl_id_to_ast_expr *NewAccesses); |
||
| 332 | |||
| 333 | /// Generate reload of scalars demoted to memory and needed by @p Stmt. |
||
| 334 | /// |
||
| 335 | /// @param Stmt The statement we generate code for. |
||
| 336 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 337 | /// variable to their new values. |
||
| 338 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 339 | /// @param NewAccesses A map from memory access ids to new ast expressions. |
||
| 340 | void generateScalarLoads(ScopStmt &Stmt, LoopToScevMapT <S, |
||
| 341 | ValueMapT &BBMap, |
||
| 342 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 343 | |||
| 344 | /// When statement tracing is enabled, build the print instructions for |
||
| 345 | /// printing the current statement instance. |
||
| 346 | /// |
||
| 347 | /// The printed output looks like: |
||
| 348 | /// |
||
| 349 | /// Stmt1(0) |
||
| 350 | /// |
||
| 351 | /// If printing of scalars is enabled, it also appends the value of each |
||
| 352 | /// scalar to the line: |
||
| 353 | /// |
||
| 354 | /// Stmt1(0) %i=1 %sum=5 |
||
| 355 | /// |
||
| 356 | /// @param Stmt The statement we generate code for. |
||
| 357 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 358 | /// variable to their new values. |
||
| 359 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 360 | void generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT <S, |
||
| 361 | ValueMapT &BBMap); |
||
| 362 | |||
| 363 | /// Generate instructions that compute whether one instance of @p Set is |
||
| 364 | /// executed. |
||
| 365 | /// |
||
| 366 | /// @param Stmt The statement we generate code for. |
||
| 367 | /// @param Subdomain A set in the space of @p Stmt's domain. Elements not in |
||
| 368 | /// @p Stmt's domain are ignored. |
||
| 369 | /// |
||
| 370 | /// @return An expression of type i1, generated into the current builder |
||
| 371 | /// position, that evaluates to 1 if the executed instance is part of |
||
| 372 | /// @p Set. |
||
| 373 | Value *buildContainsCondition(ScopStmt &Stmt, const isl::set &Subdomain); |
||
| 374 | |||
| 375 | /// Generate code that executes in a subset of @p Stmt's domain. |
||
| 376 | /// |
||
| 377 | /// @param Stmt The statement we generate code for. |
||
| 378 | /// @param Subdomain The condition for some code to be executed. |
||
| 379 | /// @param Subject A name for the code that is executed |
||
| 380 | /// conditionally. Used to name new basic blocks and |
||
| 381 | /// instructions. |
||
| 382 | /// @param GenThenFunc Callback which generates the code to be executed |
||
| 383 | /// when the current executed instance is in @p Set. The |
||
| 384 | /// IRBuilder's position is moved to within the block that |
||
| 385 | /// executes conditionally for this callback. |
||
| 386 | void generateConditionalExecution(ScopStmt &Stmt, const isl::set &Subdomain, |
||
| 387 | StringRef Subject, |
||
| 388 | const std::function<void()> &GenThenFunc); |
||
| 389 | |||
| 390 | /// Generate the scalar stores for the given statement. |
||
| 391 | /// |
||
| 392 | /// After the statement @p Stmt was copied all inner-SCoP scalar dependences |
||
| 393 | /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to |
||
| 394 | /// be demoted to memory. |
||
| 395 | /// |
||
| 396 | /// @param Stmt The statement we generate code for. |
||
| 397 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 398 | /// variable to their new values |
||
| 399 | /// (for values recalculated in the new ScoP, but not |
||
| 400 | /// within this basic block) |
||
| 401 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 402 | /// @param NewAccesses A map from memory access ids to new ast expressions. |
||
| 403 | virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, |
||
| 404 | ValueMapT &BBMap, |
||
| 405 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 406 | |||
| 407 | /// Handle users of @p Array outside the SCoP. |
||
| 408 | /// |
||
| 409 | /// @param S The current SCoP. |
||
| 410 | /// @param Inst The ScopArrayInfo to handle. |
||
| 411 | void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array); |
||
| 412 | |||
| 413 | /// Find scalar statements that have outside users. |
||
| 414 | /// |
||
| 415 | /// We register these scalar values to later update subsequent scalar uses of |
||
| 416 | /// these values to either use the newly computed value from within the scop |
||
| 417 | /// (if the scop was executed) or the unchanged original code (if the run-time |
||
| 418 | /// check failed). |
||
| 419 | /// |
||
| 420 | /// @param S The scop for which to find the outside users. |
||
| 421 | void findOutsideUsers(Scop &S); |
||
| 422 | |||
| 423 | /// Initialize the memory of demoted scalars. |
||
| 424 | /// |
||
| 425 | /// @param S The scop for which to generate the scalar initializers. |
||
| 426 | void createScalarInitialization(Scop &S); |
||
| 427 | |||
| 428 | /// Create exit PHI node merges for PHI nodes with more than two edges |
||
| 429 | /// from inside the scop. |
||
| 430 | /// |
||
| 431 | /// For scops which have a PHI node in the exit block that has more than two |
||
| 432 | /// incoming edges from inside the scop region, we require some special |
||
| 433 | /// handling to understand which of the possible values will be passed to the |
||
| 434 | /// PHI node from inside the optimized version of the scop. To do so ScopInfo |
||
| 435 | /// models the possible incoming values as write accesses of the ScopStmts. |
||
| 436 | /// |
||
| 437 | /// This function creates corresponding code to reload the computed outgoing |
||
| 438 | /// value from the stack slot it has been stored into and to pass it on to the |
||
| 439 | /// PHI node in the original exit block. |
||
| 440 | /// |
||
| 441 | /// @param S The scop for which to generate the exiting PHI nodes. |
||
| 442 | void createExitPHINodeMerges(Scop &S); |
||
| 443 | |||
| 444 | /// Promote the values of demoted scalars after the SCoP. |
||
| 445 | /// |
||
| 446 | /// If a scalar value was used outside the SCoP we need to promote the value |
||
| 447 | /// stored in the memory cell allocated for that scalar and combine it with |
||
| 448 | /// the original value in the non-optimized SCoP. |
||
| 449 | void createScalarFinalization(Scop &S); |
||
| 450 | |||
| 451 | /// Try to synthesize a new value |
||
| 452 | /// |
||
| 453 | /// Given an old value, we try to synthesize it in a new context from its |
||
| 454 | /// original SCEV expression. We start from the original SCEV expression, |
||
| 455 | /// then replace outdated parameter and loop references, and finally |
||
| 456 | /// expand it to code that computes this updated expression. |
||
| 457 | /// |
||
| 458 | /// @param Stmt The statement to code generate |
||
| 459 | /// @param Old The old Value |
||
| 460 | /// @param BBMap A mapping from old values to their new values |
||
| 461 | /// (for values recalculated within this basic block) |
||
| 462 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 463 | /// variable to their new values |
||
| 464 | /// (for values recalculated in the new ScoP, but not |
||
| 465 | /// within this basic block) |
||
| 466 | /// @param L The loop that surrounded the instruction that referenced |
||
| 467 | /// this value in the original code. This loop is used to |
||
| 468 | /// evaluate the scalar evolution at the right scope. |
||
| 469 | /// |
||
| 470 | /// @returns o A newly synthesized value. |
||
| 471 | /// o NULL, if synthesizing the value failed. |
||
| 472 | Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, |
||
| 473 | LoopToScevMapT <S, Loop *L) const; |
||
| 474 | |||
| 475 | /// Get the new version of a value. |
||
| 476 | /// |
||
| 477 | /// Given an old value, we first check if a new version of this value is |
||
| 478 | /// available in the BBMap or GlobalMap. In case it is not and the value can |
||
| 479 | /// be recomputed using SCEV, we do so. If we can not recompute a value |
||
| 480 | /// using SCEV, but we understand that the value is constant within the scop, |
||
| 481 | /// we return the old value. If the value can still not be derived, this |
||
| 482 | /// function will assert. |
||
| 483 | /// |
||
| 484 | /// @param Stmt The statement to code generate. |
||
| 485 | /// @param Old The old Value. |
||
| 486 | /// @param BBMap A mapping from old values to their new values |
||
| 487 | /// (for values recalculated within this basic block). |
||
| 488 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 489 | /// variable to their new values |
||
| 490 | /// (for values recalculated in the new ScoP, but not |
||
| 491 | /// within this basic block). |
||
| 492 | /// @param L The loop that surrounded the instruction that referenced |
||
| 493 | /// this value in the original code. This loop is used to |
||
| 494 | /// evaluate the scalar evolution at the right scope. |
||
| 495 | /// |
||
| 496 | /// @returns o The old value, if it is still valid. |
||
| 497 | /// o The new value, if available. |
||
| 498 | /// o NULL, if no value is found. |
||
| 499 | Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, |
||
| 500 | LoopToScevMapT <S, Loop *L) const; |
||
| 501 | |||
| 502 | void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, |
||
| 503 | LoopToScevMapT <S); |
||
| 504 | |||
| 505 | /// Get the innermost loop that surrounds the statement @p Stmt. |
||
| 506 | Loop *getLoopForStmt(const ScopStmt &Stmt) const; |
||
| 507 | |||
| 508 | /// Generate the operand address |
||
| 509 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 510 | /// which may contain new access expressions for certain |
||
| 511 | /// memory accesses. |
||
| 512 | Value *generateLocationAccessed(ScopStmt &Stmt, MemAccInst Inst, |
||
| 513 | ValueMapT &BBMap, LoopToScevMapT <S, |
||
| 514 | isl_id_to_ast_expr *NewAccesses); |
||
| 515 | |||
| 516 | /// Generate the operand address. |
||
| 517 | /// |
||
| 518 | /// @param Stmt The statement to generate code for. |
||
| 519 | /// @param L The innermost loop that surrounds the statement. |
||
| 520 | /// @param Pointer If the access expression is not changed (ie. not found |
||
| 521 | /// in @p LTS), use this Pointer from the original code |
||
| 522 | /// instead. |
||
| 523 | /// @param BBMap A mapping from old values to their new values. |
||
| 524 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 525 | /// variable to their new values. |
||
| 526 | /// @param NewAccesses Ahead-of-time generated access expressions. |
||
| 527 | /// @param Id Identifier of the MemoryAccess to generate. |
||
| 528 | /// @param ExpectedType The type the returned value should have. |
||
| 529 | /// |
||
| 530 | /// @return The generated address. |
||
| 531 | Value *generateLocationAccessed(ScopStmt &Stmt, Loop *L, Value *Pointer, |
||
| 532 | ValueMapT &BBMap, LoopToScevMapT <S, |
||
| 533 | isl_id_to_ast_expr *NewAccesses, |
||
| 534 | __isl_take isl_id *Id, Type *ExpectedType); |
||
| 535 | |||
| 536 | /// Generate the pointer value that is accesses by @p Access. |
||
| 537 | /// |
||
| 538 | /// For write accesses, generate the target address. For read accesses, |
||
| 539 | /// generate the source address. |
||
| 540 | /// The access can be either an array access or a scalar access. In the first |
||
| 541 | /// case, the returned address will point to an element into that array. In |
||
| 542 | /// the scalar case, an alloca is used. |
||
| 543 | /// If a new AccessRelation is set for the MemoryAccess, the new relation will |
||
| 544 | /// be used. |
||
| 545 | /// |
||
| 546 | /// @param Access The access to generate a pointer for. |
||
| 547 | /// @param L The innermost loop that surrounds the statement. |
||
| 548 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 549 | /// variable to their new values. |
||
| 550 | /// @param BBMap A mapping from old values to their new values. |
||
| 551 | /// @param NewAccesses A map from memory access ids to new ast expressions. |
||
| 552 | /// |
||
| 553 | /// @return The generated address. |
||
| 554 | Value *getImplicitAddress(MemoryAccess &Access, Loop *L, LoopToScevMapT <S, |
||
| 555 | ValueMapT &BBMap, |
||
| 556 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 557 | |||
| 558 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 559 | /// which may contain new access expressions for certain |
||
| 560 | /// memory accesses. |
||
| 561 | Value *generateArrayLoad(ScopStmt &Stmt, LoadInst *load, ValueMapT &BBMap, |
||
| 562 | LoopToScevMapT <S, |
||
| 563 | isl_id_to_ast_expr *NewAccesses); |
||
| 564 | |||
| 565 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 566 | /// which may contain new access expressions for certain |
||
| 567 | /// memory accesses. |
||
| 568 | void generateArrayStore(ScopStmt &Stmt, StoreInst *store, ValueMapT &BBMap, |
||
| 569 | LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); |
||
| 570 | |||
| 571 | /// Copy a single PHI instruction. |
||
| 572 | /// |
||
| 573 | /// The implementation in the BlockGenerator is trivial, however it allows |
||
| 574 | /// subclasses to handle PHIs different. |
||
| 575 | virtual void copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &, |
||
| 576 | LoopToScevMapT &) {} |
||
| 577 | |||
| 578 | /// Copy a single Instruction. |
||
| 579 | /// |
||
| 580 | /// This copies a single Instruction and updates references to old values |
||
| 581 | /// with references to new values, as defined by GlobalMap and BBMap. |
||
| 582 | /// |
||
| 583 | /// @param Stmt The statement to code generate. |
||
| 584 | /// @param Inst The instruction to copy. |
||
| 585 | /// @param BBMap A mapping from old values to their new values |
||
| 586 | /// (for values recalculated within this basic block). |
||
| 587 | /// @param GlobalMap A mapping from old values to their new values |
||
| 588 | /// (for values recalculated in the new ScoP, but not |
||
| 589 | /// within this basic block). |
||
| 590 | /// @param LTS A mapping from loops virtual canonical induction |
||
| 591 | /// variable to their new values |
||
| 592 | /// (for values recalculated in the new ScoP, but not |
||
| 593 | /// within this basic block). |
||
| 594 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 595 | /// which may contain new access expressions for certain |
||
| 596 | /// memory accesses. |
||
| 597 | void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, |
||
| 598 | LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); |
||
| 599 | |||
| 600 | /// Helper to determine if @p Inst can be synthesized in @p Stmt. |
||
| 601 | /// |
||
| 602 | /// @returns false, iff @p Inst can be synthesized in @p Stmt. |
||
| 603 | bool canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst); |
||
| 604 | |||
| 605 | /// Remove dead instructions generated for BB |
||
| 606 | /// |
||
| 607 | /// @param BB The basic block code for which code has been generated. |
||
| 608 | /// @param BBMap A local map from old to new instructions. |
||
| 609 | void removeDeadInstructions(BasicBlock *BB, ValueMapT &BBMap); |
||
| 610 | |||
| 611 | /// Invalidate the scalar evolution expressions for a scop. |
||
| 612 | /// |
||
| 613 | /// This function invalidates the scalar evolution results for all |
||
| 614 | /// instructions that are part of a given scop, and the loops |
||
| 615 | /// surrounding the users of merge blocks. This is necessary to ensure that |
||
| 616 | /// later scops do not obtain scalar evolution expressions that reference |
||
| 617 | /// values that earlier dominated the later scop, but have been moved in the |
||
| 618 | /// conditional part of an earlier scop and consequently do not any more |
||
| 619 | /// dominate the later scop. |
||
| 620 | /// |
||
| 621 | /// @param S The scop to invalidate. |
||
| 622 | void invalidateScalarEvolution(Scop &S); |
||
| 623 | }; |
||
| 624 | |||
| 625 | /// Generate a new vector basic block for a polyhedral statement. |
||
| 626 | /// |
||
| 627 | /// The only public function exposed is generate(). |
||
| 628 | class VectorBlockGenerator final : BlockGenerator { |
||
| 629 | public: |
||
| 630 | /// Generate a new vector basic block for a ScoPStmt. |
||
| 631 | /// |
||
| 632 | /// This code generation is similar to the normal, scalar code generation, |
||
| 633 | /// except that each instruction is code generated for several vector lanes |
||
| 634 | /// at a time. If possible instructions are issued as actual vector |
||
| 635 | /// instructions, but e.g. for address calculation instructions we currently |
||
| 636 | /// generate scalar instructions for each vector lane. |
||
| 637 | /// |
||
| 638 | /// @param BlockGen A block generator object used as parent. |
||
| 639 | /// @param Stmt The statement to code generate. |
||
| 640 | /// @param VLTS A mapping from loops virtual canonical induction |
||
| 641 | /// variable to their new values |
||
| 642 | /// (for values recalculated in the new ScoP, but not |
||
| 643 | /// within this basic block), one for each lane. |
||
| 644 | /// @param Schedule A map from the statement to a schedule where the |
||
| 645 | /// innermost dimension is the dimension of the innermost |
||
| 646 | /// loop containing the statement. |
||
| 647 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 648 | /// which may contain new access expressions for certain |
||
| 649 | /// memory accesses. |
||
| 650 | static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt, |
||
| 651 | std::vector<LoopToScevMapT> &VLTS, |
||
| 652 | __isl_keep isl_map *Schedule, |
||
| 653 | __isl_keep isl_id_to_ast_expr *NewAccesses) { |
||
| 654 | VectorBlockGenerator Generator(BlockGen, VLTS, Schedule); |
||
| 655 | Generator.copyStmt(Stmt, NewAccesses); |
||
| 656 | } |
||
| 657 | |||
| 658 | private: |
||
| 659 | // This is a vector of loop->scev maps. The first map is used for the first |
||
| 660 | // vector lane, ... |
||
| 661 | // Each map, contains information about Instructions in the old ScoP, which |
||
| 662 | // are recalculated in the new SCoP. When copying the basic block, we replace |
||
| 663 | // all references to the old instructions with their recalculated values. |
||
| 664 | // |
||
| 665 | // For example, when the code generator produces this AST: |
||
| 666 | // |
||
| 667 | // for (int c1 = 0; c1 <= 1023; c1 += 1) |
||
| 668 | // for (int c2 = 0; c2 <= 1023; c2 += VF) |
||
| 669 | // for (int lane = 0; lane <= VF; lane += 1) |
||
| 670 | // Stmt(c2 + lane + 3, c1); |
||
| 671 | // |
||
| 672 | // VLTS[lane] contains a map: |
||
| 673 | // "outer loop in the old loop nest" -> SCEV("c2 + lane + 3"), |
||
| 674 | // "inner loop in the old loop nest" -> SCEV("c1"). |
||
| 675 | std::vector<LoopToScevMapT> &VLTS; |
||
| 676 | |||
| 677 | // A map from the statement to a schedule where the innermost dimension is the |
||
| 678 | // dimension of the innermost loop containing the statement. |
||
| 679 | isl_map *Schedule; |
||
| 680 | |||
| 681 | VectorBlockGenerator(BlockGenerator &BlockGen, |
||
| 682 | std::vector<LoopToScevMapT> &VLTS, |
||
| 683 | __isl_keep isl_map *Schedule); |
||
| 684 | |||
| 685 | int getVectorWidth(); |
||
| 686 | |||
| 687 | Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap, |
||
| 688 | VectorValueMapT &ScalarMaps, Loop *L); |
||
| 689 | |||
| 690 | /// Load a vector from a set of adjacent scalars |
||
| 691 | /// |
||
| 692 | /// In case a set of scalars is known to be next to each other in memory, |
||
| 693 | /// create a vector load that loads those scalars |
||
| 694 | /// |
||
| 695 | /// %vector_ptr= bitcast double* %p to <4 x double>* |
||
| 696 | /// %vec_full = load <4 x double>* %vector_ptr |
||
| 697 | /// |
||
| 698 | /// @param Stmt The statement to code generate. |
||
| 699 | /// @param NegativeStride This is used to indicate a -1 stride. In such |
||
| 700 | /// a case we load the end of a base address and |
||
| 701 | /// shuffle the accesses in reverse order into the |
||
| 702 | /// vector. By default we would do only positive |
||
| 703 | /// strides. |
||
| 704 | /// |
||
| 705 | /// @param NewAccesses A map from memory access ids to new ast |
||
| 706 | /// expressions, which may contain new access |
||
| 707 | /// expressions for certain memory accesses. |
||
| 708 | Value *generateStrideOneLoad(ScopStmt &Stmt, LoadInst *Load, |
||
| 709 | VectorValueMapT &ScalarMaps, |
||
| 710 | __isl_keep isl_id_to_ast_expr *NewAccesses, |
||
| 711 | bool NegativeStride); |
||
| 712 | |||
| 713 | /// Load a vector initialized from a single scalar in memory |
||
| 714 | /// |
||
| 715 | /// In case all elements of a vector are initialized to the same |
||
| 716 | /// scalar value, this value is loaded and shuffled into all elements |
||
| 717 | /// of the vector. |
||
| 718 | /// |
||
| 719 | /// %splat_one = load <1 x double>* %p |
||
| 720 | /// %splat = shufflevector <1 x double> %splat_one, <1 x |
||
| 721 | /// double> %splat_one, <4 x i32> zeroinitializer |
||
| 722 | /// |
||
| 723 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 724 | /// which may contain new access expressions for certain |
||
| 725 | /// memory accesses. |
||
| 726 | Value *generateStrideZeroLoad(ScopStmt &Stmt, LoadInst *Load, |
||
| 727 | ValueMapT &BBMap, |
||
| 728 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 729 | |||
| 730 | /// Load a vector from scalars distributed in memory |
||
| 731 | /// |
||
| 732 | /// In case some scalars a distributed randomly in memory. Create a vector |
||
| 733 | /// by loading each scalar and by inserting one after the other into the |
||
| 734 | /// vector. |
||
| 735 | /// |
||
| 736 | /// %scalar_1= load double* %p_1 |
||
| 737 | /// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0 |
||
| 738 | /// %scalar 2 = load double* %p_2 |
||
| 739 | /// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1 |
||
| 740 | /// |
||
| 741 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 742 | /// which may contain new access expressions for certain |
||
| 743 | /// memory accesses. |
||
| 744 | Value *generateUnknownStrideLoad(ScopStmt &Stmt, LoadInst *Load, |
||
| 745 | VectorValueMapT &ScalarMaps, |
||
| 746 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 747 | |||
| 748 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 749 | /// which may contain new access expressions for certain |
||
| 750 | /// memory accesses. |
||
| 751 | void generateLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, |
||
| 752 | VectorValueMapT &ScalarMaps, |
||
| 753 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 754 | |||
| 755 | void copyUnaryInst(ScopStmt &Stmt, UnaryInstruction *Inst, |
||
| 756 | ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); |
||
| 757 | |||
| 758 | void copyBinaryInst(ScopStmt &Stmt, BinaryOperator *Inst, |
||
| 759 | ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); |
||
| 760 | |||
| 761 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 762 | /// which may contain new access expressions for certain |
||
| 763 | /// memory accesses. |
||
| 764 | void copyStore(ScopStmt &Stmt, StoreInst *Store, ValueMapT &VectorMap, |
||
| 765 | VectorValueMapT &ScalarMaps, |
||
| 766 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 767 | |||
| 768 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 769 | /// which may contain new access expressions for certain |
||
| 770 | /// memory accesses. |
||
| 771 | void copyInstScalarized(ScopStmt &Stmt, Instruction *Inst, |
||
| 772 | ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, |
||
| 773 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 774 | |||
| 775 | bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap, |
||
| 776 | VectorValueMapT &ScalarMaps); |
||
| 777 | |||
| 778 | bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); |
||
| 779 | |||
| 780 | /// Generate vector loads for scalars. |
||
| 781 | /// |
||
| 782 | /// @param Stmt The scop statement for which to generate the loads. |
||
| 783 | /// @param VectorBlockMap A map that will be updated to relate the original |
||
| 784 | /// values with the newly generated vector loads. |
||
| 785 | void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap); |
||
| 786 | |||
| 787 | /// Verify absence of scalar stores. |
||
| 788 | /// |
||
| 789 | /// @param Stmt The scop statement to check for scalar stores. |
||
| 790 | void verifyNoScalarStores(ScopStmt &Stmt); |
||
| 791 | |||
| 792 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 793 | /// which may contain new access expressions for certain |
||
| 794 | /// memory accesses. |
||
| 795 | void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &VectorMap, |
||
| 796 | VectorValueMapT &ScalarMaps, |
||
| 797 | __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 798 | |||
| 799 | /// @param NewAccesses A map from memory access ids to new ast expressions, |
||
| 800 | /// which may contain new access expressions for certain |
||
| 801 | /// memory accesses. |
||
| 802 | void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses); |
||
| 803 | }; |
||
| 804 | |||
| 805 | /// Generator for new versions of polyhedral region statements. |
||
| 806 | class RegionGenerator final : BlockGenerator { |
||
| 807 | public: |
||
| 808 | /// Create a generator for regions. |
||
| 809 | /// |
||
| 810 | /// @param BlockGen A generator for basic blocks. |
||
| 811 | RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {} |
||
| 812 | |||
| 813 | virtual ~RegionGenerator() {} |
||
| 814 | |||
| 815 | /// Copy the region statement @p Stmt. |
||
| 816 | /// |
||
| 817 | /// This copies the entire region represented by @p Stmt and updates |
||
| 818 | /// references to old values with references to new values, as defined by |
||
| 819 | /// GlobalMap. |
||
| 820 | /// |
||
| 821 | /// @param Stmt The statement to code generate. |
||
| 822 | /// @param LTS A map from old loops to new induction variables as SCEVs. |
||
| 823 | void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, |
||
| 824 | __isl_keep isl_id_to_ast_expr *IdToAstExp); |
||
| 825 | |||
| 826 | private: |
||
| 827 | /// A map from old to the first new block in the region, that was created to |
||
| 828 | /// model the old basic block. |
||
| 829 | DenseMap<BasicBlock *, BasicBlock *> StartBlockMap; |
||
| 830 | |||
| 831 | /// A map from old to the last new block in the region, that was created to |
||
| 832 | /// model the old basic block. |
||
| 833 | DenseMap<BasicBlock *, BasicBlock *> EndBlockMap; |
||
| 834 | |||
| 835 | /// The "BBMaps" for the whole region (one for each block). In case a basic |
||
| 836 | /// block is code generated to multiple basic blocks (e.g., for partial |
||
| 837 | /// writes), the StartBasic is used as index for the RegionMap. |
||
| 838 | DenseMap<BasicBlock *, ValueMapT> RegionMaps; |
||
| 839 | |||
| 840 | /// Mapping to remember PHI nodes that still need incoming values. |
||
| 841 | using PHINodePairTy = std::pair<PHINode *, PHINode *>; |
||
| 842 | DenseMap<BasicBlock *, SmallVector<PHINodePairTy, 4>> IncompletePHINodeMap; |
||
| 843 | |||
| 844 | /// Repair the dominance tree after we created a copy block for @p BB. |
||
| 845 | /// |
||
| 846 | /// @returns The immediate dominator in the DT for @p BBCopy if in the region. |
||
| 847 | BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy); |
||
| 848 | |||
| 849 | /// Add the new operand from the copy of @p IncomingBB to @p PHICopy. |
||
| 850 | /// |
||
| 851 | /// PHI nodes, which may have (multiple) edges that enter from outside the |
||
| 852 | /// non-affine subregion and even from outside the scop, are code generated as |
||
| 853 | /// follows: |
||
| 854 | /// |
||
| 855 | /// # Original |
||
| 856 | /// |
||
| 857 | /// Region: %A-> %exit |
||
| 858 | /// NonAffine Stmt: %nonaffB -> %D (includes %nonaffB, %nonaffC) |
||
| 859 | /// |
||
| 860 | /// pre: |
||
| 861 | /// %val = add i64 1, 1 |
||
| 862 | /// |
||
| 863 | /// A: |
||
| 864 | /// br label %nonaff |
||
| 865 | /// |
||
| 866 | /// nonaffB: |
||
| 867 | /// %phi = phi i64 [%val, %A], [%valC, %nonAffC], [%valD, %D] |
||
| 868 | /// %cmp = <nonaff> |
||
| 869 | /// br i1 %cmp, label %C, label %nonaffC |
||
| 870 | /// |
||
| 871 | /// nonaffC: |
||
| 872 | /// %valC = add i64 1, 1 |
||
| 873 | /// br i1 undef, label %D, label %nonaffB |
||
| 874 | /// |
||
| 875 | /// D: |
||
| 876 | /// %valD = ... |
||
| 877 | /// %exit_cond = <loopexit> |
||
| 878 | /// br i1 %exit_cond, label %nonaffB, label %exit |
||
| 879 | /// |
||
| 880 | /// exit: |
||
| 881 | /// ... |
||
| 882 | /// |
||
| 883 | /// - %start and %C enter from outside the non-affine region. |
||
| 884 | /// - %nonaffC enters from within the non-affine region. |
||
| 885 | /// |
||
| 886 | /// # New |
||
| 887 | /// |
||
| 888 | /// polly.A: |
||
| 889 | /// store i64 %val, i64* %phi.phiops |
||
| 890 | /// br label %polly.nonaffA.entry |
||
| 891 | /// |
||
| 892 | /// polly.nonaffB.entry: |
||
| 893 | /// %phi.phiops.reload = load i64, i64* %phi.phiops |
||
| 894 | /// br label %nonaffB |
||
| 895 | /// |
||
| 896 | /// polly.nonaffB: |
||
| 897 | /// %polly.phi = [%phi.phiops.reload, %nonaffB.entry], |
||
| 898 | /// [%p.valC, %polly.nonaffC] |
||
| 899 | /// |
||
| 900 | /// polly.nonaffC: |
||
| 901 | /// %p.valC = add i64 1, 1 |
||
| 902 | /// br i1 undef, label %polly.D, label %polly.nonaffB |
||
| 903 | /// |
||
| 904 | /// polly.D: |
||
| 905 | /// %p.valD = ... |
||
| 906 | /// store i64 %p.valD, i64* %phi.phiops |
||
| 907 | /// %p.exit_cond = <loopexit> |
||
| 908 | /// br i1 %p.exit_cond, label %polly.nonaffB, label %exit |
||
| 909 | /// |
||
| 910 | /// Values that enter the PHI from outside the non-affine region are stored |
||
| 911 | /// into the stack slot %phi.phiops by statements %polly.A and %polly.D and |
||
| 912 | /// reloaded in %polly.nonaffB.entry, a basic block generated before the |
||
| 913 | /// actual non-affine region. |
||
| 914 | /// |
||
| 915 | /// When generating the PHI node of the non-affine region in %polly.nonaffB, |
||
| 916 | /// incoming edges from outside the region are combined into a single branch |
||
| 917 | /// from %polly.nonaffB.entry which has as incoming value the value reloaded |
||
| 918 | /// from the %phi.phiops stack slot. Incoming edges from within the region |
||
| 919 | /// refer to the copied instructions (%p.valC) and basic blocks |
||
| 920 | /// (%polly.nonaffC) of the non-affine region. |
||
| 921 | /// |
||
| 922 | /// @param Stmt The statement to code generate. |
||
| 923 | /// @param PHI The original PHI we copy. |
||
| 924 | /// @param PHICopy The copy of @p PHI. |
||
| 925 | /// @param IncomingBB An incoming block of @p PHI. |
||
| 926 | /// @param LTS A map from old loops to new induction variables as |
||
| 927 | /// SCEVs. |
||
| 928 | void addOperandToPHI(ScopStmt &Stmt, PHINode *PHI, PHINode *PHICopy, |
||
| 929 | BasicBlock *IncomingBB, LoopToScevMapT <S); |
||
| 930 | |||
| 931 | /// Create a PHI that combines the incoming values from all incoming blocks |
||
| 932 | /// that are in the subregion. |
||
| 933 | /// |
||
| 934 | /// PHIs in the subregion's exit block can have incoming edges from within and |
||
| 935 | /// outside the subregion. This function combines the incoming values from |
||
| 936 | /// within the subregion to appear as if there is only one incoming edge from |
||
| 937 | /// the subregion (an additional exit block is created by RegionGenerator). |
||
| 938 | /// This is to avoid that a value is written to the .phiops location without |
||
| 939 | /// leaving the subregion because the exiting block as an edge back into the |
||
| 940 | /// subregion. |
||
| 941 | /// |
||
| 942 | /// @param MA The WRITE of MemoryKind::PHI/MemoryKind::ExitPHI for a PHI in |
||
| 943 | /// the subregion's exit block. |
||
| 944 | /// @param LTS Virtual induction variable mapping. |
||
| 945 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 946 | /// @param L Loop surrounding this region statement. |
||
| 947 | /// |
||
| 948 | /// @returns The constructed PHI node. |
||
| 949 | PHINode *buildExitPHI(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap, |
||
| 950 | Loop *L); |
||
| 951 | |||
| 952 | /// @param Return the new value of a scalar write, creating a PHINode if |
||
| 953 | /// necessary. |
||
| 954 | /// |
||
| 955 | /// @param MA A scalar WRITE MemoryAccess. |
||
| 956 | /// @param LTS Virtual induction variable mapping. |
||
| 957 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 958 | /// |
||
| 959 | /// @returns The effective value of @p MA's written value when leaving the |
||
| 960 | /// subregion. |
||
| 961 | /// @see buildExitPHI |
||
| 962 | Value *getExitScalar(MemoryAccess *MA, LoopToScevMapT <S, ValueMapT &BBMap); |
||
| 963 | |||
| 964 | /// Generate the scalar stores for the given statement. |
||
| 965 | /// |
||
| 966 | /// After the statement @p Stmt was copied all inner-SCoP scalar dependences |
||
| 967 | /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to |
||
| 968 | /// be demoted to memory. |
||
| 969 | /// |
||
| 970 | /// @param Stmt The statement we generate code for. |
||
| 971 | /// @param LTS A mapping from loops virtual canonical induction variable to |
||
| 972 | /// their new values (for values recalculated in the new ScoP, |
||
| 973 | /// but not within this basic block) |
||
| 974 | /// @param BBMap A mapping from old values to their new values in this block. |
||
| 975 | /// @param LTS A mapping from loops virtual canonical induction variable to |
||
| 976 | /// their new values. |
||
| 977 | void |
||
| 978 | generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp, |
||
| 979 | __isl_keep isl_id_to_ast_expr *NewAccesses) override; |
||
| 980 | |||
| 981 | /// Copy a single PHI instruction. |
||
| 982 | /// |
||
| 983 | /// This copies a single PHI instruction and updates references to old values |
||
| 984 | /// with references to new values, as defined by GlobalMap and BBMap. |
||
| 985 | /// |
||
| 986 | /// @param Stmt The statement to code generate. |
||
| 987 | /// @param PHI The PHI instruction to copy. |
||
| 988 | /// @param BBMap A mapping from old values to their new values |
||
| 989 | /// (for values recalculated within this basic block). |
||
| 990 | /// @param LTS A map from old loops to new induction variables as SCEVs. |
||
| 991 | void copyPHIInstruction(ScopStmt &Stmt, PHINode *Inst, ValueMapT &BBMap, |
||
| 992 | LoopToScevMapT <S) override; |
||
| 993 | }; |
||
| 994 | } // namespace polly |
||
| 995 | #endif |