Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 14 | pmbaty | 1 | //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// | 
| 2 | // | ||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | // | ||
| 7 | //===----------------------------------------------------------------------===// | ||
| 8 | // | ||
| 9 | // This pass reassociates n-ary add expressions and eliminates the redundancy | ||
| 10 | // exposed by the reassociation. | ||
| 11 | // | ||
| 12 | // A motivating example: | ||
| 13 | // | ||
| 14 | //   void foo(int a, int b) { | ||
| 15 | //     bar(a + b); | ||
| 16 | //     bar((a + 2) + b); | ||
| 17 | //   } | ||
| 18 | // | ||
| 19 | // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify | ||
| 20 | // the above code to | ||
| 21 | // | ||
| 22 | //   int t = a + b; | ||
| 23 | //   bar(t); | ||
| 24 | //   bar(t + 2); | ||
| 25 | // | ||
| 26 | // However, the Reassociate pass is unable to do that because it processes each | ||
| 27 | // instruction individually and believes (a + 2) + b is the best form according | ||
| 28 | // to its rank system. | ||
| 29 | // | ||
| 30 | // To address this limitation, NaryReassociate reassociates an expression in a | ||
| 31 | // form that reuses existing instructions. As a result, NaryReassociate can | ||
| 32 | // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that | ||
| 33 | // (a + b) is computed before. | ||
| 34 | // | ||
| 35 | // NaryReassociate works as follows. For every instruction in the form of (a + | ||
| 36 | // b) + c, it checks whether a + c or b + c is already computed by a dominating | ||
| 37 | // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + | ||
| 38 | // c) + a and removes the redundancy accordingly. To efficiently look up whether | ||
| 39 | // an expression is computed before, we store each instruction seen and its SCEV | ||
| 40 | // into an SCEV-to-instruction map. | ||
| 41 | // | ||
| 42 | // Although the algorithm pattern-matches only ternary additions, it | ||
| 43 | // automatically handles many >3-ary expressions by walking through the function | ||
| 44 | // in the depth-first order. For example, given | ||
| 45 | // | ||
| 46 | //   (a + c) + d | ||
| 47 | //   ((a + b) + c) + d | ||
| 48 | // | ||
| 49 | // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites | ||
| 50 | // ((a + c) + b) + d into ((a + c) + d) + b. | ||
| 51 | // | ||
| 52 | // Finally, the above dominator-based algorithm may need to be run multiple | ||
| 53 | // iterations before emitting optimal code. One source of this need is that we | ||
| 54 | // only split an operand when it is used only once. The above algorithm can | ||
| 55 | // eliminate an instruction and decrease the usage count of its operands. As a | ||
| 56 | // result, an instruction that previously had multiple uses may become a | ||
| 57 | // single-use instruction and thus eligible for split consideration. For | ||
| 58 | // example, | ||
| 59 | // | ||
| 60 | //   ac = a + c | ||
| 61 | //   ab = a + b | ||
| 62 | //   abc = ab + c | ||
| 63 | //   ab2 = ab + b | ||
| 64 | //   ab2c = ab2 + c | ||
| 65 | // | ||
| 66 | // In the first iteration, we cannot reassociate abc to ac+b because ab is used | ||
| 67 | // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a | ||
| 68 | // result, ab2 becomes dead and ab will be used only once in the second | ||
| 69 | // iteration. | ||
| 70 | // | ||
| 71 | // Limitations and TODO items: | ||
| 72 | // | ||
| 73 | // 1) We only considers n-ary adds and muls for now. This should be extended | ||
| 74 | // and generalized. | ||
| 75 | // | ||
| 76 | //===----------------------------------------------------------------------===// | ||
| 77 | |||
| 78 | #ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H | ||
| 79 | #define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H | ||
| 80 | |||
| 81 | #include "llvm/ADT/DenseMap.h" | ||
| 82 | #include "llvm/ADT/SmallVector.h" | ||
| 83 | #include "llvm/IR/PassManager.h" | ||
| 84 | #include "llvm/IR/ValueHandle.h" | ||
| 85 | |||
| 86 | namespace llvm { | ||
| 87 | |||
| 88 | class AssumptionCache; | ||
| 89 | class BinaryOperator; | ||
| 90 | class DataLayout; | ||
| 91 | class DominatorTree; | ||
| 92 | class Function; | ||
| 93 | class GetElementPtrInst; | ||
| 94 | class Instruction; | ||
| 95 | class ScalarEvolution; | ||
| 96 | class SCEV; | ||
| 97 | class TargetLibraryInfo; | ||
| 98 | class TargetTransformInfo; | ||
| 99 | class Type; | ||
| 100 | class Value; | ||
| 101 | |||
| 102 | class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { | ||
| 103 | public: | ||
| 104 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); | ||
| 105 | |||
| 106 |   // Glue for old PM. | ||
| 107 | bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, | ||
| 108 | ScalarEvolution *SE_, TargetLibraryInfo *TLI_, | ||
| 109 | TargetTransformInfo *TTI_); | ||
| 110 | |||
| 111 | private: | ||
| 112 |   // Runs only one iteration of the dominator-based algorithm. See the header | ||
| 113 |   // comments for why we need multiple iterations. | ||
| 114 | bool doOneIteration(Function &F); | ||
| 115 | |||
| 116 |   // Reassociates I for better CSE. | ||
| 117 | Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV); | ||
| 118 | |||
| 119 |   // Reassociate GEP for better CSE. | ||
| 120 | Instruction *tryReassociateGEP(GetElementPtrInst *GEP); | ||
| 121 | |||
| 122 |   // Try splitting GEP at the I-th index and see whether either part can be | ||
| 123 |   // CSE'ed. This is a helper function for tryReassociateGEP. | ||
| 124 |   // | ||
| 125 |   // \p IndexedType The element type indexed by GEP's I-th index. This is | ||
| 126 |   //                equivalent to | ||
| 127 |   //                  GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, | ||
| 128 |   //                                      ..., i-th index). | ||
| 129 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, | ||
| 130 | unsigned I, Type *IndexedType); | ||
| 131 | |||
| 132 |   // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or | ||
| 133 |   // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. | ||
| 134 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, | ||
| 135 | unsigned I, Value *LHS, | ||
| 136 | Value *RHS, Type *IndexedType); | ||
| 137 | |||
| 138 |   // Reassociate binary operators for better CSE. | ||
| 139 | Instruction *tryReassociateBinaryOp(BinaryOperator *I); | ||
| 140 | |||
| 141 |   // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly | ||
| 142 |   // passed. | ||
| 143 | Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, | ||
| 144 | BinaryOperator *I); | ||
| 145 |   // Rewrites I to (LHS op RHS) if LHS is computed already. | ||
| 146 | Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, | ||
| 147 | BinaryOperator *I); | ||
| 148 | |||
| 149 |   // Tries to match Op1 and Op2 by using V. | ||
| 150 | bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); | ||
| 151 | |||
| 152 |   // Gets SCEV for (LHS op RHS). | ||
| 153 | const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, | ||
| 154 | const SCEV *RHS); | ||
| 155 | |||
| 156 |   // Returns the closest dominator of \c Dominatee that computes | ||
| 157 |   // \c CandidateExpr. Returns null if not found. | ||
| 158 | Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, | ||
| 159 | Instruction *Dominatee); | ||
| 160 | |||
| 161 |   // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p | ||
| 162 |   // OrigSCEV is set if \I matches Min/Max regardless whether resassociation is | ||
| 163 |   // done or not. If reassociation was successful newly generated instruction is | ||
| 164 |   // returned, otherwise nullptr. | ||
| 165 | template <typename PredT> | ||
| 166 | Instruction *matchAndReassociateMinOrMax(Instruction *I, | ||
| 167 | const SCEV *&OrigSCEV); | ||
| 168 | |||
| 169 |   // Reassociate Min/Max. | ||
| 170 | template <typename MaxMinT> | ||
| 171 | Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS, | ||
| 172 | Value *RHS); | ||
| 173 | |||
| 174 |   // GetElementPtrInst implicitly sign-extends an index if the index is shorter | ||
| 175 |   // than the pointer size. This function returns whether Index is shorter than | ||
| 176 |   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order | ||
| 177 |   // to be an index of GEP. | ||
| 178 | bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); | ||
| 179 | |||
| 180 | AssumptionCache *AC; | ||
| 181 | const DataLayout *DL; | ||
| 182 | DominatorTree *DT; | ||
| 183 | ScalarEvolution *SE; | ||
| 184 | TargetLibraryInfo *TLI; | ||
| 185 | TargetTransformInfo *TTI; | ||
| 186 | |||
| 187 |   // A lookup table quickly telling which instructions compute the given SCEV. | ||
| 188 |   // Note that there can be multiple instructions at different locations | ||
| 189 |   // computing to the same SCEV, so we map a SCEV to an instruction list.  For | ||
| 190 |   // example, | ||
| 191 |   // | ||
| 192 |   //   if (p1) | ||
| 193 |   //     foo(a + b); | ||
| 194 |   //   if (p2) | ||
| 195 |   //     bar(a + b); | ||
| 196 | DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; | ||
| 197 | }; | ||
| 198 | |||
| 199 | } // end namespace llvm | ||
| 200 | |||
| 201 | #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |