Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This pass reassociates n-ary add expressions and eliminates the redundancy |
||
10 | // exposed by the reassociation. |
||
11 | // |
||
12 | // A motivating example: |
||
13 | // |
||
14 | // void foo(int a, int b) { |
||
15 | // bar(a + b); |
||
16 | // bar((a + 2) + b); |
||
17 | // } |
||
18 | // |
||
19 | // An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify |
||
20 | // the above code to |
||
21 | // |
||
22 | // int t = a + b; |
||
23 | // bar(t); |
||
24 | // bar(t + 2); |
||
25 | // |
||
26 | // However, the Reassociate pass is unable to do that because it processes each |
||
27 | // instruction individually and believes (a + 2) + b is the best form according |
||
28 | // to its rank system. |
||
29 | // |
||
30 | // To address this limitation, NaryReassociate reassociates an expression in a |
||
31 | // form that reuses existing instructions. As a result, NaryReassociate can |
||
32 | // reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that |
||
33 | // (a + b) is computed before. |
||
34 | // |
||
35 | // NaryReassociate works as follows. For every instruction in the form of (a + |
||
36 | // b) + c, it checks whether a + c or b + c is already computed by a dominating |
||
37 | // instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b + |
||
38 | // c) + a and removes the redundancy accordingly. To efficiently look up whether |
||
39 | // an expression is computed before, we store each instruction seen and its SCEV |
||
40 | // into an SCEV-to-instruction map. |
||
41 | // |
||
42 | // Although the algorithm pattern-matches only ternary additions, it |
||
43 | // automatically handles many >3-ary expressions by walking through the function |
||
44 | // in the depth-first order. For example, given |
||
45 | // |
||
46 | // (a + c) + d |
||
47 | // ((a + b) + c) + d |
||
48 | // |
||
49 | // NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites |
||
50 | // ((a + c) + b) + d into ((a + c) + d) + b. |
||
51 | // |
||
52 | // Finally, the above dominator-based algorithm may need to be run multiple |
||
53 | // iterations before emitting optimal code. One source of this need is that we |
||
54 | // only split an operand when it is used only once. The above algorithm can |
||
55 | // eliminate an instruction and decrease the usage count of its operands. As a |
||
56 | // result, an instruction that previously had multiple uses may become a |
||
57 | // single-use instruction and thus eligible for split consideration. For |
||
58 | // example, |
||
59 | // |
||
60 | // ac = a + c |
||
61 | // ab = a + b |
||
62 | // abc = ab + c |
||
63 | // ab2 = ab + b |
||
64 | // ab2c = ab2 + c |
||
65 | // |
||
66 | // In the first iteration, we cannot reassociate abc to ac+b because ab is used |
||
67 | // twice. However, we can reassociate ab2c to abc+b in the first iteration. As a |
||
68 | // result, ab2 becomes dead and ab will be used only once in the second |
||
69 | // iteration. |
||
70 | // |
||
71 | // Limitations and TODO items: |
||
72 | // |
||
73 | // 1) We only considers n-ary adds and muls for now. This should be extended |
||
74 | // and generalized. |
||
75 | // |
||
76 | //===----------------------------------------------------------------------===// |
||
77 | |||
78 | #ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |
||
79 | #define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |
||
80 | |||
81 | #include "llvm/ADT/DenseMap.h" |
||
82 | #include "llvm/ADT/SmallVector.h" |
||
83 | #include "llvm/IR/PassManager.h" |
||
84 | #include "llvm/IR/ValueHandle.h" |
||
85 | |||
86 | namespace llvm { |
||
87 | |||
88 | class AssumptionCache; |
||
89 | class BinaryOperator; |
||
90 | class DataLayout; |
||
91 | class DominatorTree; |
||
92 | class Function; |
||
93 | class GetElementPtrInst; |
||
94 | class Instruction; |
||
95 | class ScalarEvolution; |
||
96 | class SCEV; |
||
97 | class TargetLibraryInfo; |
||
98 | class TargetTransformInfo; |
||
99 | class Type; |
||
100 | class Value; |
||
101 | |||
102 | class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { |
||
103 | public: |
||
104 | PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); |
||
105 | |||
106 | // Glue for old PM. |
||
107 | bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_, |
||
108 | ScalarEvolution *SE_, TargetLibraryInfo *TLI_, |
||
109 | TargetTransformInfo *TTI_); |
||
110 | |||
111 | private: |
||
112 | // Runs only one iteration of the dominator-based algorithm. See the header |
||
113 | // comments for why we need multiple iterations. |
||
114 | bool doOneIteration(Function &F); |
||
115 | |||
116 | // Reassociates I for better CSE. |
||
117 | Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV); |
||
118 | |||
119 | // Reassociate GEP for better CSE. |
||
120 | Instruction *tryReassociateGEP(GetElementPtrInst *GEP); |
||
121 | |||
122 | // Try splitting GEP at the I-th index and see whether either part can be |
||
123 | // CSE'ed. This is a helper function for tryReassociateGEP. |
||
124 | // |
||
125 | // \p IndexedType The element type indexed by GEP's I-th index. This is |
||
126 | // equivalent to |
||
127 | // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index, |
||
128 | // ..., i-th index). |
||
129 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, |
||
130 | unsigned I, Type *IndexedType); |
||
131 | |||
132 | // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or |
||
133 | // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. |
||
134 | GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, |
||
135 | unsigned I, Value *LHS, |
||
136 | Value *RHS, Type *IndexedType); |
||
137 | |||
138 | // Reassociate binary operators for better CSE. |
||
139 | Instruction *tryReassociateBinaryOp(BinaryOperator *I); |
||
140 | |||
141 | // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly |
||
142 | // passed. |
||
143 | Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS, |
||
144 | BinaryOperator *I); |
||
145 | // Rewrites I to (LHS op RHS) if LHS is computed already. |
||
146 | Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS, |
||
147 | BinaryOperator *I); |
||
148 | |||
149 | // Tries to match Op1 and Op2 by using V. |
||
150 | bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2); |
||
151 | |||
152 | // Gets SCEV for (LHS op RHS). |
||
153 | const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS, |
||
154 | const SCEV *RHS); |
||
155 | |||
156 | // Returns the closest dominator of \c Dominatee that computes |
||
157 | // \c CandidateExpr. Returns null if not found. |
||
158 | Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, |
||
159 | Instruction *Dominatee); |
||
160 | |||
161 | // Try to match \p I as signed/unsigned Min/Max and reassociate it. \p |
||
162 | // OrigSCEV is set if \I matches Min/Max regardless whether resassociation is |
||
163 | // done or not. If reassociation was successful newly generated instruction is |
||
164 | // returned, otherwise nullptr. |
||
165 | template <typename PredT> |
||
166 | Instruction *matchAndReassociateMinOrMax(Instruction *I, |
||
167 | const SCEV *&OrigSCEV); |
||
168 | |||
169 | // Reassociate Min/Max. |
||
170 | template <typename MaxMinT> |
||
171 | Value *tryReassociateMinOrMax(Instruction *I, MaxMinT MaxMinMatch, Value *LHS, |
||
172 | Value *RHS); |
||
173 | |||
174 | // GetElementPtrInst implicitly sign-extends an index if the index is shorter |
||
175 | // than the pointer size. This function returns whether Index is shorter than |
||
176 | // GEP's pointer size, i.e., whether Index needs to be sign-extended in order |
||
177 | // to be an index of GEP. |
||
178 | bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP); |
||
179 | |||
180 | AssumptionCache *AC; |
||
181 | const DataLayout *DL; |
||
182 | DominatorTree *DT; |
||
183 | ScalarEvolution *SE; |
||
184 | TargetLibraryInfo *TLI; |
||
185 | TargetTransformInfo *TTI; |
||
186 | |||
187 | // A lookup table quickly telling which instructions compute the given SCEV. |
||
188 | // Note that there can be multiple instructions at different locations |
||
189 | // computing to the same SCEV, so we map a SCEV to an instruction list. For |
||
190 | // example, |
||
191 | // |
||
192 | // if (p1) |
||
193 | // foo(a + b); |
||
194 | // if (p2) |
||
195 | // bar(a + b); |
||
196 | DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; |
||
197 | }; |
||
198 | |||
199 | } // end namespace llvm |
||
200 | |||
201 | #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H |