Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- IROutliner.h - Extract similar IR regions into functions --*- C++ -*-==// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // \file |
||
10 | // The interface file for the IROutliner which is used by the IROutliner Pass. |
||
11 | // |
||
12 | // The outliner uses the IRSimilarityIdentifier to identify the similar regions |
||
13 | // of code. It evaluates each set of IRSimilarityCandidates with an estimate of |
||
14 | // whether it will provide code size reduction. Each region is extracted using |
||
15 | // the code extractor. These extracted functions are consolidated into a single |
||
16 | // function and called from the extracted call site. |
||
17 | // |
||
18 | // For example: |
||
19 | // \code |
||
20 | // %1 = add i32 %a, %b |
||
21 | // %2 = add i32 %b, %a |
||
22 | // %3 = add i32 %b, %a |
||
23 | // %4 = add i32 %a, %b |
||
24 | // \endcode |
||
25 | // would become function |
||
26 | // \code |
||
27 | // define internal void outlined_ir_function(i32 %0, i32 %1) { |
||
28 | // %1 = add i32 %0, %1 |
||
29 | // %2 = add i32 %1, %0 |
||
30 | // ret void |
||
31 | // } |
||
32 | // \endcode |
||
33 | // with calls: |
||
34 | // \code |
||
35 | // call void outlined_ir_function(i32 %a, i32 %b) |
||
36 | // call void outlined_ir_function(i32 %b, i32 %a) |
||
37 | // \endcode |
||
38 | // |
||
39 | //===----------------------------------------------------------------------===// |
||
40 | |||
41 | #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H |
||
42 | #define LLVM_TRANSFORMS_IPO_IROUTLINER_H |
||
43 | |||
44 | #include "llvm/Analysis/IRSimilarityIdentifier.h" |
||
45 | #include "llvm/IR/PassManager.h" |
||
46 | #include "llvm/Support/InstructionCost.h" |
||
47 | #include "llvm/Transforms/Utils/CodeExtractor.h" |
||
48 | |||
49 | struct OutlinableGroup; |
||
50 | |||
51 | namespace llvm { |
||
52 | using namespace CallingConv; |
||
53 | using namespace IRSimilarity; |
||
54 | |||
55 | class Module; |
||
56 | class TargetTransformInfo; |
||
57 | class OptimizationRemarkEmitter; |
||
58 | |||
59 | /// The OutlinableRegion holds all the information for a specific region, or |
||
60 | /// sequence of instructions. This includes what values need to be hoisted to |
||
61 | /// arguments from the extracted function, inputs and outputs to the region, and |
||
62 | /// mapping from the extracted function arguments to overall function arguments. |
||
63 | struct OutlinableRegion { |
||
64 | /// Describes the region of code. |
||
65 | IRSimilarityCandidate *Candidate = nullptr; |
||
66 | |||
67 | /// If this region is outlined, the front and back IRInstructionData could |
||
68 | /// potentially become invalidated if the only new instruction is a call. |
||
69 | /// This ensures that we replace in the instruction in the IRInstructionData. |
||
70 | IRInstructionData *NewFront = nullptr; |
||
71 | IRInstructionData *NewBack = nullptr; |
||
72 | |||
73 | /// The number of extracted inputs from the CodeExtractor. |
||
74 | unsigned NumExtractedInputs = 0; |
||
75 | |||
76 | /// The corresponding BasicBlock with the appropriate stores for this |
||
77 | /// OutlinableRegion in the overall function. |
||
78 | unsigned OutputBlockNum = -1; |
||
79 | |||
80 | /// Mapping the extracted argument number to the argument number in the |
||
81 | /// overall function. Since there will be inputs, such as elevated constants |
||
82 | /// that are not the same in each region in a SimilarityGroup, or values that |
||
83 | /// cannot be sunk into the extracted section in every region, we must keep |
||
84 | /// track of which extracted argument maps to which overall argument. |
||
85 | DenseMap<unsigned, unsigned> ExtractedArgToAgg; |
||
86 | DenseMap<unsigned, unsigned> AggArgToExtracted; |
||
87 | |||
88 | /// Values in the outlined functions will often be replaced by arguments. When |
||
89 | /// finding corresponding values from one region to another, the found value |
||
90 | /// will be the value the argument previously replaced. This structure maps |
||
91 | /// any replaced values for the region to the aggregate aggregate argument |
||
92 | /// in the overall function. |
||
93 | DenseMap<Value *, Value *> RemappedArguments; |
||
94 | |||
95 | /// Marks whether we need to change the order of the arguments when mapping |
||
96 | /// the old extracted function call to the new aggregate outlined function |
||
97 | /// call. |
||
98 | bool ChangedArgOrder = false; |
||
99 | |||
100 | /// Marks whether this region ends in a branch, there is special handling |
||
101 | /// required for the following basic blocks in this case. |
||
102 | bool EndsInBranch = false; |
||
103 | |||
104 | /// The PHIBlocks with their corresponding return block based on the return |
||
105 | /// value as the key. |
||
106 | DenseMap<Value *, BasicBlock *> PHIBlocks; |
||
107 | |||
108 | /// Mapping of the argument number in the deduplicated function |
||
109 | /// to a given constant, which is used when creating the arguments to the call |
||
110 | /// to the newly created deduplicated function. This is handled separately |
||
111 | /// since the CodeExtractor does not recognize constants. |
||
112 | DenseMap<unsigned, Constant *> AggArgToConstant; |
||
113 | |||
114 | /// The global value numbers that are used as outputs for this section. Once |
||
115 | /// extracted, each output will be stored to an output register. This |
||
116 | /// documents the global value numbers that are used in this pattern. |
||
117 | SmallVector<unsigned, 4> GVNStores; |
||
118 | |||
119 | /// Used to create an outlined function. |
||
120 | CodeExtractor *CE = nullptr; |
||
121 | |||
122 | /// The call site of the extracted region. |
||
123 | CallInst *Call = nullptr; |
||
124 | |||
125 | /// The function for the extracted region. |
||
126 | Function *ExtractedFunction = nullptr; |
||
127 | |||
128 | /// Flag for whether we have split out the IRSimilarityCanidate. That is, |
||
129 | /// make the region contained the IRSimilarityCandidate its own BasicBlock. |
||
130 | bool CandidateSplit = false; |
||
131 | |||
132 | /// Flag for whether we should not consider this region for extraction. |
||
133 | bool IgnoreRegion = false; |
||
134 | |||
135 | /// The BasicBlock that is before the start of the region BasicBlock, |
||
136 | /// only defined when the region has been split. |
||
137 | BasicBlock *PrevBB = nullptr; |
||
138 | |||
139 | /// The BasicBlock that contains the starting instruction of the region. |
||
140 | BasicBlock *StartBB = nullptr; |
||
141 | |||
142 | /// The BasicBlock that contains the ending instruction of the region. |
||
143 | BasicBlock *EndBB = nullptr; |
||
144 | |||
145 | /// The BasicBlock that is after the start of the region BasicBlock, |
||
146 | /// only defined when the region has been split. |
||
147 | BasicBlock *FollowBB = nullptr; |
||
148 | |||
149 | /// The Outlinable Group that contains this region and structurally similar |
||
150 | /// regions to this region. |
||
151 | OutlinableGroup *Parent = nullptr; |
||
152 | |||
153 | OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group) |
||
154 | : Candidate(&C), Parent(&Group) { |
||
155 | StartBB = C.getStartBB(); |
||
156 | EndBB = C.getEndBB(); |
||
157 | } |
||
158 | |||
159 | /// For the contained region, split the parent BasicBlock at the starting and |
||
160 | /// ending instructions of the contained IRSimilarityCandidate. |
||
161 | void splitCandidate(); |
||
162 | |||
163 | /// For the contained region, reattach the BasicBlock at the starting and |
||
164 | /// ending instructions of the contained IRSimilarityCandidate, or if the |
||
165 | /// function has been extracted, the start and end of the BasicBlock |
||
166 | /// containing the called function. |
||
167 | void reattachCandidate(); |
||
168 | |||
169 | /// Find a corresponding value for \p V in similar OutlinableRegion \p Other. |
||
170 | /// |
||
171 | /// \param Other [in] - The OutlinableRegion to find the corresponding Value |
||
172 | /// in. |
||
173 | /// \param V [in] - The Value to look for in the other region. |
||
174 | /// \return The corresponding Value to \p V if it exists, otherwise nullptr. |
||
175 | Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V); |
||
176 | |||
177 | /// Find a corresponding BasicBlock for \p BB in similar OutlinableRegion \p Other. |
||
178 | /// |
||
179 | /// \param Other [in] - The OutlinableRegion to find the corresponding |
||
180 | /// BasicBlock in. |
||
181 | /// \param BB [in] - The BasicBlock to look for in the other region. |
||
182 | /// \return The corresponding Value to \p V if it exists, otherwise nullptr. |
||
183 | BasicBlock *findCorrespondingBlockIn(const OutlinableRegion &Other, |
||
184 | BasicBlock *BB); |
||
185 | |||
186 | /// Get the size of the code removed from the region. |
||
187 | /// |
||
188 | /// \param [in] TTI - The TargetTransformInfo for the parent function. |
||
189 | /// \returns the code size of the region |
||
190 | InstructionCost getBenefit(TargetTransformInfo &TTI); |
||
191 | }; |
||
192 | |||
193 | /// This class is a pass that identifies similarity in a Module, extracts |
||
194 | /// instances of the similarity, and then consolidating the similar regions |
||
195 | /// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass |
||
196 | /// to identify the similar regions of code, and then extracts the similar |
||
197 | /// sections into a single function. See the above for an example as to |
||
198 | /// how code is extracted and consolidated into a single function. |
||
199 | class IROutliner { |
||
200 | public: |
||
201 | IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI, |
||
202 | function_ref<IRSimilarityIdentifier &(Module &)> GIRSI, |
||
203 | function_ref<OptimizationRemarkEmitter &(Function &)> GORE) |
||
204 | : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) { |
||
205 | |||
206 | // Check that the DenseMap implementation has not changed. |
||
207 | assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 && |
||
208 | "DenseMapInfo<unsigned>'s empty key isn't -1!"); |
||
209 | assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 && |
||
210 | "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); |
||
211 | } |
||
212 | bool run(Module &M); |
||
213 | |||
214 | private: |
||
215 | /// Find repeated similar code sequences in \p M and outline them into new |
||
216 | /// Functions. |
||
217 | /// |
||
218 | /// \param [in] M - The module to outline from. |
||
219 | /// \returns The number of Functions created. |
||
220 | unsigned doOutline(Module &M); |
||
221 | |||
222 | /// Check whether an OutlinableRegion is incompatible with code already |
||
223 | /// outlined. OutlinableRegions are incomptaible when there are overlapping |
||
224 | /// instructions, or code that has not been recorded has been added to the |
||
225 | /// instructions. |
||
226 | /// |
||
227 | /// \param [in] Region - The OutlinableRegion to check for conflicts with |
||
228 | /// already outlined code. |
||
229 | /// \returns whether the region can safely be outlined. |
||
230 | bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region); |
||
231 | |||
232 | /// Remove all the IRSimilarityCandidates from \p CandidateVec that have |
||
233 | /// instructions contained in a previously outlined region and put the |
||
234 | /// remaining regions in \p CurrentGroup. |
||
235 | /// |
||
236 | /// \param [in] CandidateVec - List of similarity candidates for regions with |
||
237 | /// the same similarity structure. |
||
238 | /// \param [in,out] CurrentGroup - Contains the potential sections to |
||
239 | /// be outlined. |
||
240 | void |
||
241 | pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec, |
||
242 | OutlinableGroup &CurrentGroup); |
||
243 | |||
244 | /// Create the function based on the overall types found in the current |
||
245 | /// regions being outlined. |
||
246 | /// |
||
247 | /// \param M - The module to outline from. |
||
248 | /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined. |
||
249 | /// \param [in] FunctionNameSuffix - How many functions have we previously |
||
250 | /// created. |
||
251 | /// \returns the newly created function. |
||
252 | Function *createFunction(Module &M, OutlinableGroup &CG, |
||
253 | unsigned FunctionNameSuffix); |
||
254 | |||
255 | /// Identify the needed extracted inputs in a section, and add to the overall |
||
256 | /// function if needed. |
||
257 | /// |
||
258 | /// \param [in] M - The module to outline from. |
||
259 | /// \param [in,out] Region - The region to be extracted. |
||
260 | /// \param [in] NotSame - The global value numbers of the Values in the region |
||
261 | /// that do not have the same Constant in each strucutrally similar region. |
||
262 | void findAddInputsOutputs(Module &M, OutlinableRegion &Region, |
||
263 | DenseSet<unsigned> &NotSame); |
||
264 | |||
265 | /// Find the number of instructions that will be removed by extracting the |
||
266 | /// OutlinableRegions in \p CurrentGroup. |
||
267 | /// |
||
268 | /// \param [in] CurrentGroup - The collection of OutlinableRegions to be |
||
269 | /// analyzed. |
||
270 | /// \returns the number of outlined instructions across all regions. |
||
271 | InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup); |
||
272 | |||
273 | /// Find the number of instructions that will be added by reloading arguments. |
||
274 | /// |
||
275 | /// \param [in] CurrentGroup - The collection of OutlinableRegions to be |
||
276 | /// analyzed. |
||
277 | /// \returns the number of added reload instructions across all regions. |
||
278 | InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup); |
||
279 | |||
280 | /// Find the cost and the benefit of \p CurrentGroup and save it back to |
||
281 | /// \p CurrentGroup. |
||
282 | /// |
||
283 | /// \param [in] M - The module being analyzed |
||
284 | /// \param [in,out] CurrentGroup - The overall outlined section |
||
285 | void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup); |
||
286 | |||
287 | /// Update the output mapping based on the load instruction, and the outputs |
||
288 | /// of the extracted function. |
||
289 | /// |
||
290 | /// \param Region - The region extracted |
||
291 | /// \param Outputs - The outputs from the extracted function. |
||
292 | /// \param LI - The load instruction used to update the mapping. |
||
293 | void updateOutputMapping(OutlinableRegion &Region, |
||
294 | ArrayRef<Value *> Outputs, LoadInst *LI); |
||
295 | |||
296 | /// Extract \p Region into its own function. |
||
297 | /// |
||
298 | /// \param [in] Region - The region to be extracted into its own function. |
||
299 | /// \returns True if it was successfully outlined. |
||
300 | bool extractSection(OutlinableRegion &Region); |
||
301 | |||
302 | /// For the similarities found, and the extracted sections, create a single |
||
303 | /// outlined function with appropriate output blocks as necessary. |
||
304 | /// |
||
305 | /// \param [in] M - The module to outline from |
||
306 | /// \param [in] CurrentGroup - The set of extracted sections to consolidate. |
||
307 | /// \param [in,out] FuncsToRemove - List of functions to remove from the |
||
308 | /// module after outlining is completed. |
||
309 | /// \param [in,out] OutlinedFunctionNum - the number of new outlined |
||
310 | /// functions. |
||
311 | void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup, |
||
312 | std::vector<Function *> &FuncsToRemove, |
||
313 | unsigned &OutlinedFunctionNum); |
||
314 | |||
315 | /// If true, enables us to outline from functions that have LinkOnceFromODR |
||
316 | /// linkages. |
||
317 | bool OutlineFromLinkODRs = false; |
||
318 | |||
319 | /// If false, we do not worry if the cost is greater than the benefit. This |
||
320 | /// is for debugging and testing, so that we can test small cases to ensure |
||
321 | /// that the outlining is being done correctly. |
||
322 | bool CostModel = true; |
||
323 | |||
324 | /// The set of outlined Instructions, identified by their location in the |
||
325 | /// sequential ordering of instructions in a Module. |
||
326 | DenseSet<unsigned> Outlined; |
||
327 | |||
328 | /// TargetTransformInfo lambda for target specific information. |
||
329 | function_ref<TargetTransformInfo &(Function &)> getTTI; |
||
330 | |||
331 | /// A mapping from newly created reloaded output values to the original value. |
||
332 | /// If an value is replace by an output from an outlined region, this maps |
||
333 | /// that Value, back to its original Value. |
||
334 | DenseMap<Value *, Value *> OutputMappings; |
||
335 | |||
336 | /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier. |
||
337 | function_ref<IRSimilarityIdentifier &(Module &)> getIRSI; |
||
338 | |||
339 | /// The optimization remark emitter for the pass. |
||
340 | function_ref<OptimizationRemarkEmitter &(Function &)> getORE; |
||
341 | |||
342 | /// The memory allocator used to allocate the CodeExtractors. |
||
343 | SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator; |
||
344 | |||
345 | /// The memory allocator used to allocate the OutlinableRegions. |
||
346 | SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator; |
||
347 | |||
348 | /// The memory allocator used to allocate new IRInstructionData. |
||
349 | SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; |
||
350 | |||
351 | /// Custom InstVisitor to classify different instructions for whether it can |
||
352 | /// be analyzed for similarity. This is needed as there may be instruction we |
||
353 | /// can identify as having similarity, but are more complicated to outline. |
||
354 | struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { |
||
355 | InstructionAllowed() = default; |
||
356 | |||
357 | bool visitBranchInst(BranchInst &BI) { return EnableBranches; } |
||
358 | bool visitPHINode(PHINode &PN) { return EnableBranches; } |
||
359 | // TODO: Handle allocas. |
||
360 | bool visitAllocaInst(AllocaInst &AI) { return false; } |
||
361 | // VAArg instructions are not allowed since this could cause difficulty when |
||
362 | // differentiating between different sets of variable instructions in |
||
363 | // the deduplicated outlined regions. |
||
364 | bool visitVAArgInst(VAArgInst &VI) { return false; } |
||
365 | // We exclude all exception handling cases since they are so context |
||
366 | // dependent. |
||
367 | bool visitLandingPadInst(LandingPadInst &LPI) { return false; } |
||
368 | bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; } |
||
369 | // DebugInfo should be included in the regions, but should not be |
||
370 | // analyzed for similarity as it has no bearing on the outcome of the |
||
371 | // program. |
||
372 | bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } |
||
373 | // TODO: Handle specific intrinsics individually from those that can be |
||
374 | // handled. |
||
375 | bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } |
||
376 | // We only handle CallInsts that are not indirect, since we cannot guarantee |
||
377 | // that they have a name in these cases. |
||
378 | bool visitCallInst(CallInst &CI) { |
||
379 | Function *F = CI.getCalledFunction(); |
||
380 | bool IsIndirectCall = CI.isIndirectCall(); |
||
381 | if (IsIndirectCall && !EnableIndirectCalls) |
||
382 | return false; |
||
383 | if (!F && !IsIndirectCall) |
||
384 | return false; |
||
385 | // Returning twice can cause issues with the state of the function call |
||
386 | // that were not expected when the function was used, so we do not include |
||
387 | // the call in outlined functions. |
||
388 | if (CI.canReturnTwice()) |
||
389 | return false; |
||
390 | // TODO: Update the outliner to capture whether the outlined function |
||
391 | // needs these extra attributes. |
||
392 | |||
393 | // Functions marked with the swifttailcc and tailcc calling conventions |
||
394 | // require special handling when outlining musttail functions. The |
||
395 | // calling convention must be passed down to the outlined function as |
||
396 | // well. Further, there is special handling for musttail calls as well, |
||
397 | // requiring a return call directly after. For now, the outliner does not |
||
398 | // support this. |
||
399 | bool IsTailCC = CI.getCallingConv() == CallingConv::SwiftTail || |
||
400 | CI.getCallingConv() == CallingConv::Tail; |
||
401 | if (IsTailCC && !EnableMustTailCalls) |
||
402 | return false; |
||
403 | if (CI.isMustTailCall() && !EnableMustTailCalls) |
||
404 | return false; |
||
405 | // The outliner can only handle musttail items if it is also accompanied |
||
406 | // by the tailcc or swifttailcc calling convention. |
||
407 | if (CI.isMustTailCall() && !IsTailCC) |
||
408 | return false; |
||
409 | return true; |
||
410 | } |
||
411 | // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside |
||
412 | // the outlined region, and then returned as an output, this will have to be |
||
413 | // handled differently. |
||
414 | bool visitFreezeInst(FreezeInst &CI) { return false; } |
||
415 | // TODO: We do not current handle similarity that changes the control flow. |
||
416 | bool visitInvokeInst(InvokeInst &II) { return false; } |
||
417 | // TODO: We do not current handle similarity that changes the control flow. |
||
418 | bool visitCallBrInst(CallBrInst &CBI) { return false; } |
||
419 | // TODO: Handle interblock similarity. |
||
420 | bool visitTerminator(Instruction &I) { return false; } |
||
421 | bool visitInstruction(Instruction &I) { return true; } |
||
422 | |||
423 | // The flag variable that marks whether we should allow branch instructions |
||
424 | // to be outlined. |
||
425 | bool EnableBranches = false; |
||
426 | |||
427 | // The flag variable that marks whether we should allow indirect calls |
||
428 | // to be outlined. |
||
429 | bool EnableIndirectCalls = true; |
||
430 | |||
431 | // The flag variable that marks whether we should allow intrinsics |
||
432 | // instructions to be outlined. |
||
433 | bool EnableIntrinsics = false; |
||
434 | |||
435 | // The flag variable that marks whether we should allow musttail calls. |
||
436 | bool EnableMustTailCalls = false; |
||
437 | }; |
||
438 | |||
439 | /// A InstVisitor used to exclude certain instructions from being outlined. |
||
440 | InstructionAllowed InstructionClassifier; |
||
441 | }; |
||
442 | |||
443 | /// Pass to outline similar regions. |
||
444 | class IROutlinerPass : public PassInfoMixin<IROutlinerPass> { |
||
445 | public: |
||
446 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
||
447 | }; |
||
448 | |||
449 | } // end namespace llvm |
||
450 | |||
451 | #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H |