Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // \file |
||
10 | // Interface file for the IRSimilarityIdentifier for identifying similarities in |
||
11 | // IR including the IRInstructionMapper, which maps an Instruction to unsigned |
||
12 | // integers. |
||
13 | // |
||
14 | // Two sequences of instructions are called "similar" if they perform the same |
||
15 | // series of operations for all inputs. |
||
16 | // |
||
17 | // \code |
||
18 | // %1 = add i32 %a, 10 |
||
19 | // %2 = add i32 %a, %1 |
||
20 | // %3 = icmp slt icmp %1, %2 |
||
21 | // \endcode |
||
22 | // |
||
23 | // and |
||
24 | // |
||
25 | // \code |
||
26 | // %1 = add i32 11, %a |
||
27 | // %2 = sub i32 %a, %1 |
||
28 | // %3 = icmp sgt icmp %2, %1 |
||
29 | // \endcode |
||
30 | // |
||
31 | // ultimately have the same result, even if the inputs, and structure are |
||
32 | // slightly different. |
||
33 | // |
||
34 | // For instructions, we do not worry about operands that do not have fixed |
||
35 | // semantic meaning to the program. We consider the opcode that the instruction |
||
36 | // has, the types, parameters, and extra information such as the function name, |
||
37 | // or comparison predicate. These are used to create a hash to map instructions |
||
38 | // to integers to be used in similarity matching in sequences of instructions |
||
39 | // |
||
40 | // Terminology: |
||
41 | // An IRSimilarityCandidate is a region of IRInstructionData (wrapped |
||
42 | // Instructions), usually used to denote a region of similarity has been found. |
||
43 | // |
||
44 | // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally |
||
45 | // similar to one another. |
||
46 | // |
||
47 | //===----------------------------------------------------------------------===// |
||
48 | |||
49 | #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |
||
50 | #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |
||
51 | |||
52 | #include "llvm/IR/InstVisitor.h" |
||
53 | #include "llvm/IR/Instructions.h" |
||
54 | #include "llvm/IR/PassManager.h" |
||
55 | #include "llvm/Pass.h" |
||
56 | #include "llvm/Support/Allocator.h" |
||
57 | #include <optional> |
||
58 | |||
59 | namespace llvm { |
||
60 | class Module; |
||
61 | |||
62 | namespace IRSimilarity { |
||
63 | |||
64 | struct IRInstructionDataList; |
||
65 | |||
66 | /// This represents what is and is not supported when finding similarity in |
||
67 | /// Instructions. |
||
68 | /// |
||
69 | /// Legal Instructions are considered when looking at similarity between |
||
70 | /// Instructions. |
||
71 | /// |
||
72 | /// Illegal Instructions cannot be considered when looking for similarity |
||
73 | /// between Instructions. They act as boundaries between similarity regions. |
||
74 | /// |
||
75 | /// Invisible Instructions are skipped over during analysis. |
||
76 | // TODO: Shared with MachineOutliner |
||
77 | enum InstrType { Legal, Illegal, Invisible }; |
||
78 | |||
79 | /// This provides the utilities for hashing an Instruction to an unsigned |
||
80 | /// integer. Two IRInstructionDatas produce the same hash value when their |
||
81 | /// underlying Instructions perform the same operation (even if they don't have |
||
82 | /// the same input operands.) |
||
83 | /// As a more concrete example, consider the following: |
||
84 | /// |
||
85 | /// \code |
||
86 | /// %add1 = add i32 %a, %b |
||
87 | /// %add2 = add i32 %c, %d |
||
88 | /// %add3 = add i64 %e, %f |
||
89 | /// \endcode |
||
90 | /// |
||
91 | // Then the IRInstructionData wrappers for these Instructions may be hashed like |
||
92 | /// so: |
||
93 | /// |
||
94 | /// \code |
||
95 | /// ; These two adds have the same types and operand types, so they hash to the |
||
96 | /// ; same number. |
||
97 | /// %add1 = add i32 %a, %b ; Hash: 1 |
||
98 | /// %add2 = add i32 %c, %d ; Hash: 1 |
||
99 | /// ; This add produces an i64. This differentiates it from %add1 and %add2. So, |
||
100 | /// ; it hashes to a different number. |
||
101 | /// %add3 = add i64 %e, %f; Hash: 2 |
||
102 | /// \endcode |
||
103 | /// |
||
104 | /// |
||
105 | /// This hashing scheme will be used to represent the program as a very long |
||
106 | /// string. This string can then be placed in a data structure which can be used |
||
107 | /// for similarity queries. |
||
108 | /// |
||
109 | /// TODO: Handle types of Instructions which can be equal even with different |
||
110 | /// operands. (E.g. comparisons with swapped predicates.) |
||
111 | /// TODO: Handle CallInsts, which are only checked for function type |
||
112 | /// by \ref isSameOperationAs. |
||
113 | /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the |
||
114 | /// exact same, and some do not. |
||
115 | struct IRInstructionData |
||
116 | : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> { |
||
117 | |||
118 | /// The source Instruction that is being wrapped. |
||
119 | Instruction *Inst = nullptr; |
||
120 | /// The values of the operands in the Instruction. |
||
121 | SmallVector<Value *, 4> OperVals; |
||
122 | /// The legality of the wrapped instruction. This is informed by InstrType, |
||
123 | /// and is used when checking when two instructions are considered similar. |
||
124 | /// If either instruction is not legal, the instructions are automatically not |
||
125 | /// considered similar. |
||
126 | bool Legal = false; |
||
127 | |||
128 | /// This is only relevant if we are wrapping a CmpInst where we needed to |
||
129 | /// change the predicate of a compare instruction from a greater than form |
||
130 | /// to a less than form. It is None otherwise. |
||
131 | std::optional<CmpInst::Predicate> RevisedPredicate; |
||
132 | |||
133 | /// This is only relevant if we are wrapping a CallInst. If we are requiring |
||
134 | /// that the function calls have matching names as well as types, and the |
||
135 | /// call is not an indirect call, this will hold the name of the function. If |
||
136 | /// it is an indirect string, it will be the empty string. However, if this |
||
137 | /// requirement is not in place it will be the empty string regardless of the |
||
138 | /// function call type. The value held here is used to create the hash of the |
||
139 | /// instruction, and check to make sure two instructions are close to one |
||
140 | /// another. |
||
141 | std::optional<std::string> CalleeName; |
||
142 | |||
143 | /// This structure holds the distances of how far "ahead of" or "behind" the |
||
144 | /// target blocks of a branch, or the incoming blocks of a phi nodes are. |
||
145 | /// If the value is negative, it means that the block was registered before |
||
146 | /// the block of this instruction in terms of blocks in the function. |
||
147 | /// Code Example: |
||
148 | /// \code |
||
149 | /// block_1: |
||
150 | /// br i1 %0, label %block_2, label %block_3 |
||
151 | /// block_2: |
||
152 | /// br i1 %1, label %block_1, label %block_2 |
||
153 | /// block_3: |
||
154 | /// br i1 %2, label %block_2, label %block_1 |
||
155 | /// ; Replacing the labels with relative values, this becomes: |
||
156 | /// block_1: |
||
157 | /// br i1 %0, distance 1, distance 2 |
||
158 | /// block_2: |
||
159 | /// br i1 %1, distance -1, distance 0 |
||
160 | /// block_3: |
||
161 | /// br i1 %2, distance -1, distance -2 |
||
162 | /// \endcode |
||
163 | /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is |
||
164 | /// "ahead" of block_2. |
||
165 | SmallVector<int, 4> RelativeBlockLocations; |
||
166 | |||
167 | /// Gather the information that is difficult to gather for an Instruction, or |
||
168 | /// is changed. i.e. the operands of an Instruction and the Types of those |
||
169 | /// operands. This extra information allows for similarity matching to make |
||
170 | /// assertions that allow for more flexibility when checking for whether an |
||
171 | /// Instruction performs the same operation. |
||
172 | IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); |
||
173 | IRInstructionData(IRInstructionDataList &IDL); |
||
174 | |||
175 | /// Fills data stuctures for IRInstructionData when it is constructed from a |
||
176 | // reference or a pointer. |
||
177 | void initializeInstruction(); |
||
178 | |||
179 | /// Get the predicate that the compare instruction is using for hashing the |
||
180 | /// instruction. the IRInstructionData must be wrapping a CmpInst. |
||
181 | CmpInst::Predicate getPredicate() const; |
||
182 | |||
183 | /// Get the callee name that the call instruction is using for hashing the |
||
184 | /// instruction. The IRInstructionData must be wrapping a CallInst. |
||
185 | StringRef getCalleeName() const; |
||
186 | |||
187 | /// A function that swaps the predicates to their less than form if they are |
||
188 | /// in a greater than form. Otherwise, the predicate is unchanged. |
||
189 | /// |
||
190 | /// \param CI - The comparison operation to find a consistent preidcate for. |
||
191 | /// \return the consistent comparison predicate. |
||
192 | static CmpInst::Predicate predicateForConsistency(CmpInst *CI); |
||
193 | |||
194 | /// For an IRInstructionData containing a branch, finds the |
||
195 | /// relative distances from the source basic block to the target by taking |
||
196 | /// the difference of the number assigned to the current basic block and the |
||
197 | /// target basic block of the branch. |
||
198 | /// |
||
199 | /// \param BasicBlockToInteger - The mapping of basic blocks to their location |
||
200 | /// in the module. |
||
201 | void |
||
202 | setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); |
||
203 | |||
204 | /// For an IRInstructionData containing a CallInst, set the function name |
||
205 | /// appropriately. This will be an empty string if it is an indirect call, |
||
206 | /// or we are not matching by name of the called function. It will be the |
||
207 | /// name of the function if \p MatchByName is true and it is not an indirect |
||
208 | /// call. We may decide not to match by name in order to expand the |
||
209 | /// size of the regions we can match. If a function name has the same type |
||
210 | /// signature, but the different name, the region of code is still almost the |
||
211 | /// same. Since function names can be treated as constants, the name itself |
||
212 | /// could be extrapolated away. However, matching by name provides a |
||
213 | /// specificity and more "identical" code than not matching by name. |
||
214 | /// |
||
215 | /// \param MatchByName - A flag to mark whether we are using the called |
||
216 | /// function name as a differentiating parameter. |
||
217 | void setCalleeName(bool MatchByName = true); |
||
218 | |||
219 | /// For an IRInstructionData containing a PHINode, finds the |
||
220 | /// relative distances from the incoming basic block to the current block by |
||
221 | /// taking the difference of the number assigned to the current basic block |
||
222 | /// and the incoming basic block of the branch. |
||
223 | /// |
||
224 | /// \param BasicBlockToInteger - The mapping of basic blocks to their location |
||
225 | /// in the module. |
||
226 | void |
||
227 | setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); |
||
228 | |||
229 | /// Hashes \p Value based on its opcode, types, and operand types. |
||
230 | /// Two IRInstructionData instances produce the same hash when they perform |
||
231 | /// the same operation. |
||
232 | /// |
||
233 | /// As a simple example, consider the following instructions. |
||
234 | /// |
||
235 | /// \code |
||
236 | /// %add1 = add i32 %x1, %y1 |
||
237 | /// %add2 = add i32 %x2, %y2 |
||
238 | /// |
||
239 | /// %sub = sub i32 %x1, %y1 |
||
240 | /// |
||
241 | /// %add_i64 = add i64 %x2, %y2 |
||
242 | /// \endcode |
||
243 | /// |
||
244 | /// Because the first two adds operate the same types, and are performing the |
||
245 | /// same action, they will be hashed to the same value. |
||
246 | /// |
||
247 | /// However, the subtraction instruction is not the same as an addition, and |
||
248 | /// will be hashed to a different value. |
||
249 | /// |
||
250 | /// Finally, the last add has a different type compared to the first two add |
||
251 | /// instructions, so it will also be hashed to a different value that any of |
||
252 | /// the previous instructions. |
||
253 | /// |
||
254 | /// \param [in] ID - The IRInstructionData instance to be hashed. |
||
255 | /// \returns A hash_value of the IRInstructionData. |
||
256 | friend hash_code hash_value(const IRInstructionData &ID) { |
||
257 | SmallVector<Type *, 4> OperTypes; |
||
258 | for (Value *V : ID.OperVals) |
||
259 | OperTypes.push_back(V->getType()); |
||
260 | |||
261 | if (isa<CmpInst>(ID.Inst)) |
||
262 | return llvm::hash_combine( |
||
263 | llvm::hash_value(ID.Inst->getOpcode()), |
||
264 | llvm::hash_value(ID.Inst->getType()), |
||
265 | llvm::hash_value(ID.getPredicate()), |
||
266 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
267 | |||
268 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) { |
||
269 | // To hash intrinsics, we use the opcode, and types like the other |
||
270 | // instructions, but also, the Intrinsic ID, and the Name of the |
||
271 | // intrinsic. |
||
272 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
||
273 | return llvm::hash_combine( |
||
274 | llvm::hash_value(ID.Inst->getOpcode()), |
||
275 | llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), |
||
276 | llvm::hash_value(*ID.CalleeName), |
||
277 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
278 | } |
||
279 | |||
280 | if (isa<CallInst>(ID.Inst)) { |
||
281 | std::string FunctionName = *ID.CalleeName; |
||
282 | return llvm::hash_combine( |
||
283 | llvm::hash_value(ID.Inst->getOpcode()), |
||
284 | llvm::hash_value(ID.Inst->getType()), |
||
285 | llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), |
||
286 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
287 | } |
||
288 | |||
289 | return llvm::hash_combine( |
||
290 | llvm::hash_value(ID.Inst->getOpcode()), |
||
291 | llvm::hash_value(ID.Inst->getType()), |
||
292 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
293 | } |
||
294 | |||
295 | IRInstructionDataList *IDL = nullptr; |
||
296 | }; |
||
297 | |||
298 | struct IRInstructionDataList |
||
299 | : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {}; |
||
300 | |||
301 | /// Compare one IRInstructionData class to another IRInstructionData class for |
||
302 | /// whether they are performing a the same operation, and can mapped to the |
||
303 | /// same value. For regular instructions if the hash value is the same, then |
||
304 | /// they will also be close. |
||
305 | /// |
||
306 | /// \param A - The first IRInstructionData class to compare |
||
307 | /// \param B - The second IRInstructionData class to compare |
||
308 | /// \returns true if \p A and \p B are similar enough to be mapped to the same |
||
309 | /// value. |
||
310 | bool isClose(const IRInstructionData &A, const IRInstructionData &B); |
||
311 | |||
312 | struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> { |
||
313 | static inline IRInstructionData *getEmptyKey() { return nullptr; } |
||
314 | static inline IRInstructionData *getTombstoneKey() { |
||
315 | return reinterpret_cast<IRInstructionData *>(-1); |
||
316 | } |
||
317 | |||
318 | static unsigned getHashValue(const IRInstructionData *E) { |
||
319 | using llvm::hash_value; |
||
320 | assert(E && "IRInstructionData is a nullptr?"); |
||
321 | return hash_value(*E); |
||
322 | } |
||
323 | |||
324 | static bool isEqual(const IRInstructionData *LHS, |
||
325 | const IRInstructionData *RHS) { |
||
326 | if (RHS == getEmptyKey() || RHS == getTombstoneKey() || |
||
327 | LHS == getEmptyKey() || LHS == getTombstoneKey()) |
||
328 | return LHS == RHS; |
||
329 | |||
330 | assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); |
||
331 | return isClose(*LHS, *RHS); |
||
332 | } |
||
333 | }; |
||
334 | |||
335 | /// Helper struct for converting the Instructions in a Module into a vector of |
||
336 | /// unsigned integers. This vector of unsigned integers can be thought of as a |
||
337 | /// "numeric string". This numeric string can then be queried by, for example, |
||
338 | /// data structures that find repeated substrings. |
||
339 | /// |
||
340 | /// This hashing is done per BasicBlock in the module. To hash Instructions |
||
341 | /// based off of their operations, each Instruction is wrapped in an |
||
342 | /// IRInstructionData struct. The unsigned integer for an IRInstructionData |
||
343 | /// depends on: |
||
344 | /// - The hash provided by the IRInstructionData. |
||
345 | /// - Which member of InstrType the IRInstructionData is classified as. |
||
346 | // See InstrType for more details on the possible classifications, and how they |
||
347 | // manifest in the numeric string. |
||
348 | /// |
||
349 | /// The numeric string for an individual BasicBlock is terminated by an unique |
||
350 | /// unsigned integer. This prevents data structures which rely on repetition |
||
351 | /// from matching across BasicBlocks. (For example, the SuffixTree.) |
||
352 | /// As a concrete example, if we have the following two BasicBlocks: |
||
353 | /// \code |
||
354 | /// bb0: |
||
355 | /// %add1 = add i32 %a, %b |
||
356 | /// %add2 = add i32 %c, %d |
||
357 | /// %add3 = add i64 %e, %f |
||
358 | /// bb1: |
||
359 | /// %sub = sub i32 %c, %d |
||
360 | /// \endcode |
||
361 | /// We may hash the Instructions like this (via IRInstructionData): |
||
362 | /// \code |
||
363 | /// bb0: |
||
364 | /// %add1 = add i32 %a, %b ; Hash: 1 |
||
365 | /// %add2 = add i32 %c, %d; Hash: 1 |
||
366 | /// %add3 = add i64 %e, %f; Hash: 2 |
||
367 | /// bb1: |
||
368 | /// %sub = sub i32 %c, %d; Hash: 3 |
||
369 | /// %add4 = add i32 %c, %d ; Hash: 1 |
||
370 | /// \endcode |
||
371 | /// And produce a "numeric string representation" like so: |
||
372 | /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 |
||
373 | /// |
||
374 | /// TODO: This is very similar to the MachineOutliner, and should be |
||
375 | /// consolidated into the same interface. |
||
376 | struct IRInstructionMapper { |
||
377 | /// The starting illegal instruction number to map to. |
||
378 | /// |
||
379 | /// Set to -3 for compatibility with DenseMapInfo<unsigned>. |
||
380 | unsigned IllegalInstrNumber = static_cast<unsigned>(-3); |
||
381 | |||
382 | /// The next available integer to assign to a legal Instruction to. |
||
383 | unsigned LegalInstrNumber = 0; |
||
384 | |||
385 | /// Correspondence from IRInstructionData to unsigned integers. |
||
386 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits> |
||
387 | InstructionIntegerMap; |
||
388 | |||
389 | /// A mapping for a basic block in a module to its assigned number/location |
||
390 | /// in the module. |
||
391 | DenseMap<BasicBlock *, unsigned> BasicBlockToInteger; |
||
392 | |||
393 | /// Set if we added an illegal number in the previous step. |
||
394 | /// Since each illegal number is unique, we only need one of them between |
||
395 | /// each range of legal numbers. This lets us make sure we don't add more |
||
396 | /// than one illegal number per range. |
||
397 | bool AddedIllegalLastTime = false; |
||
398 | |||
399 | /// Marks whether we found a illegal instruction in the previous step. |
||
400 | bool CanCombineWithPrevInstr = false; |
||
401 | |||
402 | /// Marks whether we have found a set of instructions that is long enough |
||
403 | /// to be considered for similarity. |
||
404 | bool HaveLegalRange = false; |
||
405 | |||
406 | /// Marks whether we should use exact function names, as well as types to |
||
407 | /// find similarity between calls. |
||
408 | bool EnableMatchCallsByName = false; |
||
409 | |||
410 | /// This allocator pointer is in charge of holding on to the IRInstructionData |
||
411 | /// so it is not deallocated until whatever external tool is using it is done |
||
412 | /// with the information. |
||
413 | SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr; |
||
414 | |||
415 | /// This allocator pointer is in charge of creating the IRInstructionDataList |
||
416 | /// so it is not deallocated until whatever external tool is using it is done |
||
417 | /// with the information. |
||
418 | SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr; |
||
419 | |||
420 | /// Get an allocated IRInstructionData struct using the InstDataAllocator. |
||
421 | /// |
||
422 | /// \param I - The Instruction to wrap with IRInstructionData. |
||
423 | /// \param Legality - A boolean value that is true if the instruction is to |
||
424 | /// be considered for similarity, and false if not. |
||
425 | /// \param IDL - The InstructionDataList that the IRInstructionData is |
||
426 | /// inserted into. |
||
427 | /// \returns An allocated IRInstructionData struct. |
||
428 | IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, |
||
429 | IRInstructionDataList &IDL); |
||
430 | |||
431 | /// Get an empty allocated IRInstructionData struct using the |
||
432 | /// InstDataAllocator. |
||
433 | /// |
||
434 | /// \param IDL - The InstructionDataList that the IRInstructionData is |
||
435 | /// inserted into. |
||
436 | /// \returns An allocated IRInstructionData struct. |
||
437 | IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL); |
||
438 | |||
439 | /// Get an allocated IRInstructionDataList object using the IDLAllocator. |
||
440 | /// |
||
441 | /// \returns An allocated IRInstructionDataList object. |
||
442 | IRInstructionDataList *allocateIRInstructionDataList(); |
||
443 | |||
444 | IRInstructionDataList *IDL = nullptr; |
||
445 | |||
446 | /// Assigns values to all the basic blocks in function \p F starting from |
||
447 | /// integer \p BBNumber. |
||
448 | /// |
||
449 | /// \param F - The function containing the basic blocks to assign numbers to. |
||
450 | /// \param BBNumber - The number to start from. |
||
451 | void initializeForBBs(Function &F, unsigned &BBNumber) { |
||
452 | for (BasicBlock &BB : F) |
||
453 | BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); |
||
454 | } |
||
455 | |||
456 | /// Assigns values to all the basic blocks in Module \p M. |
||
457 | /// \param M - The module containing the basic blocks to assign numbers to. |
||
458 | void initializeForBBs(Module &M) { |
||
459 | unsigned BBNumber = 0; |
||
460 | for (Function &F : M) |
||
461 | initializeForBBs(F, BBNumber); |
||
462 | } |
||
463 | |||
464 | /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers |
||
465 | /// determined by \p InstrType. Two Instructions are mapped to the same value |
||
466 | /// if they are close as defined by the InstructionData class above. |
||
467 | /// |
||
468 | /// \param [in] BB - The BasicBlock to be mapped to integers. |
||
469 | /// \param [in,out] InstrList - Vector of IRInstructionData to append to. |
||
470 | /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. |
||
471 | void convertToUnsignedVec(BasicBlock &BB, |
||
472 | std::vector<IRInstructionData *> &InstrList, |
||
473 | std::vector<unsigned> &IntegerMapping); |
||
474 | |||
475 | /// Maps an Instruction to a legal integer. |
||
476 | /// |
||
477 | /// \param [in] It - The Instruction to be mapped to an integer. |
||
478 | /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to |
||
479 | /// append to. |
||
480 | /// \param [in,out] InstrListForBB - Vector of InstructionData to append to. |
||
481 | /// \returns The integer \p It was mapped to. |
||
482 | unsigned mapToLegalUnsigned(BasicBlock::iterator &It, |
||
483 | std::vector<unsigned> &IntegerMappingForBB, |
||
484 | std::vector<IRInstructionData *> &InstrListForBB); |
||
485 | |||
486 | /// Maps an Instruction to an illegal integer. |
||
487 | /// |
||
488 | /// \param [in] It - The \p Instruction to be mapped to an integer. |
||
489 | /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to |
||
490 | /// append to. |
||
491 | /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to. |
||
492 | /// \param End - true if creating a dummy IRInstructionData at the end of a |
||
493 | /// basic block. |
||
494 | /// \returns The integer \p It was mapped to. |
||
495 | unsigned mapToIllegalUnsigned( |
||
496 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
||
497 | std::vector<IRInstructionData *> &InstrListForBB, bool End = false); |
||
498 | |||
499 | IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA, |
||
500 | SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA) |
||
501 | : InstDataAllocator(IDA), IDLAllocator(IDLA) { |
||
502 | // Make sure that the implementation of DenseMapInfo<unsigned> hasn't |
||
503 | // changed. |
||
504 | assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) && |
||
505 | "DenseMapInfo<unsigned>'s empty key isn't -1!"); |
||
506 | assert(DenseMapInfo<unsigned>::getTombstoneKey() == |
||
507 | static_cast<unsigned>(-2) && |
||
508 | "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); |
||
509 | |||
510 | IDL = new (IDLAllocator->Allocate()) |
||
511 | IRInstructionDataList(); |
||
512 | } |
||
513 | |||
514 | /// Custom InstVisitor to classify different instructions for whether it can |
||
515 | /// be analyzed for similarity. |
||
516 | struct InstructionClassification |
||
517 | : public InstVisitor<InstructionClassification, InstrType> { |
||
518 | InstructionClassification() = default; |
||
519 | |||
520 | // TODO: Determine a scheme to resolve when the label is similar enough. |
||
521 | InstrType visitBranchInst(BranchInst &BI) { |
||
522 | if (EnableBranches) |
||
523 | return Legal; |
||
524 | return Illegal; |
||
525 | } |
||
526 | InstrType visitPHINode(PHINode &PN) { |
||
527 | if (EnableBranches) |
||
528 | return Legal; |
||
529 | return Illegal; |
||
530 | } |
||
531 | // TODO: Handle allocas. |
||
532 | InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } |
||
533 | // We exclude variable argument instructions since variable arguments |
||
534 | // requires extra checking of the argument list. |
||
535 | InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } |
||
536 | // We exclude all exception handling cases since they are so context |
||
537 | // dependent. |
||
538 | InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } |
||
539 | InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } |
||
540 | // DebugInfo should be included in the regions, but should not be |
||
541 | // analyzed for similarity as it has no bearing on the outcome of the |
||
542 | // program. |
||
543 | InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } |
||
544 | InstrType visitIntrinsicInst(IntrinsicInst &II) { |
||
545 | // These are disabled due to complications in the CodeExtractor when |
||
546 | // outlining these instructions. For instance, It is unclear what we |
||
547 | // should do when moving only the start or end lifetime instruction into |
||
548 | // an outlined function. Also, assume-like intrinsics could be removed |
||
549 | // from the region, removing arguments, causing discrepencies in the |
||
550 | // number of inputs between different regions. |
||
551 | if (II.isAssumeLikeIntrinsic()) |
||
552 | return Illegal; |
||
553 | return EnableIntrinsics ? Legal : Illegal; |
||
554 | } |
||
555 | // We only allow call instructions where the function has a name and |
||
556 | // is not an indirect call. |
||
557 | InstrType visitCallInst(CallInst &CI) { |
||
558 | Function *F = CI.getCalledFunction(); |
||
559 | bool IsIndirectCall = CI.isIndirectCall(); |
||
560 | if (IsIndirectCall && !EnableIndirectCalls) |
||
561 | return Illegal; |
||
562 | if (!F && !IsIndirectCall) |
||
563 | return Illegal; |
||
564 | // Functions marked with the swifttailcc and tailcc calling conventions |
||
565 | // require special handling when outlining musttail functions. The |
||
566 | // calling convention must be passed down to the outlined function as |
||
567 | // well. Further, there is special handling for musttail calls as well, |
||
568 | // requiring a return call directly after. For now, the outliner does not |
||
569 | // support this, so we do not handle matching this case either. |
||
570 | if ((CI.getCallingConv() == CallingConv::SwiftTail || |
||
571 | CI.getCallingConv() == CallingConv::Tail) && |
||
572 | !EnableMustTailCalls) |
||
573 | return Illegal; |
||
574 | if (CI.isMustTailCall() && !EnableMustTailCalls) |
||
575 | return Illegal; |
||
576 | return Legal; |
||
577 | } |
||
578 | // TODO: We do not current handle similarity that changes the control flow. |
||
579 | InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } |
||
580 | // TODO: We do not current handle similarity that changes the control flow. |
||
581 | InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } |
||
582 | // TODO: Handle interblock similarity. |
||
583 | InstrType visitTerminator(Instruction &I) { return Illegal; } |
||
584 | InstrType visitInstruction(Instruction &I) { return Legal; } |
||
585 | |||
586 | // The flag variable that lets the classifier know whether we should |
||
587 | // allow branches to be checked for similarity. |
||
588 | bool EnableBranches = false; |
||
589 | |||
590 | // The flag variable that lets the classifier know whether we should |
||
591 | // allow indirect calls to be considered legal instructions. |
||
592 | bool EnableIndirectCalls = false; |
||
593 | |||
594 | // Flag that lets the classifier know whether we should allow intrinsics to |
||
595 | // be checked for similarity. |
||
596 | bool EnableIntrinsics = false; |
||
597 | |||
598 | // Flag that lets the classifier know whether we should allow tail calls to |
||
599 | // be checked for similarity. |
||
600 | bool EnableMustTailCalls = false; |
||
601 | }; |
||
602 | |||
603 | /// Maps an Instruction to a member of InstrType. |
||
604 | InstructionClassification InstClassifier; |
||
605 | }; |
||
606 | |||
607 | /// This is a class that wraps a range of IRInstructionData from one point to |
||
608 | /// another in the vector of IRInstructionData, which is a region of the |
||
609 | /// program. It is also responsible for defining the structure within this |
||
610 | /// region of instructions. |
||
611 | /// |
||
612 | /// The structure of a region is defined through a value numbering system |
||
613 | /// assigned to each unique value in a region at the creation of the |
||
614 | /// IRSimilarityCandidate. |
||
615 | /// |
||
616 | /// For example, for each Instruction we add a mapping for each new |
||
617 | /// value seen in that Instruction. |
||
618 | /// IR: Mapping Added: |
||
619 | /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 |
||
620 | /// %add2 = add i32 %a, %1 %add2 -> 4 |
||
621 | /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 |
||
622 | /// |
||
623 | /// We can compare IRSimilarityCandidates against one another. |
||
624 | /// The \ref isSimilar function compares each IRInstructionData against one |
||
625 | /// another and if we have the same sequences of IRInstructionData that would |
||
626 | /// create the same hash, we have similar IRSimilarityCandidates. |
||
627 | /// |
||
628 | /// We can also compare the structure of IRSimilarityCandidates. If we can |
||
629 | /// create a mapping of registers in the region contained by one |
||
630 | /// IRSimilarityCandidate to the region contained by different |
||
631 | /// IRSimilarityCandidate, they can be considered structurally similar. |
||
632 | /// |
||
633 | /// IRSimilarityCandidate1: IRSimilarityCandidate2: |
||
634 | /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e |
||
635 | /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f |
||
636 | /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 |
||
637 | /// |
||
638 | /// Can have the following mapping from candidate to candidate of: |
||
639 | /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 |
||
640 | /// and can be considered similar. |
||
641 | /// |
||
642 | /// IRSimilarityCandidate1: IRSimilarityCandidate2: |
||
643 | /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 |
||
644 | /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f |
||
645 | /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 |
||
646 | /// |
||
647 | /// We cannot create the same mapping since the use of c4 is not used in the |
||
648 | /// same way as %b or c2. |
||
649 | class IRSimilarityCandidate { |
||
650 | private: |
||
651 | /// The start index of this IRSimilarityCandidate in the instruction list. |
||
652 | unsigned StartIdx = 0; |
||
653 | |||
654 | /// The number of instructions in this IRSimilarityCandidate. |
||
655 | unsigned Len = 0; |
||
656 | |||
657 | /// The first instruction in this IRSimilarityCandidate. |
||
658 | IRInstructionData *FirstInst = nullptr; |
||
659 | |||
660 | /// The last instruction in this IRSimilarityCandidate. |
||
661 | IRInstructionData *LastInst = nullptr; |
||
662 | |||
663 | /// Global Value Numbering structures |
||
664 | /// @{ |
||
665 | /// Stores the mapping of the value to the number assigned to it in the |
||
666 | /// IRSimilarityCandidate. |
||
667 | DenseMap<Value *, unsigned> ValueToNumber; |
||
668 | /// Stores the mapping of the number to the value assigned this number. |
||
669 | DenseMap<unsigned, Value *> NumberToValue; |
||
670 | /// Stores the mapping of a value's number to canonical numbering in the |
||
671 | /// candidate's respective similarity group. |
||
672 | DenseMap<unsigned, unsigned> NumberToCanonNum; |
||
673 | /// Stores the mapping of canonical number in the candidate's respective |
||
674 | /// similarity group to a value number. |
||
675 | DenseMap<unsigned, unsigned> CanonNumToNumber; |
||
676 | /// @} |
||
677 | |||
678 | public: |
||
679 | /// \param StartIdx - The starting location of the region. |
||
680 | /// \param Len - The length of the region. |
||
681 | /// \param FirstInstIt - The starting IRInstructionData of the region. |
||
682 | /// \param LastInstIt - The ending IRInstructionData of the region. |
||
683 | IRSimilarityCandidate(unsigned StartIdx, unsigned Len, |
||
684 | IRInstructionData *FirstInstIt, |
||
685 | IRInstructionData *LastInstIt); |
||
686 | |||
687 | /// \param A - The first IRInstructionCandidate to compare. |
||
688 | /// \param B - The second IRInstructionCandidate to compare. |
||
689 | /// \returns True when every IRInstructionData in \p A is similar to every |
||
690 | /// IRInstructionData in \p B. |
||
691 | static bool isSimilar(const IRSimilarityCandidate &A, |
||
692 | const IRSimilarityCandidate &B); |
||
693 | |||
694 | /// \param [in] A - The first IRInstructionCandidate to compare. |
||
695 | /// \param [in] B - The second IRInstructionCandidate to compare. |
||
696 | /// \returns True when every IRInstructionData in \p A is structurally similar |
||
697 | /// to \p B. |
||
698 | static bool compareStructure(const IRSimilarityCandidate &A, |
||
699 | const IRSimilarityCandidate &B); |
||
700 | |||
701 | /// \param [in] A - The first IRInstructionCandidate to compare. |
||
702 | /// \param [in] B - The second IRInstructionCandidate to compare. |
||
703 | /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from |
||
704 | /// candidate \p A to candidate \B. |
||
705 | /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from |
||
706 | /// candidate \p B to candidate \A. |
||
707 | /// \returns True when every IRInstructionData in \p A is structurally similar |
||
708 | /// to \p B. |
||
709 | static bool |
||
710 | compareStructure(const IRSimilarityCandidate &A, |
||
711 | const IRSimilarityCandidate &B, |
||
712 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
||
713 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB); |
||
714 | |||
715 | struct OperandMapping { |
||
716 | /// The IRSimilarityCandidate that holds the instruction the OperVals were |
||
717 | /// pulled from. |
||
718 | const IRSimilarityCandidate &IRSC; |
||
719 | |||
720 | /// The operand values to be analyzed. |
||
721 | ArrayRef<Value *> &OperVals; |
||
722 | |||
723 | /// The current mapping of global value numbers from one IRSimilarityCandidate |
||
724 | /// to another IRSimilarityCandidate. |
||
725 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping; |
||
726 | }; |
||
727 | |||
728 | /// A helper struct to hold the candidate, for a branch instruction, the |
||
729 | /// relative location of a label, and the label itself. This is mostly to |
||
730 | /// group the values together before passing them as a bundle to a function. |
||
731 | struct RelativeLocMapping { |
||
732 | /// The IRSimilarityCandidate that holds the instruction the relative |
||
733 | /// location was pulled from. |
||
734 | const IRSimilarityCandidate &IRSC; |
||
735 | |||
736 | /// The relative location to be analyzed. |
||
737 | int RelativeLocation; |
||
738 | |||
739 | /// The corresponding value. |
||
740 | Value *OperVal; |
||
741 | }; |
||
742 | |||
743 | /// Compare the operands in \p A and \p B and check that the current mapping |
||
744 | /// of global value numbers from \p A to \p B and \p B to \A is consistent. |
||
745 | /// |
||
746 | /// \param A - The first IRInstructionCandidate, operand values, and current |
||
747 | /// operand mappings to compare. |
||
748 | /// \param B - The second IRInstructionCandidate, operand values, and current |
||
749 | /// operand mappings to compare. |
||
750 | /// \returns true if the IRSimilarityCandidates operands are compatible. |
||
751 | static bool compareNonCommutativeOperandMapping(OperandMapping A, |
||
752 | OperandMapping B); |
||
753 | |||
754 | /// Compare the operands in \p A and \p B and check that the current mapping |
||
755 | /// of global value numbers from \p A to \p B and \p B to \A is consistent |
||
756 | /// given that the operands are commutative. |
||
757 | /// |
||
758 | /// \param A - The first IRInstructionCandidate, operand values, and current |
||
759 | /// operand mappings to compare. |
||
760 | /// \param B - The second IRInstructionCandidate, operand values, and current |
||
761 | /// operand mappings to compare. |
||
762 | /// \returns true if the IRSimilarityCandidates operands are compatible. |
||
763 | static bool compareCommutativeOperandMapping(OperandMapping A, |
||
764 | OperandMapping B); |
||
765 | |||
766 | /// Compare the relative locations in \p A and \p B and check that the |
||
767 | /// distances match if both locations are contained in the region, and that |
||
768 | /// the branches both point outside the region if they do not. |
||
769 | /// Example Region: |
||
770 | /// \code |
||
771 | /// entry: |
||
772 | /// br i1 %0, label %block_1, label %block_3 |
||
773 | /// block_0: |
||
774 | /// br i1 %0, label %block_1, label %block_2 |
||
775 | /// block_1: |
||
776 | /// br i1 %0, label %block_2, label %block_3 |
||
777 | /// block_2: |
||
778 | /// br i1 %1, label %block_1, label %block_4 |
||
779 | /// block_3: |
||
780 | /// br i1 %2, label %block_2, label %block_5 |
||
781 | /// \endcode |
||
782 | /// If we compare the branches in block_0 and block_1 the relative values are |
||
783 | /// 1 and 2 for both, so we consider this a match. |
||
784 | /// |
||
785 | /// If we compare the branches in entry and block_0 the relative values are |
||
786 | /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not |
||
787 | /// consider them a match. |
||
788 | /// |
||
789 | /// If we compare the branches in block_1 and block_2 the relative values are |
||
790 | /// 1 and 2, and -1 and None respectively. As a result we do not consider |
||
791 | /// these to be the same |
||
792 | /// |
||
793 | /// If we compare the branches in block_2 and block_3 the relative values are |
||
794 | /// -1 and None for both. We do consider these to be a match. |
||
795 | /// |
||
796 | /// \param A - The first IRInstructionCandidate, relative location value, |
||
797 | /// and incoming block. |
||
798 | /// \param B - The second IRInstructionCandidate, relative location value, |
||
799 | /// and incoming block. |
||
800 | /// \returns true if the relative locations match. |
||
801 | static bool checkRelativeLocations(RelativeLocMapping A, |
||
802 | RelativeLocMapping B); |
||
803 | |||
804 | /// Create a mapping from the value numbering to a different separate set of |
||
805 | /// numbers. This will serve as a guide for relating one candidate to another. |
||
806 | /// The canonical number gives use the ability identify which global value |
||
807 | /// number in one candidate relates to the global value number in the other. |
||
808 | /// |
||
809 | /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a |
||
810 | /// canonical numbering for. |
||
811 | static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand); |
||
812 | |||
813 | /// Create a mapping for the value numbering of the calling |
||
814 | /// IRSimilarityCandidate, to a different separate set of numbers, based on |
||
815 | /// the canonical ordering in \p SourceCand. These are defined based on the |
||
816 | /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of |
||
817 | /// these relationships should have the same information, just in opposite |
||
818 | /// directions. |
||
819 | /// |
||
820 | /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a |
||
821 | /// canonical numbering from. |
||
822 | /// \param ToSourceMapping - The mapping of value numbers from this candidate |
||
823 | /// to \p SourceCand. |
||
824 | /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand |
||
825 | /// to this candidate. |
||
826 | void createCanonicalRelationFrom( |
||
827 | IRSimilarityCandidate &SourceCand, |
||
828 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, |
||
829 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping); |
||
830 | |||
831 | /// \param [in,out] BBSet - The set to track the basic blocks. |
||
832 | void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const { |
||
833 | for (IRInstructionData &ID : *this) { |
||
834 | BasicBlock *BB = ID.Inst->getParent(); |
||
835 | BBSet.insert(BB); |
||
836 | } |
||
837 | } |
||
838 | |||
839 | /// \param [in,out] BBSet - The set to track the basic blocks. |
||
840 | /// \param [in,out] BBList - A list in order of use to track the basic blocks. |
||
841 | void getBasicBlocks(DenseSet<BasicBlock *> &BBSet, |
||
842 | SmallVector<BasicBlock *> &BBList) const { |
||
843 | for (IRInstructionData &ID : *this) { |
||
844 | BasicBlock *BB = ID.Inst->getParent(); |
||
845 | if (BBSet.insert(BB).second) |
||
846 | BBList.push_back(BB); |
||
847 | } |
||
848 | } |
||
849 | |||
850 | /// Compare the start and end indices of the two IRSimilarityCandidates for |
||
851 | /// whether they overlap. If the start instruction of one |
||
852 | /// IRSimilarityCandidate is less than the end instruction of the other, and |
||
853 | /// the start instruction of one is greater than the start instruction of the |
||
854 | /// other, they overlap. |
||
855 | /// |
||
856 | /// \returns true if the IRSimilarityCandidates do not have overlapping |
||
857 | /// instructions. |
||
858 | static bool overlap(const IRSimilarityCandidate &A, |
||
859 | const IRSimilarityCandidate &B); |
||
860 | |||
861 | /// \returns the number of instructions in this Candidate. |
||
862 | unsigned getLength() const { return Len; } |
||
863 | |||
864 | /// \returns the start index of this IRSimilarityCandidate. |
||
865 | unsigned getStartIdx() const { return StartIdx; } |
||
866 | |||
867 | /// \returns the end index of this IRSimilarityCandidate. |
||
868 | unsigned getEndIdx() const { return StartIdx + Len - 1; } |
||
869 | |||
870 | /// \returns The first IRInstructionData. |
||
871 | IRInstructionData *front() const { return FirstInst; } |
||
872 | /// \returns The last IRInstructionData. |
||
873 | IRInstructionData *back() const { return LastInst; } |
||
874 | |||
875 | /// \returns The first Instruction. |
||
876 | Instruction *frontInstruction() { return FirstInst->Inst; } |
||
877 | /// \returns The last Instruction |
||
878 | Instruction *backInstruction() { return LastInst->Inst; } |
||
879 | |||
880 | /// \returns The BasicBlock the IRSimilarityCandidate starts in. |
||
881 | BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } |
||
882 | /// \returns The BasicBlock the IRSimilarityCandidate ends in. |
||
883 | BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } |
||
884 | |||
885 | /// \returns The Function that the IRSimilarityCandidate is located in. |
||
886 | Function *getFunction() { return getStartBB()->getParent(); } |
||
887 | |||
888 | /// Finds the positive number associated with \p V if it has been mapped. |
||
889 | /// \param [in] V - the Value to find. |
||
890 | /// \returns The positive number corresponding to the value. |
||
891 | /// \returns std::nullopt if not present. |
||
892 | std::optional<unsigned> getGVN(Value *V) { |
||
893 | assert(V != nullptr && "Value is a nullptr?"); |
||
894 | DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V); |
||
895 | if (VNIt == ValueToNumber.end()) |
||
896 | return std::nullopt; |
||
897 | return VNIt->second; |
||
898 | } |
||
899 | |||
900 | /// Finds the Value associate with \p Num if it exists. |
||
901 | /// \param [in] Num - the number to find. |
||
902 | /// \returns The Value associated with the number. |
||
903 | /// \returns std::nullopt if not present. |
||
904 | std::optional<Value *> fromGVN(unsigned Num) { |
||
905 | DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num); |
||
906 | if (VNIt == NumberToValue.end()) |
||
907 | return std::nullopt; |
||
908 | assert(VNIt->second != nullptr && "Found value is a nullptr!"); |
||
909 | return VNIt->second; |
||
910 | } |
||
911 | |||
912 | /// Find the canonical number from the global value number \p N stored in the |
||
913 | /// candidate. |
||
914 | /// |
||
915 | /// \param N - The global value number to find the canonical number for. |
||
916 | /// \returns An optional containing the value, and std::nullopt if it could |
||
917 | /// not be found. |
||
918 | std::optional<unsigned> getCanonicalNum(unsigned N) { |
||
919 | DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N); |
||
920 | if (NCIt == NumberToCanonNum.end()) |
||
921 | return std::nullopt; |
||
922 | return NCIt->second; |
||
923 | } |
||
924 | |||
925 | /// Find the global value number from the canonical number \p N stored in the |
||
926 | /// candidate. |
||
927 | /// |
||
928 | /// \param N - The canonical number to find the global vlaue number for. |
||
929 | /// \returns An optional containing the value, and std::nullopt if it could |
||
930 | /// not be found. |
||
931 | std::optional<unsigned> fromCanonicalNum(unsigned N) { |
||
932 | DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N); |
||
933 | if (CNIt == CanonNumToNumber.end()) |
||
934 | return std::nullopt; |
||
935 | return CNIt->second; |
||
936 | } |
||
937 | |||
938 | /// \param RHS -The IRSimilarityCandidate to compare against |
||
939 | /// \returns true if the IRSimilarityCandidate is occurs after the |
||
940 | /// IRSimilarityCandidate in the program. |
||
941 | bool operator<(const IRSimilarityCandidate &RHS) const { |
||
942 | return getStartIdx() > RHS.getStartIdx(); |
||
943 | } |
||
944 | |||
945 | using iterator = IRInstructionDataList::iterator; |
||
946 | iterator begin() const { return iterator(front()); } |
||
947 | iterator end() const { return std::next(iterator(back())); } |
||
948 | }; |
||
949 | |||
950 | typedef DenseMap<IRSimilarityCandidate *, |
||
951 | DenseMap<unsigned, DenseSet<unsigned>>> |
||
952 | CandidateGVNMapping; |
||
953 | typedef std::vector<IRSimilarityCandidate> SimilarityGroup; |
||
954 | typedef std::vector<SimilarityGroup> SimilarityGroupList; |
||
955 | |||
956 | /// This class puts all the pieces of the IRInstructionData, |
||
957 | /// IRInstructionMapper, IRSimilarityCandidate together. |
||
958 | /// |
||
959 | /// It first feeds the Module or vector of Modules into the IRInstructionMapper, |
||
960 | /// and puts all the mapped instructions into a single long list of |
||
961 | /// IRInstructionData. |
||
962 | /// |
||
963 | /// The list of unsigned integers is given to the Suffix Tree or similar data |
||
964 | /// structure to find repeated subsequences. We construct an |
||
965 | /// IRSimilarityCandidate for each instance of the subsequence. We compare them |
||
966 | /// against one another since These repeated subsequences can have different |
||
967 | /// structure. For each different kind of structure found, we create a |
||
968 | /// similarity group. |
||
969 | /// |
||
970 | /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are |
||
971 | /// structurally similar to one another, while C is different we would have two |
||
972 | /// SimilarityGroups: |
||
973 | /// |
||
974 | /// SimilarityGroup 1: SimilarityGroup 2 |
||
975 | /// A, B, D C |
||
976 | /// |
||
977 | /// A list of the different similarity groups is then returned after |
||
978 | /// analyzing the module. |
||
979 | class IRSimilarityIdentifier { |
||
980 | public: |
||
981 | IRSimilarityIdentifier(bool MatchBranches = true, |
||
982 | bool MatchIndirectCalls = true, |
||
983 | bool MatchCallsWithName = false, |
||
984 | bool MatchIntrinsics = true, |
||
985 | bool MatchMustTailCalls = true) |
||
986 | : Mapper(&InstDataAllocator, &InstDataListAllocator), |
||
987 | EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), |
||
988 | EnableMatchingCallsByName(MatchCallsWithName), |
||
989 | EnableIntrinsics(MatchIntrinsics), |
||
990 | EnableMustTailCalls(MatchMustTailCalls) {} |
||
991 | |||
992 | private: |
||
993 | /// Map the instructions in the module to unsigned integers, using mapping |
||
994 | /// already present in the Mapper if possible. |
||
995 | /// |
||
996 | /// \param [in] M Module - To map to integers. |
||
997 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
998 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
999 | void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList, |
||
1000 | std::vector<unsigned> &IntegerMapping); |
||
1001 | |||
1002 | /// Map the instructions in the modules vector to unsigned integers, using |
||
1003 | /// mapping already present in the mapper if possible. |
||
1004 | /// |
||
1005 | /// \param [in] Modules - The list of modules to use to populate the mapper |
||
1006 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
1007 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
1008 | void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules, |
||
1009 | std::vector<IRInstructionData *> &InstrList, |
||
1010 | std::vector<unsigned> &IntegerMapping); |
||
1011 | |||
1012 | /// Find the similarity candidates in \p InstrList and corresponding |
||
1013 | /// \p UnsignedVec |
||
1014 | /// |
||
1015 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
1016 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
1017 | /// candidates found in the program. |
||
1018 | void findCandidates(std::vector<IRInstructionData *> &InstrList, |
||
1019 | std::vector<unsigned> &IntegerMapping); |
||
1020 | |||
1021 | public: |
||
1022 | // Find the IRSimilarityCandidates in the \p Modules and group by structural |
||
1023 | // similarity in a SimilarityGroup, each group is returned in a |
||
1024 | // SimilarityGroupList. |
||
1025 | // |
||
1026 | // \param [in] Modules - the modules to analyze. |
||
1027 | // \returns The groups of similarity ranges found in the modules. |
||
1028 | SimilarityGroupList & |
||
1029 | findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules); |
||
1030 | |||
1031 | // Find the IRSimilarityCandidates in the given Module grouped by structural |
||
1032 | // similarity in a SimilarityGroup, contained inside a SimilarityGroupList. |
||
1033 | // |
||
1034 | // \param [in] M - the module to analyze. |
||
1035 | // \returns The groups of similarity ranges found in the module. |
||
1036 | SimilarityGroupList &findSimilarity(Module &M); |
||
1037 | |||
1038 | // Clears \ref SimilarityCandidates if it is already filled by a previous run. |
||
1039 | void resetSimilarityCandidates() { |
||
1040 | // If we've already analyzed a Module or set of Modules, so we must clear |
||
1041 | // the SimilarityCandidates to make sure we do not have only old values |
||
1042 | // hanging around. |
||
1043 | if (SimilarityCandidates) |
||
1044 | SimilarityCandidates->clear(); |
||
1045 | else |
||
1046 | SimilarityCandidates = SimilarityGroupList(); |
||
1047 | } |
||
1048 | |||
1049 | // \returns The groups of similarity ranges found in the most recently passed |
||
1050 | // set of modules. |
||
1051 | std::optional<SimilarityGroupList> &getSimilarity() { |
||
1052 | return SimilarityCandidates; |
||
1053 | } |
||
1054 | |||
1055 | private: |
||
1056 | /// The allocator for IRInstructionData. |
||
1057 | SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; |
||
1058 | |||
1059 | /// The allocator for IRInstructionDataLists. |
||
1060 | SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator; |
||
1061 | |||
1062 | /// Map Instructions to unsigned integers and wraps the Instruction in an |
||
1063 | /// instance of IRInstructionData. |
||
1064 | IRInstructionMapper Mapper; |
||
1065 | |||
1066 | /// The flag variable that marks whether we should check branches for |
||
1067 | /// similarity, or only look within basic blocks. |
||
1068 | bool EnableBranches = true; |
||
1069 | |||
1070 | /// The flag variable that marks whether we allow indirect calls to be checked |
||
1071 | /// for similarity, or exclude them as a legal instruction. |
||
1072 | bool EnableIndirectCalls = true; |
||
1073 | |||
1074 | /// The flag variable that marks whether we allow calls to be marked as |
||
1075 | /// similar if they do not have the same name, only the same calling |
||
1076 | /// convention, attributes and type signature. |
||
1077 | bool EnableMatchingCallsByName = true; |
||
1078 | |||
1079 | /// The flag variable that marks whether we should check intrinsics for |
||
1080 | /// similarity. |
||
1081 | bool EnableIntrinsics = true; |
||
1082 | |||
1083 | // The flag variable that marks whether we should allow tailcalls |
||
1084 | // to be checked for similarity. |
||
1085 | bool EnableMustTailCalls = false; |
||
1086 | |||
1087 | /// The SimilarityGroups found with the most recent run of \ref |
||
1088 | /// findSimilarity. std::nullopt if there is no recent run. |
||
1089 | std::optional<SimilarityGroupList> SimilarityCandidates; |
||
1090 | }; |
||
1091 | |||
1092 | } // end namespace IRSimilarity |
||
1093 | |||
1094 | /// An analysis pass based on legacy pass manager that runs and returns |
||
1095 | /// IRSimilarityIdentifier run on the Module. |
||
1096 | class IRSimilarityIdentifierWrapperPass : public ModulePass { |
||
1097 | std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI; |
||
1098 | |||
1099 | public: |
||
1100 | static char ID; |
||
1101 | IRSimilarityIdentifierWrapperPass(); |
||
1102 | |||
1103 | IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; } |
||
1104 | const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; } |
||
1105 | |||
1106 | bool doInitialization(Module &M) override; |
||
1107 | bool doFinalization(Module &M) override; |
||
1108 | bool runOnModule(Module &M) override; |
||
1109 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
||
1110 | AU.setPreservesAll(); |
||
1111 | } |
||
1112 | }; |
||
1113 | |||
1114 | /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the |
||
1115 | /// Module. |
||
1116 | class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> { |
||
1117 | public: |
||
1118 | typedef IRSimilarity::IRSimilarityIdentifier Result; |
||
1119 | |||
1120 | Result run(Module &M, ModuleAnalysisManager &); |
||
1121 | |||
1122 | private: |
||
1123 | friend AnalysisInfoMixin<IRSimilarityAnalysis>; |
||
1124 | static AnalysisKey Key; |
||
1125 | }; |
||
1126 | |||
1127 | /// Printer pass that uses \c IRSimilarityAnalysis. |
||
1128 | class IRSimilarityAnalysisPrinterPass |
||
1129 | : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> { |
||
1130 | raw_ostream &OS; |
||
1131 | |||
1132 | public: |
||
1133 | explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} |
||
1134 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
||
1135 | }; |
||
1136 | |||
1137 | } // end namespace llvm |
||
1138 | |||
1139 | #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |