Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// \file
10
// Interface file for the IRSimilarityIdentifier for identifying similarities in
11
// IR including the IRInstructionMapper, which maps an Instruction to unsigned
12
// integers.
13
//
14
// Two sequences of instructions are called "similar" if they perform the same
15
// series of operations for all inputs.
16
//
17
// \code
18
// %1 = add i32 %a, 10
19
// %2 = add i32 %a, %1
20
// %3 = icmp slt icmp %1, %2
21
// \endcode
22
//
23
// and
24
//
25
// \code
26
// %1 = add i32 11, %a
27
// %2 = sub i32 %a, %1
28
// %3 = icmp sgt icmp %2, %1
29
// \endcode
30
//
31
// ultimately have the same result, even if the inputs, and structure are
32
// slightly different.
33
//
34
// For instructions, we do not worry about operands that do not have fixed
35
// semantic meaning to the program.  We consider the opcode that the instruction
36
// has, the types, parameters, and extra information such as the function name,
37
// or comparison predicate.  These are used to create a hash to map instructions
38
// to integers to be used in similarity matching in sequences of instructions
39
//
40
// Terminology:
41
// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
42
// Instructions), usually used to denote a region of similarity has been found.
43
//
44
// A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
45
// similar to one another.
46
//
47
//===----------------------------------------------------------------------===//
48
 
49
#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
50
#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
51
 
52
#include "llvm/IR/InstVisitor.h"
53
#include "llvm/IR/Instructions.h"
54
#include "llvm/IR/PassManager.h"
55
#include "llvm/Pass.h"
56
#include "llvm/Support/Allocator.h"
57
#include <optional>
58
 
59
namespace llvm {
60
class Module;
61
 
62
namespace IRSimilarity {
63
 
64
struct IRInstructionDataList;
65
 
66
/// This represents what is and is not supported when finding similarity in
67
/// Instructions.
68
///
69
/// Legal Instructions are considered when looking at similarity between
70
/// Instructions.
71
///
72
/// Illegal Instructions cannot be considered when looking for similarity
73
/// between Instructions. They act as boundaries between similarity regions.
74
///
75
/// Invisible Instructions are skipped over during analysis.
76
// TODO: Shared with MachineOutliner
77
enum InstrType { Legal, Illegal, Invisible };
78
 
79
/// This provides the utilities for hashing an Instruction to an unsigned
80
/// integer. Two IRInstructionDatas produce the same hash value when their
81
/// underlying Instructions perform the same operation (even if they don't have
82
/// the same input operands.)
83
/// As a more concrete example, consider the following:
84
///
85
/// \code
86
/// %add1 = add i32 %a, %b
87
/// %add2 = add i32 %c, %d
88
/// %add3 = add i64 %e, %f
89
/// \endcode
90
///
91
// Then the IRInstructionData wrappers for these Instructions may be hashed like
92
/// so:
93
///
94
/// \code
95
/// ; These two adds have the same types and operand types, so they hash to the
96
/// ; same number.
97
/// %add1 = add i32 %a, %b ; Hash: 1
98
/// %add2 = add i32 %c, %d ; Hash: 1
99
/// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
100
/// ; it hashes to a different number.
101
/// %add3 = add i64 %e, %f; Hash: 2
102
/// \endcode
103
///
104
///
105
/// This hashing scheme will be used to represent the program as a very long
106
/// string. This string can then be placed in a data structure which can be used
107
/// for similarity queries.
108
///
109
/// TODO: Handle types of Instructions which can be equal even with different
110
/// operands. (E.g. comparisons with swapped predicates.)
111
/// TODO: Handle CallInsts, which are only checked for function type
112
/// by \ref isSameOperationAs.
113
/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
114
/// exact same, and some do not.
115
struct IRInstructionData
116
    : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
117
 
118
  /// The source Instruction that is being wrapped.
119
  Instruction *Inst = nullptr;
120
  /// The values of the operands in the Instruction.
121
  SmallVector<Value *, 4> OperVals;
122
  /// The legality of the wrapped instruction. This is informed by InstrType,
123
  /// and is used when checking when two instructions are considered similar.
124
  /// If either instruction is not legal, the instructions are automatically not
125
  /// considered similar.
126
  bool Legal = false;
127
 
128
  /// This is only relevant if we are wrapping a CmpInst where we needed to
129
  /// change the predicate of a compare instruction from a greater than form
130
  /// to a less than form.  It is None otherwise.
131
  std::optional<CmpInst::Predicate> RevisedPredicate;
132
 
133
  /// This is only relevant if we are wrapping a CallInst. If we are requiring
134
  /// that the function calls have matching names as well as types, and the
135
  /// call is not an indirect call, this will hold the name of the function.  If
136
  /// it is an indirect string, it will be the empty string.  However, if this
137
  /// requirement is not in place it will be the empty string regardless of the
138
  /// function call type.  The value held here is used to create the hash of the
139
  /// instruction, and check to make sure two instructions are close to one
140
  /// another.
141
  std::optional<std::string> CalleeName;
142
 
143
  /// This structure holds the distances of how far "ahead of" or "behind" the
144
  /// target blocks of a branch, or the incoming blocks of a phi nodes are.
145
  /// If the value is negative, it means that the block was registered before
146
  /// the block of this instruction in terms of blocks in the function.
147
  /// Code Example:
148
  /// \code
149
  /// block_1:
150
  ///   br i1 %0, label %block_2, label %block_3
151
  /// block_2:
152
  ///   br i1 %1, label %block_1, label %block_2
153
  /// block_3:
154
  ///   br i1 %2, label %block_2, label %block_1
155
  /// ; Replacing the labels with relative values, this becomes:
156
  /// block_1:
157
  ///   br i1 %0, distance 1, distance 2
158
  /// block_2:
159
  ///   br i1 %1, distance -1, distance 0
160
  /// block_3:
161
  ///   br i1 %2, distance -1, distance -2
162
  /// \endcode
163
  /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
164
  /// "ahead" of block_2.
165
  SmallVector<int, 4> RelativeBlockLocations;
166
 
167
  /// Gather the information that is difficult to gather for an Instruction, or
168
  /// is changed. i.e. the operands of an Instruction and the Types of those
169
  /// operands. This extra information allows for similarity matching to make
170
  /// assertions that allow for more flexibility when checking for whether an
171
  /// Instruction performs the same operation.
172
  IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
173
  IRInstructionData(IRInstructionDataList &IDL);
174
 
175
  /// Fills data stuctures for IRInstructionData when it is constructed from a
176
  // reference or a pointer.
177
  void initializeInstruction();
178
 
179
  /// Get the predicate that the compare instruction is using for hashing the
180
  /// instruction. the IRInstructionData must be wrapping a CmpInst.
181
  CmpInst::Predicate getPredicate() const;
182
 
183
  /// Get the callee name that the call instruction is using for hashing the
184
  /// instruction. The IRInstructionData must be wrapping a CallInst.
185
  StringRef getCalleeName() const;
186
 
187
  /// A function that swaps the predicates to their less than form if they are
188
  /// in a greater than form. Otherwise, the predicate is unchanged.
189
  ///
190
  /// \param CI - The comparison operation to find a consistent preidcate for.
191
  /// \return the consistent comparison predicate. 
192
  static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
193
 
194
  /// For an IRInstructionData containing a branch, finds the
195
  /// relative distances from the source basic block to the target by taking
196
  /// the difference of the number assigned to the current basic block and the
197
  /// target basic block of the branch.
198
  ///
199
  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
200
  /// in the module.
201
  void
202
  setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
203
 
204
  /// For an IRInstructionData containing a CallInst, set the function name
205
  /// appropriately.  This will be an empty string if it is an indirect call,
206
  /// or we are not matching by name of the called function.  It will be the
207
  /// name of the function if \p MatchByName is true and it is not an indirect
208
  /// call.  We may decide not to match by name in order to expand the
209
  /// size of the regions we can match.  If a function name has the same type
210
  /// signature, but the different name, the region of code is still almost the
211
  /// same.  Since function names can be treated as constants, the name itself
212
  /// could be extrapolated away.  However, matching by name provides a
213
  /// specificity and more "identical" code than not matching by name.
214
  ///
215
  /// \param MatchByName - A flag to mark whether we are using the called
216
  /// function name as a differentiating parameter.
217
  void setCalleeName(bool MatchByName = true);
218
 
219
  /// For an IRInstructionData containing a PHINode, finds the
220
  /// relative distances from the incoming basic block to the current block by
221
  /// taking the difference of the number assigned to the current basic block
222
  /// and the incoming basic block of the branch.
223
  ///
224
  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
225
  /// in the module.
226
  void
227
  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
228
 
229
  /// Hashes \p Value based on its opcode, types, and operand types.
230
  /// Two IRInstructionData instances produce the same hash when they perform
231
  /// the same operation.
232
  ///
233
  /// As a simple example, consider the following instructions.
234
  ///
235
  /// \code
236
  /// %add1 = add i32 %x1, %y1
237
  /// %add2 = add i32 %x2, %y2
238
  ///
239
  /// %sub = sub i32 %x1, %y1
240
  ///
241
  /// %add_i64 = add i64 %x2, %y2
242
  /// \endcode
243
  ///
244
  /// Because the first two adds operate the same types, and are performing the
245
  /// same action, they will be hashed to the same value.
246
  ///
247
  /// However, the subtraction instruction is not the same as an addition, and
248
  /// will be hashed to a different value.
249
  ///
250
  /// Finally, the last add has a different type compared to the first two add
251
  /// instructions, so it will also be hashed to a different value that any of
252
  /// the previous instructions.
253
  ///
254
  /// \param [in] ID - The IRInstructionData instance to be hashed.
255
  /// \returns A hash_value of the IRInstructionData.
256
  friend hash_code hash_value(const IRInstructionData &ID) {
257
    SmallVector<Type *, 4> OperTypes;
258
    for (Value *V : ID.OperVals)
259
      OperTypes.push_back(V->getType());
260
 
261
    if (isa<CmpInst>(ID.Inst))
262
      return llvm::hash_combine(
263
          llvm::hash_value(ID.Inst->getOpcode()),
264
          llvm::hash_value(ID.Inst->getType()),
265
          llvm::hash_value(ID.getPredicate()),
266
          llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
267
 
268
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
269
      // To hash intrinsics, we use the opcode, and types like the other
270
      // instructions, but also, the Intrinsic ID, and the Name of the
271
      // intrinsic.
272
      Intrinsic::ID IntrinsicID = II->getIntrinsicID();
273
      return llvm::hash_combine(
274
          llvm::hash_value(ID.Inst->getOpcode()),
275
          llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
276
          llvm::hash_value(*ID.CalleeName),
277
          llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
278
    }
279
 
280
    if (isa<CallInst>(ID.Inst)) {
281
      std::string FunctionName = *ID.CalleeName;
282
      return llvm::hash_combine(
283
          llvm::hash_value(ID.Inst->getOpcode()),
284
          llvm::hash_value(ID.Inst->getType()),
285
          llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
286
          llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
287
    }
288
 
289
    return llvm::hash_combine(
290
        llvm::hash_value(ID.Inst->getOpcode()),
291
        llvm::hash_value(ID.Inst->getType()),
292
        llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
293
  }
294
 
295
  IRInstructionDataList *IDL = nullptr;
296
};
297
 
298
struct IRInstructionDataList
299
    : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
300
 
301
/// Compare one IRInstructionData class to another IRInstructionData class for
302
/// whether they are performing a the same operation, and can mapped to the
303
/// same value. For regular instructions if the hash value is the same, then
304
/// they will also be close.
305
///
306
/// \param A - The first IRInstructionData class to compare
307
/// \param B - The second IRInstructionData class to compare
308
/// \returns true if \p A and \p B are similar enough to be mapped to the same
309
/// value.
310
bool isClose(const IRInstructionData &A, const IRInstructionData &B);
311
 
312
struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
313
  static inline IRInstructionData *getEmptyKey() { return nullptr; }
314
  static inline IRInstructionData *getTombstoneKey() {
315
    return reinterpret_cast<IRInstructionData *>(-1);
316
  }
317
 
318
  static unsigned getHashValue(const IRInstructionData *E) {
319
    using llvm::hash_value;
320
    assert(E && "IRInstructionData is a nullptr?");
321
    return hash_value(*E);
322
  }
323
 
324
  static bool isEqual(const IRInstructionData *LHS,
325
                      const IRInstructionData *RHS) {
326
    if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
327
        LHS == getEmptyKey() || LHS == getTombstoneKey())
328
      return LHS == RHS;
329
 
330
    assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
331
    return isClose(*LHS, *RHS);
332
  }
333
};
334
 
335
/// Helper struct for converting the Instructions in a Module into a vector of
336
/// unsigned integers. This vector of unsigned integers can be thought of as a
337
/// "numeric string". This numeric string can then be queried by, for example,
338
/// data structures that find repeated substrings.
339
///
340
/// This hashing is done per BasicBlock in the module. To hash Instructions
341
/// based off of their operations, each Instruction is wrapped in an
342
/// IRInstructionData struct. The unsigned integer for an IRInstructionData
343
/// depends on:
344
/// - The hash provided by the IRInstructionData.
345
/// - Which member of InstrType the IRInstructionData is classified as.
346
// See InstrType for more details on the possible classifications, and how they
347
// manifest in the numeric string.
348
///
349
/// The numeric string for an individual BasicBlock is terminated by an unique
350
/// unsigned integer. This prevents data structures which rely on repetition
351
/// from matching across BasicBlocks. (For example, the SuffixTree.)
352
/// As a concrete example, if we have the following two BasicBlocks:
353
/// \code
354
/// bb0:
355
/// %add1 = add i32 %a, %b
356
/// %add2 = add i32 %c, %d
357
/// %add3 = add i64 %e, %f
358
/// bb1:
359
/// %sub = sub i32 %c, %d
360
/// \endcode
361
/// We may hash the Instructions like this (via IRInstructionData):
362
/// \code
363
/// bb0:
364
/// %add1 = add i32 %a, %b ; Hash: 1
365
/// %add2 = add i32 %c, %d; Hash: 1
366
/// %add3 = add i64 %e, %f; Hash: 2
367
/// bb1:
368
/// %sub = sub i32 %c, %d; Hash: 3
369
/// %add4 = add i32 %c, %d ; Hash: 1
370
/// \endcode
371
/// And produce a "numeric string representation" like so:
372
/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
373
///
374
/// TODO: This is very similar to the MachineOutliner, and should be
375
/// consolidated into the same interface.
376
struct IRInstructionMapper {
377
  /// The starting illegal instruction number to map to.
378
  ///
379
  /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
380
  unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
381
 
382
  /// The next available integer to assign to a legal Instruction to.
383
  unsigned LegalInstrNumber = 0;
384
 
385
  /// Correspondence from IRInstructionData to unsigned integers.
386
  DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
387
      InstructionIntegerMap;
388
 
389
  /// A mapping for a basic block in a module to its assigned number/location
390
  /// in the module.
391
  DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
392
 
393
  /// Set if we added an illegal number in the previous step.
394
  /// Since each illegal number is unique, we only need one of them between
395
  /// each range of legal numbers. This lets us make sure we don't add more
396
  /// than one illegal number per range.
397
  bool AddedIllegalLastTime = false;
398
 
399
  /// Marks whether we found a illegal instruction in the previous step.
400
  bool CanCombineWithPrevInstr = false;
401
 
402
  /// Marks whether we have found a set of instructions that is long enough
403
  /// to be considered for similarity.
404
  bool HaveLegalRange = false;
405
 
406
  /// Marks whether we should use exact function names, as well as types to
407
  /// find similarity between calls.
408
  bool EnableMatchCallsByName = false;
409
 
410
  /// This allocator pointer is in charge of holding on to the IRInstructionData
411
  /// so it is not deallocated until whatever external tool is using it is done
412
  /// with the information.
413
  SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
414
 
415
  /// This allocator pointer is in charge of creating the IRInstructionDataList
416
  /// so it is not deallocated until whatever external tool is using it is done
417
  /// with the information.
418
  SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
419
 
420
  /// Get an allocated IRInstructionData struct using the InstDataAllocator.
421
  ///
422
  /// \param I - The Instruction to wrap with IRInstructionData.
423
  /// \param Legality - A boolean value that is true if the instruction is to
424
  /// be considered for similarity, and false if not.
425
  /// \param IDL - The InstructionDataList that the IRInstructionData is
426
  /// inserted into.
427
  /// \returns An allocated IRInstructionData struct.
428
  IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
429
                                               IRInstructionDataList &IDL);
430
 
431
  /// Get an empty allocated IRInstructionData struct using the
432
  /// InstDataAllocator.
433
  ///
434
  /// \param IDL - The InstructionDataList that the IRInstructionData is
435
  /// inserted into.
436
  /// \returns An allocated IRInstructionData struct.
437
  IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
438
 
439
  /// Get an allocated IRInstructionDataList object using the IDLAllocator.
440
  ///
441
  /// \returns An allocated IRInstructionDataList object.
442
  IRInstructionDataList *allocateIRInstructionDataList();
443
 
444
  IRInstructionDataList *IDL = nullptr;
445
 
446
  /// Assigns values to all the basic blocks in function \p F starting from
447
  /// integer \p BBNumber.
448
  ///
449
  /// \param F - The function containing the basic blocks to assign numbers to.
450
  /// \param BBNumber - The number to start from.
451
  void initializeForBBs(Function &F, unsigned &BBNumber) {
452
    for (BasicBlock &BB : F)
453
      BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++));
454
  }
455
 
456
  /// Assigns values to all the basic blocks in Module \p M.
457
  /// \param M - The module containing the basic blocks to assign numbers to.
458
  void initializeForBBs(Module &M) {
459
    unsigned BBNumber = 0;
460
    for (Function &F : M)
461
      initializeForBBs(F, BBNumber);
462
  }
463
 
464
  /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
465
  /// determined by \p InstrType. Two Instructions are mapped to the same value
466
  /// if they are close as defined by the InstructionData class above.
467
  ///
468
  /// \param [in] BB - The BasicBlock to be mapped to integers.
469
  /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
470
  /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
471
  void convertToUnsignedVec(BasicBlock &BB,
472
                            std::vector<IRInstructionData *> &InstrList,
473
                            std::vector<unsigned> &IntegerMapping);
474
 
475
  /// Maps an Instruction to a legal integer.
476
  ///
477
  /// \param [in] It - The Instruction to be mapped to an integer.
478
  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
479
  /// append to.
480
  /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
481
  /// \returns The integer \p It was mapped to.
482
  unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
483
                              std::vector<unsigned> &IntegerMappingForBB,
484
                              std::vector<IRInstructionData *> &InstrListForBB);
485
 
486
  /// Maps an Instruction to an illegal integer.
487
  ///
488
  /// \param [in] It - The \p Instruction to be mapped to an integer.
489
  /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
490
  /// append to.
491
  /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
492
  /// \param End - true if creating a dummy IRInstructionData at the end of a
493
  /// basic block.
494
  /// \returns The integer \p It was mapped to.
495
  unsigned mapToIllegalUnsigned(
496
      BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
497
      std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
498
 
499
  IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
500
                      SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
501
      : InstDataAllocator(IDA), IDLAllocator(IDLA) {
502
    // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
503
    // changed.
504
    assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
505
           "DenseMapInfo<unsigned>'s empty key isn't -1!");
506
    assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
507
               static_cast<unsigned>(-2) &&
508
           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
509
 
510
    IDL = new (IDLAllocator->Allocate())
511
        IRInstructionDataList();
512
  }
513
 
514
  /// Custom InstVisitor to classify different instructions for whether it can
515
  /// be analyzed for similarity.
516
  struct InstructionClassification
517
      : public InstVisitor<InstructionClassification, InstrType> {
518
    InstructionClassification() = default;
519
 
520
    // TODO: Determine a scheme to resolve when the label is similar enough.
521
    InstrType visitBranchInst(BranchInst &BI) {
522
      if (EnableBranches)
523
        return Legal;
524
      return Illegal;
525
    }
526
    InstrType visitPHINode(PHINode &PN) {
527
      if (EnableBranches)
528
        return Legal;
529
      return Illegal;
530
    }
531
    // TODO: Handle allocas.
532
    InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
533
    // We exclude variable argument instructions since variable arguments
534
    // requires extra checking of the argument list.
535
    InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
536
    // We exclude all exception handling cases since they are so context
537
    // dependent.
538
    InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
539
    InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
540
    // DebugInfo should be included in the regions, but should not be
541
    // analyzed for similarity as it has no bearing on the outcome of the
542
    // program.
543
    InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
544
    InstrType visitIntrinsicInst(IntrinsicInst &II) {
545
      // These are disabled due to complications in the CodeExtractor when
546
      // outlining these instructions.  For instance, It is unclear what we
547
      // should do when moving only the start or end lifetime instruction into
548
      // an outlined function. Also, assume-like intrinsics could be removed
549
      // from the region, removing arguments, causing discrepencies in the
550
      // number of inputs between different regions.
551
      if (II.isAssumeLikeIntrinsic())
552
        return Illegal;
553
      return EnableIntrinsics ? Legal : Illegal;
554
    }
555
    // We only allow call instructions where the function has a name and
556
    // is not an indirect call.
557
    InstrType visitCallInst(CallInst &CI) {
558
      Function *F = CI.getCalledFunction();
559
      bool IsIndirectCall = CI.isIndirectCall();
560
      if (IsIndirectCall && !EnableIndirectCalls)
561
        return Illegal;
562
      if (!F && !IsIndirectCall)
563
        return Illegal;
564
      // Functions marked with the swifttailcc and tailcc calling conventions
565
      // require special handling when outlining musttail functions.  The
566
      // calling convention must be passed down to the outlined function as
567
      // well. Further, there is special handling for musttail calls as well,
568
      // requiring a return call directly after.  For now, the outliner does not
569
      // support this, so we do not handle matching this case either.
570
      if ((CI.getCallingConv() == CallingConv::SwiftTail ||
571
           CI.getCallingConv() == CallingConv::Tail) &&
572
          !EnableMustTailCalls)
573
        return Illegal;
574
      if (CI.isMustTailCall() && !EnableMustTailCalls)
575
        return Illegal;
576
      return Legal;
577
    }
578
    // TODO: We do not current handle similarity that changes the control flow.
579
    InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
580
    // TODO: We do not current handle similarity that changes the control flow.
581
    InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
582
    // TODO: Handle interblock similarity.
583
    InstrType visitTerminator(Instruction &I) { return Illegal; }
584
    InstrType visitInstruction(Instruction &I) { return Legal; }
585
 
586
    // The flag variable that lets the classifier know whether we should
587
    // allow branches to be checked for similarity.
588
    bool EnableBranches = false;
589
 
590
    // The flag variable that lets the classifier know whether we should
591
    // allow indirect calls to be considered legal instructions.
592
    bool EnableIndirectCalls = false;
593
 
594
    // Flag that lets the classifier know whether we should allow intrinsics to
595
    // be checked for similarity.
596
    bool EnableIntrinsics = false;
597
 
598
    // Flag that lets the classifier know whether we should allow tail calls to
599
    // be checked for similarity.
600
    bool EnableMustTailCalls = false;
601
  };
602
 
603
  /// Maps an Instruction to a member of InstrType.
604
  InstructionClassification InstClassifier;
605
};
606
 
607
/// This is a class that wraps a range of IRInstructionData from one point to
608
/// another in the vector of IRInstructionData, which is a region of the
609
/// program.  It is also responsible for defining the structure within this
610
/// region of instructions.
611
///
612
/// The structure of a region is defined through a value numbering system
613
/// assigned to each unique value in a region at the creation of the
614
/// IRSimilarityCandidate.
615
///
616
/// For example, for each Instruction we add a mapping for each new
617
/// value seen in that Instruction.
618
/// IR:                    Mapping Added:
619
/// %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
620
/// %add2 = add i32 %a, %1    %add2 -> 4
621
/// %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
622
///
623
/// We can compare IRSimilarityCandidates against one another.
624
/// The \ref isSimilar function compares each IRInstructionData against one
625
/// another and if we have the same sequences of IRInstructionData that would
626
/// create the same hash, we have similar IRSimilarityCandidates.
627
///
628
/// We can also compare the structure of IRSimilarityCandidates. If we can
629
/// create a mapping of registers in the region contained by one
630
/// IRSimilarityCandidate to the region contained by different
631
/// IRSimilarityCandidate, they can be considered structurally similar.
632
///
633
/// IRSimilarityCandidate1:   IRSimilarityCandidate2:
634
/// %add1 = add i32 %a, %b    %add1 = add i32 %d, %e
635
/// %add2 = add i32 %a, %c    %add2 = add i32 %d, %f
636
/// %add3 = add i32 c1, c2    %add3 = add i32 c3, c4
637
///
638
/// Can have the following mapping from candidate to candidate of:
639
/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
640
/// and can be considered similar.
641
///
642
/// IRSimilarityCandidate1:   IRSimilarityCandidate2:
643
/// %add1 = add i32 %a, %b    %add1 = add i32 %d, c4
644
/// %add2 = add i32 %a, %c    %add2 = add i32 %d, %f
645
/// %add3 = add i32 c1, c2    %add3 = add i32 c3, c4
646
///
647
/// We cannot create the same mapping since the use of c4 is not used in the
648
/// same way as %b or c2.
649
class IRSimilarityCandidate {
650
private:
651
  /// The start index of this IRSimilarityCandidate in the instruction list.
652
  unsigned StartIdx = 0;
653
 
654
  /// The number of instructions in this IRSimilarityCandidate.
655
  unsigned Len = 0;
656
 
657
  /// The first instruction in this IRSimilarityCandidate.
658
  IRInstructionData *FirstInst = nullptr;
659
 
660
  /// The last instruction in this IRSimilarityCandidate.
661
  IRInstructionData *LastInst = nullptr;
662
 
663
  /// Global Value Numbering structures
664
  /// @{
665
  /// Stores the mapping of the value to the number assigned to it in the
666
  /// IRSimilarityCandidate.
667
  DenseMap<Value *, unsigned> ValueToNumber;
668
  /// Stores the mapping of the number to the value assigned this number.
669
  DenseMap<unsigned, Value *> NumberToValue;
670
  /// Stores the mapping of a value's number to canonical numbering in the
671
  /// candidate's respective similarity group.
672
  DenseMap<unsigned, unsigned> NumberToCanonNum;
673
  /// Stores the mapping of canonical number in the candidate's respective
674
  /// similarity group to a value number.
675
  DenseMap<unsigned, unsigned> CanonNumToNumber;
676
  /// @}
677
 
678
public:
679
  /// \param StartIdx - The starting location of the region.
680
  /// \param Len - The length of the region.
681
  /// \param FirstInstIt - The starting IRInstructionData of the region.
682
  /// \param LastInstIt - The ending IRInstructionData of the region.
683
  IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
684
                        IRInstructionData *FirstInstIt,
685
                        IRInstructionData *LastInstIt);
686
 
687
  /// \param A - The first IRInstructionCandidate to compare.
688
  /// \param B - The second IRInstructionCandidate to compare.
689
  /// \returns True when every IRInstructionData in \p A is similar to every
690
  /// IRInstructionData in \p B.
691
  static bool isSimilar(const IRSimilarityCandidate &A,
692
                        const IRSimilarityCandidate &B);
693
 
694
  /// \param [in] A - The first IRInstructionCandidate to compare.
695
  /// \param [in] B - The second IRInstructionCandidate to compare.
696
  /// \returns True when every IRInstructionData in \p A is structurally similar
697
  /// to \p B.
698
  static bool compareStructure(const IRSimilarityCandidate &A,
699
                               const IRSimilarityCandidate &B);
700
 
701
  /// \param [in] A - The first IRInstructionCandidate to compare.
702
  /// \param [in] B - The second IRInstructionCandidate to compare.
703
  /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
704
  /// candidate \p A to candidate \B.
705
  /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
706
  /// candidate \p B to candidate \A.
707
  /// \returns True when every IRInstructionData in \p A is structurally similar
708
  /// to \p B.
709
  static bool
710
  compareStructure(const IRSimilarityCandidate &A,
711
                   const IRSimilarityCandidate &B,
712
                   DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
713
                   DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
714
 
715
  struct OperandMapping {
716
    /// The IRSimilarityCandidate that holds the instruction the OperVals were
717
    /// pulled from.
718
    const IRSimilarityCandidate &IRSC;
719
 
720
    /// The operand values to be analyzed.
721
    ArrayRef<Value *> &OperVals;
722
 
723
    /// The current mapping of global value numbers from one IRSimilarityCandidate
724
    /// to another IRSimilarityCandidate.
725
    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
726
  };
727
 
728
  /// A helper struct to hold the candidate, for a branch instruction, the
729
  /// relative location of a label, and the label itself.  This is mostly to
730
  /// group the values together before passing them as a bundle to a function.
731
  struct RelativeLocMapping {
732
    /// The IRSimilarityCandidate that holds the instruction the relative
733
    /// location was pulled from.
734
    const IRSimilarityCandidate &IRSC;
735
 
736
    /// The relative location to be analyzed.
737
    int RelativeLocation;
738
 
739
    /// The corresponding value.
740
    Value *OperVal;
741
  };
742
 
743
  /// Compare the operands in \p A and \p B and check that the current mapping
744
  /// of global value numbers from \p A to \p B and \p B to \A is consistent.
745
  ///
746
  /// \param A - The first IRInstructionCandidate, operand values, and current
747
  /// operand mappings to compare.
748
  /// \param B - The second IRInstructionCandidate, operand values, and current
749
  /// operand mappings to compare.
750
  /// \returns true if the IRSimilarityCandidates operands are compatible.
751
  static bool compareNonCommutativeOperandMapping(OperandMapping A,
752
                                                  OperandMapping B);
753
 
754
  /// Compare the operands in \p A and \p B and check that the current mapping
755
  /// of global value numbers from \p A to \p B and \p B to \A is consistent
756
  /// given that the operands are commutative.
757
  ///
758
  /// \param A - The first IRInstructionCandidate, operand values, and current
759
  /// operand mappings to compare.
760
  /// \param B - The second IRInstructionCandidate, operand values, and current
761
  /// operand mappings to compare.
762
  /// \returns true if the IRSimilarityCandidates operands are compatible.
763
  static bool compareCommutativeOperandMapping(OperandMapping A,
764
                                               OperandMapping B);
765
 
766
  /// Compare the relative locations in \p A and \p B and check that the
767
  /// distances match if both locations are contained in the region, and that
768
  /// the branches both point outside the region if they do not.
769
  /// Example Region:
770
  /// \code
771
  /// entry:
772
  ///   br i1 %0, label %block_1, label %block_3
773
  /// block_0:
774
  ///   br i1 %0, label %block_1, label %block_2
775
  /// block_1:
776
  ///   br i1 %0, label %block_2, label %block_3
777
  /// block_2:
778
  ///   br i1 %1, label %block_1, label %block_4
779
  /// block_3:
780
  ///   br i1 %2, label %block_2, label %block_5
781
  /// \endcode
782
  /// If we compare the branches in block_0 and block_1 the relative values are
783
  /// 1 and 2 for both, so we consider this a match.
784
  ///
785
  /// If we compare the branches in entry and block_0 the relative values are
786
  /// 2 and 3, and 1 and 2 respectively.  Since these are not the same we do not
787
  /// consider them a match.
788
  ///
789
  /// If we compare the branches in block_1 and block_2 the relative values are
790
  /// 1 and 2, and -1 and None respectively.  As a result we do not consider
791
  /// these to be the same
792
  ///
793
  /// If we compare the branches in block_2 and block_3 the relative values are
794
  /// -1 and None for both.  We do consider these to be a match.
795
  ///
796
  /// \param A - The first IRInstructionCandidate, relative location value,
797
  /// and incoming block.
798
  /// \param B - The second IRInstructionCandidate, relative location value,
799
  /// and incoming block.
800
  /// \returns true if the relative locations match.
801
  static bool checkRelativeLocations(RelativeLocMapping A,
802
                                     RelativeLocMapping B);
803
 
804
  /// Create a mapping from the value numbering to a different separate set of
805
  /// numbers. This will serve as a guide for relating one candidate to another.
806
  /// The canonical number gives use the ability identify which global value
807
  /// number in one candidate relates to the global value number in the other.
808
  ///
809
  /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
810
  /// canonical numbering for.
811
  static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
812
 
813
  /// Create a mapping for the value numbering of the calling
814
  /// IRSimilarityCandidate, to a different separate set of numbers, based on
815
  /// the canonical ordering in \p SourceCand. These are defined based on the
816
  /// found mappings in \p ToSourceMapping and \p FromSourceMapping.  Both of
817
  /// these relationships should have the same information, just in opposite
818
  /// directions.
819
  ///
820
  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
821
  /// canonical numbering from.
822
  /// \param ToSourceMapping - The mapping of value numbers from this candidate
823
  /// to \p SourceCand.
824
  /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
825
  /// to this candidate.
826
  void createCanonicalRelationFrom(
827
      IRSimilarityCandidate &SourceCand,
828
      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
829
      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
830
 
831
  /// \param [in,out] BBSet - The set to track the basic blocks.
832
  void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
833
    for (IRInstructionData &ID : *this) {
834
      BasicBlock *BB = ID.Inst->getParent();
835
      BBSet.insert(BB);
836
    }
837
  }
838
 
839
  /// \param [in,out] BBSet - The set to track the basic blocks.
840
  /// \param [in,out] BBList - A list in order of use to track the basic blocks.
841
  void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
842
                      SmallVector<BasicBlock *> &BBList) const {
843
    for (IRInstructionData &ID : *this) {
844
      BasicBlock *BB = ID.Inst->getParent();
845
      if (BBSet.insert(BB).second)
846
        BBList.push_back(BB);
847
    }
848
  }
849
 
850
  /// Compare the start and end indices of the two IRSimilarityCandidates for
851
  /// whether they overlap. If the start instruction of one
852
  /// IRSimilarityCandidate is less than the end instruction of the other, and
853
  /// the start instruction of one is greater than the start instruction of the
854
  /// other, they overlap.
855
  ///
856
  /// \returns true if the IRSimilarityCandidates do not have overlapping
857
  /// instructions.
858
  static bool overlap(const IRSimilarityCandidate &A,
859
                      const IRSimilarityCandidate &B);
860
 
861
  /// \returns the number of instructions in this Candidate.
862
  unsigned getLength() const { return Len; }
863
 
864
  /// \returns the start index of this IRSimilarityCandidate.
865
  unsigned getStartIdx() const { return StartIdx; }
866
 
867
  /// \returns the end index of this IRSimilarityCandidate.
868
  unsigned getEndIdx() const { return StartIdx + Len - 1; }
869
 
870
  /// \returns The first IRInstructionData.
871
  IRInstructionData *front() const { return FirstInst; }
872
  /// \returns The last IRInstructionData.
873
  IRInstructionData *back() const { return LastInst; }
874
 
875
  /// \returns The first Instruction.
876
  Instruction *frontInstruction() { return FirstInst->Inst; }
877
  /// \returns The last Instruction
878
  Instruction *backInstruction() { return LastInst->Inst; }
879
 
880
  /// \returns The BasicBlock the IRSimilarityCandidate starts in.
881
  BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
882
  /// \returns The BasicBlock the IRSimilarityCandidate ends in.
883
  BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
884
 
885
  /// \returns The Function that the IRSimilarityCandidate is located in.
886
  Function *getFunction() { return getStartBB()->getParent(); }
887
 
888
  /// Finds the positive number associated with \p V if it has been mapped.
889
  /// \param [in] V - the Value to find.
890
  /// \returns The positive number corresponding to the value.
891
  /// \returns std::nullopt if not present.
892
  std::optional<unsigned> getGVN(Value *V) {
893
    assert(V != nullptr && "Value is a nullptr?");
894
    DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
895
    if (VNIt == ValueToNumber.end())
896
      return std::nullopt;
897
    return VNIt->second;
898
  }
899
 
900
  /// Finds the Value associate with \p Num if it exists.
901
  /// \param [in] Num - the number to find.
902
  /// \returns The Value associated with the number.
903
  /// \returns std::nullopt if not present.
904
  std::optional<Value *> fromGVN(unsigned Num) {
905
    DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
906
    if (VNIt == NumberToValue.end())
907
      return std::nullopt;
908
    assert(VNIt->second != nullptr && "Found value is a nullptr!");
909
    return VNIt->second;
910
  }
911
 
912
  /// Find the canonical number from the global value number \p N stored in the
913
  /// candidate.
914
  ///
915
  /// \param N - The global value number to find the canonical number for.
916
  /// \returns An optional containing the value, and std::nullopt if it could
917
  /// not be found.
918
  std::optional<unsigned> getCanonicalNum(unsigned N) {
919
    DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N);
920
    if (NCIt == NumberToCanonNum.end())
921
      return std::nullopt;
922
    return NCIt->second;
923
  }
924
 
925
  /// Find the global value number from the canonical number \p N stored in the
926
  /// candidate.
927
  ///
928
  /// \param N - The canonical number to find the global vlaue number for.
929
  /// \returns An optional containing the value, and std::nullopt if it could
930
  /// not be found.
931
  std::optional<unsigned> fromCanonicalNum(unsigned N) {
932
    DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N);
933
    if (CNIt == CanonNumToNumber.end())
934
      return std::nullopt;
935
    return CNIt->second;
936
  }
937
 
938
  /// \param RHS -The IRSimilarityCandidate to compare against
939
  /// \returns true if the IRSimilarityCandidate is occurs after the
940
  /// IRSimilarityCandidate in the program.
941
  bool operator<(const IRSimilarityCandidate &RHS) const {
942
    return getStartIdx() > RHS.getStartIdx();
943
  }
944
 
945
  using iterator = IRInstructionDataList::iterator;
946
  iterator begin() const { return iterator(front()); }
947
  iterator end() const { return std::next(iterator(back())); }
948
};
949
 
950
typedef DenseMap<IRSimilarityCandidate *,
951
                 DenseMap<unsigned, DenseSet<unsigned>>>
952
    CandidateGVNMapping;
953
typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
954
typedef std::vector<SimilarityGroup> SimilarityGroupList;
955
 
956
/// This class puts all the pieces of the IRInstructionData,
957
/// IRInstructionMapper, IRSimilarityCandidate together.
958
///
959
/// It first feeds the Module or vector of Modules into the IRInstructionMapper,
960
/// and puts all the mapped instructions into a single long list of
961
/// IRInstructionData.
962
///
963
/// The list of unsigned integers is given to the Suffix Tree or similar data
964
/// structure to find repeated subsequences.  We construct an
965
/// IRSimilarityCandidate for each instance of the subsequence.  We compare them
966
/// against one another since  These repeated subsequences can have different
967
/// structure.  For each different kind of structure found, we create a
968
/// similarity group.
969
///
970
/// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
971
/// structurally similar to one another, while C is different we would have two
972
/// SimilarityGroups:
973
///
974
/// SimilarityGroup 1:  SimilarityGroup 2
975
/// A, B, D             C
976
///
977
/// A list of the different similarity groups is then returned after
978
/// analyzing the module.
979
class IRSimilarityIdentifier {
980
public:
981
  IRSimilarityIdentifier(bool MatchBranches = true,
982
                         bool MatchIndirectCalls = true,
983
                         bool MatchCallsWithName = false,
984
                         bool MatchIntrinsics = true,
985
                         bool MatchMustTailCalls = true)
986
      : Mapper(&InstDataAllocator, &InstDataListAllocator),
987
        EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
988
        EnableMatchingCallsByName(MatchCallsWithName),
989
        EnableIntrinsics(MatchIntrinsics),
990
        EnableMustTailCalls(MatchMustTailCalls) {}
991
 
992
private:
993
  /// Map the instructions in the module to unsigned integers, using mapping
994
  /// already present in the Mapper if possible.
995
  ///
996
  /// \param [in] M Module - To map to integers.
997
  /// \param [in,out] InstrList - The vector to append IRInstructionData to.
998
  /// \param [in,out] IntegerMapping - The vector to append integers to.
999
  void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
1000
                      std::vector<unsigned> &IntegerMapping);
1001
 
1002
  /// Map the instructions in the modules vector to unsigned integers, using
1003
  /// mapping already present in the mapper if possible.
1004
  ///
1005
  /// \param [in] Modules - The list of modules to use to populate the mapper
1006
  /// \param [in,out] InstrList - The vector to append IRInstructionData to.
1007
  /// \param [in,out] IntegerMapping - The vector to append integers to.
1008
  void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
1009
                      std::vector<IRInstructionData *> &InstrList,
1010
                      std::vector<unsigned> &IntegerMapping);
1011
 
1012
  /// Find the similarity candidates in \p InstrList and corresponding
1013
  /// \p UnsignedVec
1014
  ///
1015
  /// \param [in,out] InstrList - The vector to append IRInstructionData to.
1016
  /// \param [in,out] IntegerMapping - The vector to append integers to.
1017
  /// candidates found in the program.
1018
  void findCandidates(std::vector<IRInstructionData *> &InstrList,
1019
                      std::vector<unsigned> &IntegerMapping);
1020
 
1021
public:
1022
  // Find the IRSimilarityCandidates in the \p Modules and group by structural
1023
  // similarity in a SimilarityGroup, each group is returned in a
1024
  // SimilarityGroupList.
1025
  //
1026
  // \param [in] Modules - the modules to analyze.
1027
  // \returns The groups of similarity ranges found in the modules.
1028
  SimilarityGroupList &
1029
  findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
1030
 
1031
  // Find the IRSimilarityCandidates in the given Module grouped by structural
1032
  // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
1033
  //
1034
  // \param [in] M - the module to analyze.
1035
  // \returns The groups of similarity ranges found in the module.
1036
  SimilarityGroupList &findSimilarity(Module &M);
1037
 
1038
  // Clears \ref SimilarityCandidates if it is already filled by a previous run.
1039
  void resetSimilarityCandidates() {
1040
    // If we've already analyzed a Module or set of Modules, so we must clear
1041
    // the SimilarityCandidates to make sure we do not have only old values
1042
    // hanging around.
1043
    if (SimilarityCandidates)
1044
      SimilarityCandidates->clear();
1045
    else
1046
      SimilarityCandidates = SimilarityGroupList();
1047
  }
1048
 
1049
  // \returns The groups of similarity ranges found in the most recently passed
1050
  // set of modules.
1051
  std::optional<SimilarityGroupList> &getSimilarity() {
1052
    return SimilarityCandidates;
1053
  }
1054
 
1055
private:
1056
  /// The allocator for IRInstructionData.
1057
  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
1058
 
1059
  /// The allocator for IRInstructionDataLists.
1060
  SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
1061
 
1062
  /// Map Instructions to unsigned integers and wraps the Instruction in an
1063
  /// instance of IRInstructionData.
1064
  IRInstructionMapper Mapper;
1065
 
1066
  /// The flag variable that marks whether we should check branches for
1067
  /// similarity, or only look within basic blocks.
1068
  bool EnableBranches = true;
1069
 
1070
  /// The flag variable that marks whether we allow indirect calls to be checked
1071
  /// for similarity, or exclude them as a legal instruction.
1072
  bool EnableIndirectCalls = true;
1073
 
1074
  /// The flag variable that marks whether we allow calls to be marked as
1075
  /// similar if they do not have the same name, only the same calling
1076
  /// convention, attributes and type signature.
1077
  bool EnableMatchingCallsByName = true;
1078
 
1079
  /// The flag variable that marks whether we should check intrinsics for
1080
  /// similarity.
1081
  bool EnableIntrinsics = true;
1082
 
1083
  // The flag variable that marks whether we should allow tailcalls
1084
  // to be checked for similarity.
1085
  bool EnableMustTailCalls = false;
1086
 
1087
  /// The SimilarityGroups found with the most recent run of \ref
1088
  /// findSimilarity. std::nullopt if there is no recent run.
1089
  std::optional<SimilarityGroupList> SimilarityCandidates;
1090
};
1091
 
1092
} // end namespace IRSimilarity
1093
 
1094
/// An analysis pass based on legacy pass manager that runs and returns
1095
/// IRSimilarityIdentifier run on the Module.
1096
class IRSimilarityIdentifierWrapperPass : public ModulePass {
1097
  std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
1098
 
1099
public:
1100
  static char ID;
1101
  IRSimilarityIdentifierWrapperPass();
1102
 
1103
  IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
1104
  const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
1105
 
1106
  bool doInitialization(Module &M) override;
1107
  bool doFinalization(Module &M) override;
1108
  bool runOnModule(Module &M) override;
1109
  void getAnalysisUsage(AnalysisUsage &AU) const override {
1110
    AU.setPreservesAll();
1111
  }
1112
};
1113
 
1114
/// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
1115
/// Module.
1116
class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
1117
public:
1118
  typedef IRSimilarity::IRSimilarityIdentifier Result;
1119
 
1120
  Result run(Module &M, ModuleAnalysisManager &);
1121
 
1122
private:
1123
  friend AnalysisInfoMixin<IRSimilarityAnalysis>;
1124
  static AnalysisKey Key;
1125
};
1126
 
1127
/// Printer pass that uses \c IRSimilarityAnalysis.
1128
class IRSimilarityAnalysisPrinterPass
1129
    : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
1130
  raw_ostream &OS;
1131
 
1132
public:
1133
  explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
1134
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
1135
};
1136
 
1137
} // end namespace llvm
1138
 
1139
#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H