Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // \file
  10. // Interface file for the IRSimilarityIdentifier for identifying similarities in
  11. // IR including the IRInstructionMapper, which maps an Instruction to unsigned
  12. // integers.
  13. //
  14. // Two sequences of instructions are called "similar" if they perform the same
  15. // series of operations for all inputs.
  16. //
  17. // \code
  18. // %1 = add i32 %a, 10
  19. // %2 = add i32 %a, %1
  20. // %3 = icmp slt icmp %1, %2
  21. // \endcode
  22. //
  23. // and
  24. //
  25. // \code
  26. // %1 = add i32 11, %a
  27. // %2 = sub i32 %a, %1
  28. // %3 = icmp sgt icmp %2, %1
  29. // \endcode
  30. //
  31. // ultimately have the same result, even if the inputs, and structure are
  32. // slightly different.
  33. //
  34. // For instructions, we do not worry about operands that do not have fixed
  35. // semantic meaning to the program.  We consider the opcode that the instruction
  36. // has, the types, parameters, and extra information such as the function name,
  37. // or comparison predicate.  These are used to create a hash to map instructions
  38. // to integers to be used in similarity matching in sequences of instructions
  39. //
  40. // Terminology:
  41. // An IRSimilarityCandidate is a region of IRInstructionData (wrapped
  42. // Instructions), usually used to denote a region of similarity has been found.
  43. //
  44. // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally
  45. // similar to one another.
  46. //
  47. //===----------------------------------------------------------------------===//
  48.  
  49. #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  50. #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  51.  
  52. #include "llvm/IR/InstVisitor.h"
  53. #include "llvm/IR/Instructions.h"
  54. #include "llvm/IR/PassManager.h"
  55. #include "llvm/Pass.h"
  56. #include "llvm/Support/Allocator.h"
  57. #include <optional>
  58.  
  59. namespace llvm {
  60. class Module;
  61.  
  62. namespace IRSimilarity {
  63.  
  64. struct IRInstructionDataList;
  65.  
  66. /// This represents what is and is not supported when finding similarity in
  67. /// Instructions.
  68. ///
  69. /// Legal Instructions are considered when looking at similarity between
  70. /// Instructions.
  71. ///
  72. /// Illegal Instructions cannot be considered when looking for similarity
  73. /// between Instructions. They act as boundaries between similarity regions.
  74. ///
  75. /// Invisible Instructions are skipped over during analysis.
  76. // TODO: Shared with MachineOutliner
  77. enum InstrType { Legal, Illegal, Invisible };
  78.  
  79. /// This provides the utilities for hashing an Instruction to an unsigned
  80. /// integer. Two IRInstructionDatas produce the same hash value when their
  81. /// underlying Instructions perform the same operation (even if they don't have
  82. /// the same input operands.)
  83. /// As a more concrete example, consider the following:
  84. ///
  85. /// \code
  86. /// %add1 = add i32 %a, %b
  87. /// %add2 = add i32 %c, %d
  88. /// %add3 = add i64 %e, %f
  89. /// \endcode
  90. ///
  91. // Then the IRInstructionData wrappers for these Instructions may be hashed like
  92. /// so:
  93. ///
  94. /// \code
  95. /// ; These two adds have the same types and operand types, so they hash to the
  96. /// ; same number.
  97. /// %add1 = add i32 %a, %b ; Hash: 1
  98. /// %add2 = add i32 %c, %d ; Hash: 1
  99. /// ; This add produces an i64. This differentiates it from %add1 and %add2. So,
  100. /// ; it hashes to a different number.
  101. /// %add3 = add i64 %e, %f; Hash: 2
  102. /// \endcode
  103. ///
  104. ///
  105. /// This hashing scheme will be used to represent the program as a very long
  106. /// string. This string can then be placed in a data structure which can be used
  107. /// for similarity queries.
  108. ///
  109. /// TODO: Handle types of Instructions which can be equal even with different
  110. /// operands. (E.g. comparisons with swapped predicates.)
  111. /// TODO: Handle CallInsts, which are only checked for function type
  112. /// by \ref isSameOperationAs.
  113. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
  114. /// exact same, and some do not.
  115. struct IRInstructionData
  116.     : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
  117.  
  118.   /// The source Instruction that is being wrapped.
  119.   Instruction *Inst = nullptr;
  120.   /// The values of the operands in the Instruction.
  121.   SmallVector<Value *, 4> OperVals;
  122.   /// The legality of the wrapped instruction. This is informed by InstrType,
  123.   /// and is used when checking when two instructions are considered similar.
  124.   /// If either instruction is not legal, the instructions are automatically not
  125.   /// considered similar.
  126.   bool Legal = false;
  127.  
  128.   /// This is only relevant if we are wrapping a CmpInst where we needed to
  129.   /// change the predicate of a compare instruction from a greater than form
  130.   /// to a less than form.  It is None otherwise.
  131.   std::optional<CmpInst::Predicate> RevisedPredicate;
  132.  
  133.   /// This is only relevant if we are wrapping a CallInst. If we are requiring
  134.   /// that the function calls have matching names as well as types, and the
  135.   /// call is not an indirect call, this will hold the name of the function.  If
  136.   /// it is an indirect string, it will be the empty string.  However, if this
  137.   /// requirement is not in place it will be the empty string regardless of the
  138.   /// function call type.  The value held here is used to create the hash of the
  139.   /// instruction, and check to make sure two instructions are close to one
  140.   /// another.
  141.   std::optional<std::string> CalleeName;
  142.  
  143.   /// This structure holds the distances of how far "ahead of" or "behind" the
  144.   /// target blocks of a branch, or the incoming blocks of a phi nodes are.
  145.   /// If the value is negative, it means that the block was registered before
  146.   /// the block of this instruction in terms of blocks in the function.
  147.   /// Code Example:
  148.   /// \code
  149.   /// block_1:
  150.   ///   br i1 %0, label %block_2, label %block_3
  151.   /// block_2:
  152.   ///   br i1 %1, label %block_1, label %block_2
  153.   /// block_3:
  154.   ///   br i1 %2, label %block_2, label %block_1
  155.   /// ; Replacing the labels with relative values, this becomes:
  156.   /// block_1:
  157.   ///   br i1 %0, distance 1, distance 2
  158.   /// block_2:
  159.   ///   br i1 %1, distance -1, distance 0
  160.   /// block_3:
  161.   ///   br i1 %2, distance -1, distance -2
  162.   /// \endcode
  163.   /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
  164.   /// "ahead" of block_2.
  165.   SmallVector<int, 4> RelativeBlockLocations;
  166.  
  167.   /// Gather the information that is difficult to gather for an Instruction, or
  168.   /// is changed. i.e. the operands of an Instruction and the Types of those
  169.   /// operands. This extra information allows for similarity matching to make
  170.   /// assertions that allow for more flexibility when checking for whether an
  171.   /// Instruction performs the same operation.
  172.   IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
  173.   IRInstructionData(IRInstructionDataList &IDL);
  174.  
  175.   /// Fills data stuctures for IRInstructionData when it is constructed from a
  176.   // reference or a pointer.
  177.   void initializeInstruction();
  178.  
  179.   /// Get the predicate that the compare instruction is using for hashing the
  180.   /// instruction. the IRInstructionData must be wrapping a CmpInst.
  181.   CmpInst::Predicate getPredicate() const;
  182.  
  183.   /// Get the callee name that the call instruction is using for hashing the
  184.   /// instruction. The IRInstructionData must be wrapping a CallInst.
  185.   StringRef getCalleeName() const;
  186.  
  187.   /// A function that swaps the predicates to their less than form if they are
  188.   /// in a greater than form. Otherwise, the predicate is unchanged.
  189.   ///
  190.   /// \param CI - The comparison operation to find a consistent preidcate for.
  191.   /// \return the consistent comparison predicate.
  192.   static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
  193.  
  194.   /// For an IRInstructionData containing a branch, finds the
  195.   /// relative distances from the source basic block to the target by taking
  196.   /// the difference of the number assigned to the current basic block and the
  197.   /// target basic block of the branch.
  198.   ///
  199.   /// \param BasicBlockToInteger - The mapping of basic blocks to their location
  200.   /// in the module.
  201.   void
  202.   setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
  203.  
  204.   /// For an IRInstructionData containing a CallInst, set the function name
  205.   /// appropriately.  This will be an empty string if it is an indirect call,
  206.   /// or we are not matching by name of the called function.  It will be the
  207.   /// name of the function if \p MatchByName is true and it is not an indirect
  208.   /// call.  We may decide not to match by name in order to expand the
  209.   /// size of the regions we can match.  If a function name has the same type
  210.   /// signature, but the different name, the region of code is still almost the
  211.   /// same.  Since function names can be treated as constants, the name itself
  212.   /// could be extrapolated away.  However, matching by name provides a
  213.   /// specificity and more "identical" code than not matching by name.
  214.   ///
  215.   /// \param MatchByName - A flag to mark whether we are using the called
  216.   /// function name as a differentiating parameter.
  217.   void setCalleeName(bool MatchByName = true);
  218.  
  219.   /// For an IRInstructionData containing a PHINode, finds the
  220.   /// relative distances from the incoming basic block to the current block by
  221.   /// taking the difference of the number assigned to the current basic block
  222.   /// and the incoming basic block of the branch.
  223.   ///
  224.   /// \param BasicBlockToInteger - The mapping of basic blocks to their location
  225.   /// in the module.
  226.   void
  227.   setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
  228.  
  229.   /// Hashes \p Value based on its opcode, types, and operand types.
  230.   /// Two IRInstructionData instances produce the same hash when they perform
  231.   /// the same operation.
  232.   ///
  233.   /// As a simple example, consider the following instructions.
  234.   ///
  235.   /// \code
  236.   /// %add1 = add i32 %x1, %y1
  237.   /// %add2 = add i32 %x2, %y2
  238.   ///
  239.   /// %sub = sub i32 %x1, %y1
  240.   ///
  241.   /// %add_i64 = add i64 %x2, %y2
  242.   /// \endcode
  243.   ///
  244.   /// Because the first two adds operate the same types, and are performing the
  245.   /// same action, they will be hashed to the same value.
  246.   ///
  247.   /// However, the subtraction instruction is not the same as an addition, and
  248.   /// will be hashed to a different value.
  249.   ///
  250.   /// Finally, the last add has a different type compared to the first two add
  251.   /// instructions, so it will also be hashed to a different value that any of
  252.   /// the previous instructions.
  253.   ///
  254.   /// \param [in] ID - The IRInstructionData instance to be hashed.
  255.   /// \returns A hash_value of the IRInstructionData.
  256.   friend hash_code hash_value(const IRInstructionData &ID) {
  257.     SmallVector<Type *, 4> OperTypes;
  258.     for (Value *V : ID.OperVals)
  259.       OperTypes.push_back(V->getType());
  260.  
  261.     if (isa<CmpInst>(ID.Inst))
  262.       return llvm::hash_combine(
  263.           llvm::hash_value(ID.Inst->getOpcode()),
  264.           llvm::hash_value(ID.Inst->getType()),
  265.           llvm::hash_value(ID.getPredicate()),
  266.           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  267.  
  268.     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
  269.       // To hash intrinsics, we use the opcode, and types like the other
  270.       // instructions, but also, the Intrinsic ID, and the Name of the
  271.       // intrinsic.
  272.       Intrinsic::ID IntrinsicID = II->getIntrinsicID();
  273.       return llvm::hash_combine(
  274.           llvm::hash_value(ID.Inst->getOpcode()),
  275.           llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
  276.           llvm::hash_value(*ID.CalleeName),
  277.           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  278.     }
  279.  
  280.     if (isa<CallInst>(ID.Inst)) {
  281.       std::string FunctionName = *ID.CalleeName;
  282.       return llvm::hash_combine(
  283.           llvm::hash_value(ID.Inst->getOpcode()),
  284.           llvm::hash_value(ID.Inst->getType()),
  285.           llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
  286.           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  287.     }
  288.  
  289.     return llvm::hash_combine(
  290.         llvm::hash_value(ID.Inst->getOpcode()),
  291.         llvm::hash_value(ID.Inst->getType()),
  292.         llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
  293.   }
  294.  
  295.   IRInstructionDataList *IDL = nullptr;
  296. };
  297.  
  298. struct IRInstructionDataList
  299.     : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
  300.  
  301. /// Compare one IRInstructionData class to another IRInstructionData class for
  302. /// whether they are performing a the same operation, and can mapped to the
  303. /// same value. For regular instructions if the hash value is the same, then
  304. /// they will also be close.
  305. ///
  306. /// \param A - The first IRInstructionData class to compare
  307. /// \param B - The second IRInstructionData class to compare
  308. /// \returns true if \p A and \p B are similar enough to be mapped to the same
  309. /// value.
  310. bool isClose(const IRInstructionData &A, const IRInstructionData &B);
  311.  
  312. struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> {
  313.   static inline IRInstructionData *getEmptyKey() { return nullptr; }
  314.   static inline IRInstructionData *getTombstoneKey() {
  315.     return reinterpret_cast<IRInstructionData *>(-1);
  316.   }
  317.  
  318.   static unsigned getHashValue(const IRInstructionData *E) {
  319.     using llvm::hash_value;
  320.     assert(E && "IRInstructionData is a nullptr?");
  321.     return hash_value(*E);
  322.   }
  323.  
  324.   static bool isEqual(const IRInstructionData *LHS,
  325.                       const IRInstructionData *RHS) {
  326.     if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
  327.         LHS == getEmptyKey() || LHS == getTombstoneKey())
  328.       return LHS == RHS;
  329.  
  330.     assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?");
  331.     return isClose(*LHS, *RHS);
  332.   }
  333. };
  334.  
  335. /// Helper struct for converting the Instructions in a Module into a vector of
  336. /// unsigned integers. This vector of unsigned integers can be thought of as a
  337. /// "numeric string". This numeric string can then be queried by, for example,
  338. /// data structures that find repeated substrings.
  339. ///
  340. /// This hashing is done per BasicBlock in the module. To hash Instructions
  341. /// based off of their operations, each Instruction is wrapped in an
  342. /// IRInstructionData struct. The unsigned integer for an IRInstructionData
  343. /// depends on:
  344. /// - The hash provided by the IRInstructionData.
  345. /// - Which member of InstrType the IRInstructionData is classified as.
  346. // See InstrType for more details on the possible classifications, and how they
  347. // manifest in the numeric string.
  348. ///
  349. /// The numeric string for an individual BasicBlock is terminated by an unique
  350. /// unsigned integer. This prevents data structures which rely on repetition
  351. /// from matching across BasicBlocks. (For example, the SuffixTree.)
  352. /// As a concrete example, if we have the following two BasicBlocks:
  353. /// \code
  354. /// bb0:
  355. /// %add1 = add i32 %a, %b
  356. /// %add2 = add i32 %c, %d
  357. /// %add3 = add i64 %e, %f
  358. /// bb1:
  359. /// %sub = sub i32 %c, %d
  360. /// \endcode
  361. /// We may hash the Instructions like this (via IRInstructionData):
  362. /// \code
  363. /// bb0:
  364. /// %add1 = add i32 %a, %b ; Hash: 1
  365. /// %add2 = add i32 %c, %d; Hash: 1
  366. /// %add3 = add i64 %e, %f; Hash: 2
  367. /// bb1:
  368. /// %sub = sub i32 %c, %d; Hash: 3
  369. /// %add4 = add i32 %c, %d ; Hash: 1
  370. /// \endcode
  371. /// And produce a "numeric string representation" like so:
  372. /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2
  373. ///
  374. /// TODO: This is very similar to the MachineOutliner, and should be
  375. /// consolidated into the same interface.
  376. struct IRInstructionMapper {
  377.   /// The starting illegal instruction number to map to.
  378.   ///
  379.   /// Set to -3 for compatibility with DenseMapInfo<unsigned>.
  380.   unsigned IllegalInstrNumber = static_cast<unsigned>(-3);
  381.  
  382.   /// The next available integer to assign to a legal Instruction to.
  383.   unsigned LegalInstrNumber = 0;
  384.  
  385.   /// Correspondence from IRInstructionData to unsigned integers.
  386.   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
  387.       InstructionIntegerMap;
  388.  
  389.   /// A mapping for a basic block in a module to its assigned number/location
  390.   /// in the module.
  391.   DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
  392.  
  393.   /// Set if we added an illegal number in the previous step.
  394.   /// Since each illegal number is unique, we only need one of them between
  395.   /// each range of legal numbers. This lets us make sure we don't add more
  396.   /// than one illegal number per range.
  397.   bool AddedIllegalLastTime = false;
  398.  
  399.   /// Marks whether we found a illegal instruction in the previous step.
  400.   bool CanCombineWithPrevInstr = false;
  401.  
  402.   /// Marks whether we have found a set of instructions that is long enough
  403.   /// to be considered for similarity.
  404.   bool HaveLegalRange = false;
  405.  
  406.   /// Marks whether we should use exact function names, as well as types to
  407.   /// find similarity between calls.
  408.   bool EnableMatchCallsByName = false;
  409.  
  410.   /// This allocator pointer is in charge of holding on to the IRInstructionData
  411.   /// so it is not deallocated until whatever external tool is using it is done
  412.   /// with the information.
  413.   SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr;
  414.  
  415.   /// This allocator pointer is in charge of creating the IRInstructionDataList
  416.   /// so it is not deallocated until whatever external tool is using it is done
  417.   /// with the information.
  418.   SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr;
  419.  
  420.   /// Get an allocated IRInstructionData struct using the InstDataAllocator.
  421.   ///
  422.   /// \param I - The Instruction to wrap with IRInstructionData.
  423.   /// \param Legality - A boolean value that is true if the instruction is to
  424.   /// be considered for similarity, and false if not.
  425.   /// \param IDL - The InstructionDataList that the IRInstructionData is
  426.   /// inserted into.
  427.   /// \returns An allocated IRInstructionData struct.
  428.   IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
  429.                                                IRInstructionDataList &IDL);
  430.  
  431.   /// Get an empty allocated IRInstructionData struct using the
  432.   /// InstDataAllocator.
  433.   ///
  434.   /// \param IDL - The InstructionDataList that the IRInstructionData is
  435.   /// inserted into.
  436.   /// \returns An allocated IRInstructionData struct.
  437.   IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
  438.  
  439.   /// Get an allocated IRInstructionDataList object using the IDLAllocator.
  440.   ///
  441.   /// \returns An allocated IRInstructionDataList object.
  442.   IRInstructionDataList *allocateIRInstructionDataList();
  443.  
  444.   IRInstructionDataList *IDL = nullptr;
  445.  
  446.   /// Assigns values to all the basic blocks in function \p F starting from
  447.   /// integer \p BBNumber.
  448.   ///
  449.   /// \param F - The function containing the basic blocks to assign numbers to.
  450.   /// \param BBNumber - The number to start from.
  451.   void initializeForBBs(Function &F, unsigned &BBNumber) {
  452.     for (BasicBlock &BB : F)
  453.       BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++));
  454.   }
  455.  
  456.   /// Assigns values to all the basic blocks in Module \p M.
  457.   /// \param M - The module containing the basic blocks to assign numbers to.
  458.   void initializeForBBs(Module &M) {
  459.     unsigned BBNumber = 0;
  460.     for (Function &F : M)
  461.       initializeForBBs(F, BBNumber);
  462.   }
  463.  
  464.   /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
  465.   /// determined by \p InstrType. Two Instructions are mapped to the same value
  466.   /// if they are close as defined by the InstructionData class above.
  467.   ///
  468.   /// \param [in] BB - The BasicBlock to be mapped to integers.
  469.   /// \param [in,out] InstrList - Vector of IRInstructionData to append to.
  470.   /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to.
  471.   void convertToUnsignedVec(BasicBlock &BB,
  472.                             std::vector<IRInstructionData *> &InstrList,
  473.                             std::vector<unsigned> &IntegerMapping);
  474.  
  475.   /// Maps an Instruction to a legal integer.
  476.   ///
  477.   /// \param [in] It - The Instruction to be mapped to an integer.
  478.   /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
  479.   /// append to.
  480.   /// \param [in,out] InstrListForBB - Vector of InstructionData to append to.
  481.   /// \returns The integer \p It was mapped to.
  482.   unsigned mapToLegalUnsigned(BasicBlock::iterator &It,
  483.                               std::vector<unsigned> &IntegerMappingForBB,
  484.                               std::vector<IRInstructionData *> &InstrListForBB);
  485.  
  486.   /// Maps an Instruction to an illegal integer.
  487.   ///
  488.   /// \param [in] It - The \p Instruction to be mapped to an integer.
  489.   /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to
  490.   /// append to.
  491.   /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to.
  492.   /// \param End - true if creating a dummy IRInstructionData at the end of a
  493.   /// basic block.
  494.   /// \returns The integer \p It was mapped to.
  495.   unsigned mapToIllegalUnsigned(
  496.       BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
  497.       std::vector<IRInstructionData *> &InstrListForBB, bool End = false);
  498.  
  499.   IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA,
  500.                       SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA)
  501.       : InstDataAllocator(IDA), IDLAllocator(IDLA) {
  502.     // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
  503.     // changed.
  504.     assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) &&
  505.            "DenseMapInfo<unsigned>'s empty key isn't -1!");
  506.     assert(DenseMapInfo<unsigned>::getTombstoneKey() ==
  507.                static_cast<unsigned>(-2) &&
  508.            "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
  509.  
  510.     IDL = new (IDLAllocator->Allocate())
  511.         IRInstructionDataList();
  512.   }
  513.  
  514.   /// Custom InstVisitor to classify different instructions for whether it can
  515.   /// be analyzed for similarity.
  516.   struct InstructionClassification
  517.       : public InstVisitor<InstructionClassification, InstrType> {
  518.     InstructionClassification() = default;
  519.  
  520.     // TODO: Determine a scheme to resolve when the label is similar enough.
  521.     InstrType visitBranchInst(BranchInst &BI) {
  522.       if (EnableBranches)
  523.         return Legal;
  524.       return Illegal;
  525.     }
  526.     InstrType visitPHINode(PHINode &PN) {
  527.       if (EnableBranches)
  528.         return Legal;
  529.       return Illegal;
  530.     }
  531.     // TODO: Handle allocas.
  532.     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
  533.     // We exclude variable argument instructions since variable arguments
  534.     // requires extra checking of the argument list.
  535.     InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; }
  536.     // We exclude all exception handling cases since they are so context
  537.     // dependent.
  538.     InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; }
  539.     InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; }
  540.     // DebugInfo should be included in the regions, but should not be
  541.     // analyzed for similarity as it has no bearing on the outcome of the
  542.     // program.
  543.     InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
  544.     InstrType visitIntrinsicInst(IntrinsicInst &II) {
  545.       // These are disabled due to complications in the CodeExtractor when
  546.       // outlining these instructions.  For instance, It is unclear what we
  547.       // should do when moving only the start or end lifetime instruction into
  548.       // an outlined function. Also, assume-like intrinsics could be removed
  549.       // from the region, removing arguments, causing discrepencies in the
  550.       // number of inputs between different regions.
  551.       if (II.isAssumeLikeIntrinsic())
  552.         return Illegal;
  553.       return EnableIntrinsics ? Legal : Illegal;
  554.     }
  555.     // We only allow call instructions where the function has a name and
  556.     // is not an indirect call.
  557.     InstrType visitCallInst(CallInst &CI) {
  558.       Function *F = CI.getCalledFunction();
  559.       bool IsIndirectCall = CI.isIndirectCall();
  560.       if (IsIndirectCall && !EnableIndirectCalls)
  561.         return Illegal;
  562.       if (!F && !IsIndirectCall)
  563.         return Illegal;
  564.       // Functions marked with the swifttailcc and tailcc calling conventions
  565.       // require special handling when outlining musttail functions.  The
  566.       // calling convention must be passed down to the outlined function as
  567.       // well. Further, there is special handling for musttail calls as well,
  568.       // requiring a return call directly after.  For now, the outliner does not
  569.       // support this, so we do not handle matching this case either.
  570.       if ((CI.getCallingConv() == CallingConv::SwiftTail ||
  571.            CI.getCallingConv() == CallingConv::Tail) &&
  572.           !EnableMustTailCalls)
  573.         return Illegal;
  574.       if (CI.isMustTailCall() && !EnableMustTailCalls)
  575.         return Illegal;
  576.       return Legal;
  577.     }
  578.     // TODO: We do not current handle similarity that changes the control flow.
  579.     InstrType visitInvokeInst(InvokeInst &II) { return Illegal; }
  580.     // TODO: We do not current handle similarity that changes the control flow.
  581.     InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; }
  582.     // TODO: Handle interblock similarity.
  583.     InstrType visitTerminator(Instruction &I) { return Illegal; }
  584.     InstrType visitInstruction(Instruction &I) { return Legal; }
  585.  
  586.     // The flag variable that lets the classifier know whether we should
  587.     // allow branches to be checked for similarity.
  588.     bool EnableBranches = false;
  589.  
  590.     // The flag variable that lets the classifier know whether we should
  591.     // allow indirect calls to be considered legal instructions.
  592.     bool EnableIndirectCalls = false;
  593.  
  594.     // Flag that lets the classifier know whether we should allow intrinsics to
  595.     // be checked for similarity.
  596.     bool EnableIntrinsics = false;
  597.  
  598.     // Flag that lets the classifier know whether we should allow tail calls to
  599.     // be checked for similarity.
  600.     bool EnableMustTailCalls = false;
  601.   };
  602.  
  603.   /// Maps an Instruction to a member of InstrType.
  604.   InstructionClassification InstClassifier;
  605. };
  606.  
  607. /// This is a class that wraps a range of IRInstructionData from one point to
  608. /// another in the vector of IRInstructionData, which is a region of the
  609. /// program.  It is also responsible for defining the structure within this
  610. /// region of instructions.
  611. ///
  612. /// The structure of a region is defined through a value numbering system
  613. /// assigned to each unique value in a region at the creation of the
  614. /// IRSimilarityCandidate.
  615. ///
  616. /// For example, for each Instruction we add a mapping for each new
  617. /// value seen in that Instruction.
  618. /// IR:                    Mapping Added:
  619. /// %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
  620. /// %add2 = add i32 %a, %1    %add2 -> 4
  621. /// %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
  622. ///
  623. /// We can compare IRSimilarityCandidates against one another.
  624. /// The \ref isSimilar function compares each IRInstructionData against one
  625. /// another and if we have the same sequences of IRInstructionData that would
  626. /// create the same hash, we have similar IRSimilarityCandidates.
  627. ///
  628. /// We can also compare the structure of IRSimilarityCandidates. If we can
  629. /// create a mapping of registers in the region contained by one
  630. /// IRSimilarityCandidate to the region contained by different
  631. /// IRSimilarityCandidate, they can be considered structurally similar.
  632. ///
  633. /// IRSimilarityCandidate1:   IRSimilarityCandidate2:
  634. /// %add1 = add i32 %a, %b    %add1 = add i32 %d, %e
  635. /// %add2 = add i32 %a, %c    %add2 = add i32 %d, %f
  636. /// %add3 = add i32 c1, c2    %add3 = add i32 c3, c4
  637. ///
  638. /// Can have the following mapping from candidate to candidate of:
  639. /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
  640. /// and can be considered similar.
  641. ///
  642. /// IRSimilarityCandidate1:   IRSimilarityCandidate2:
  643. /// %add1 = add i32 %a, %b    %add1 = add i32 %d, c4
  644. /// %add2 = add i32 %a, %c    %add2 = add i32 %d, %f
  645. /// %add3 = add i32 c1, c2    %add3 = add i32 c3, c4
  646. ///
  647. /// We cannot create the same mapping since the use of c4 is not used in the
  648. /// same way as %b or c2.
  649. class IRSimilarityCandidate {
  650. private:
  651.   /// The start index of this IRSimilarityCandidate in the instruction list.
  652.   unsigned StartIdx = 0;
  653.  
  654.   /// The number of instructions in this IRSimilarityCandidate.
  655.   unsigned Len = 0;
  656.  
  657.   /// The first instruction in this IRSimilarityCandidate.
  658.   IRInstructionData *FirstInst = nullptr;
  659.  
  660.   /// The last instruction in this IRSimilarityCandidate.
  661.   IRInstructionData *LastInst = nullptr;
  662.  
  663.   /// Global Value Numbering structures
  664.   /// @{
  665.   /// Stores the mapping of the value to the number assigned to it in the
  666.   /// IRSimilarityCandidate.
  667.   DenseMap<Value *, unsigned> ValueToNumber;
  668.   /// Stores the mapping of the number to the value assigned this number.
  669.   DenseMap<unsigned, Value *> NumberToValue;
  670.   /// Stores the mapping of a value's number to canonical numbering in the
  671.   /// candidate's respective similarity group.
  672.   DenseMap<unsigned, unsigned> NumberToCanonNum;
  673.   /// Stores the mapping of canonical number in the candidate's respective
  674.   /// similarity group to a value number.
  675.   DenseMap<unsigned, unsigned> CanonNumToNumber;
  676.   /// @}
  677.  
  678. public:
  679.   /// \param StartIdx - The starting location of the region.
  680.   /// \param Len - The length of the region.
  681.   /// \param FirstInstIt - The starting IRInstructionData of the region.
  682.   /// \param LastInstIt - The ending IRInstructionData of the region.
  683.   IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
  684.                         IRInstructionData *FirstInstIt,
  685.                         IRInstructionData *LastInstIt);
  686.  
  687.   /// \param A - The first IRInstructionCandidate to compare.
  688.   /// \param B - The second IRInstructionCandidate to compare.
  689.   /// \returns True when every IRInstructionData in \p A is similar to every
  690.   /// IRInstructionData in \p B.
  691.   static bool isSimilar(const IRSimilarityCandidate &A,
  692.                         const IRSimilarityCandidate &B);
  693.  
  694.   /// \param [in] A - The first IRInstructionCandidate to compare.
  695.   /// \param [in] B - The second IRInstructionCandidate to compare.
  696.   /// \returns True when every IRInstructionData in \p A is structurally similar
  697.   /// to \p B.
  698.   static bool compareStructure(const IRSimilarityCandidate &A,
  699.                                const IRSimilarityCandidate &B);
  700.  
  701.   /// \param [in] A - The first IRInstructionCandidate to compare.
  702.   /// \param [in] B - The second IRInstructionCandidate to compare.
  703.   /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
  704.   /// candidate \p A to candidate \B.
  705.   /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
  706.   /// candidate \p B to candidate \A.
  707.   /// \returns True when every IRInstructionData in \p A is structurally similar
  708.   /// to \p B.
  709.   static bool
  710.   compareStructure(const IRSimilarityCandidate &A,
  711.                    const IRSimilarityCandidate &B,
  712.                    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
  713.                    DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
  714.  
  715.   struct OperandMapping {
  716.     /// The IRSimilarityCandidate that holds the instruction the OperVals were
  717.     /// pulled from.
  718.     const IRSimilarityCandidate &IRSC;
  719.  
  720.     /// The operand values to be analyzed.
  721.     ArrayRef<Value *> &OperVals;
  722.  
  723.     /// The current mapping of global value numbers from one IRSimilarityCandidate
  724.     /// to another IRSimilarityCandidate.
  725.     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
  726.   };
  727.  
  728.   /// A helper struct to hold the candidate, for a branch instruction, the
  729.   /// relative location of a label, and the label itself.  This is mostly to
  730.   /// group the values together before passing them as a bundle to a function.
  731.   struct RelativeLocMapping {
  732.     /// The IRSimilarityCandidate that holds the instruction the relative
  733.     /// location was pulled from.
  734.     const IRSimilarityCandidate &IRSC;
  735.  
  736.     /// The relative location to be analyzed.
  737.     int RelativeLocation;
  738.  
  739.     /// The corresponding value.
  740.     Value *OperVal;
  741.   };
  742.  
  743.   /// Compare the operands in \p A and \p B and check that the current mapping
  744.   /// of global value numbers from \p A to \p B and \p B to \A is consistent.
  745.   ///
  746.   /// \param A - The first IRInstructionCandidate, operand values, and current
  747.   /// operand mappings to compare.
  748.   /// \param B - The second IRInstructionCandidate, operand values, and current
  749.   /// operand mappings to compare.
  750.   /// \returns true if the IRSimilarityCandidates operands are compatible.
  751.   static bool compareNonCommutativeOperandMapping(OperandMapping A,
  752.                                                   OperandMapping B);
  753.  
  754.   /// Compare the operands in \p A and \p B and check that the current mapping
  755.   /// of global value numbers from \p A to \p B and \p B to \A is consistent
  756.   /// given that the operands are commutative.
  757.   ///
  758.   /// \param A - The first IRInstructionCandidate, operand values, and current
  759.   /// operand mappings to compare.
  760.   /// \param B - The second IRInstructionCandidate, operand values, and current
  761.   /// operand mappings to compare.
  762.   /// \returns true if the IRSimilarityCandidates operands are compatible.
  763.   static bool compareCommutativeOperandMapping(OperandMapping A,
  764.                                                OperandMapping B);
  765.  
  766.   /// Compare the relative locations in \p A and \p B and check that the
  767.   /// distances match if both locations are contained in the region, and that
  768.   /// the branches both point outside the region if they do not.
  769.   /// Example Region:
  770.   /// \code
  771.   /// entry:
  772.   ///   br i1 %0, label %block_1, label %block_3
  773.   /// block_0:
  774.   ///   br i1 %0, label %block_1, label %block_2
  775.   /// block_1:
  776.   ///   br i1 %0, label %block_2, label %block_3
  777.   /// block_2:
  778.   ///   br i1 %1, label %block_1, label %block_4
  779.   /// block_3:
  780.   ///   br i1 %2, label %block_2, label %block_5
  781.   /// \endcode
  782.   /// If we compare the branches in block_0 and block_1 the relative values are
  783.   /// 1 and 2 for both, so we consider this a match.
  784.   ///
  785.   /// If we compare the branches in entry and block_0 the relative values are
  786.   /// 2 and 3, and 1 and 2 respectively.  Since these are not the same we do not
  787.   /// consider them a match.
  788.   ///
  789.   /// If we compare the branches in block_1 and block_2 the relative values are
  790.   /// 1 and 2, and -1 and None respectively.  As a result we do not consider
  791.   /// these to be the same
  792.   ///
  793.   /// If we compare the branches in block_2 and block_3 the relative values are
  794.   /// -1 and None for both.  We do consider these to be a match.
  795.   ///
  796.   /// \param A - The first IRInstructionCandidate, relative location value,
  797.   /// and incoming block.
  798.   /// \param B - The second IRInstructionCandidate, relative location value,
  799.   /// and incoming block.
  800.   /// \returns true if the relative locations match.
  801.   static bool checkRelativeLocations(RelativeLocMapping A,
  802.                                      RelativeLocMapping B);
  803.  
  804.   /// Create a mapping from the value numbering to a different separate set of
  805.   /// numbers. This will serve as a guide for relating one candidate to another.
  806.   /// The canonical number gives use the ability identify which global value
  807.   /// number in one candidate relates to the global value number in the other.
  808.   ///
  809.   /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
  810.   /// canonical numbering for.
  811.   static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
  812.  
  813.   /// Create a mapping for the value numbering of the calling
  814.   /// IRSimilarityCandidate, to a different separate set of numbers, based on
  815.   /// the canonical ordering in \p SourceCand. These are defined based on the
  816.   /// found mappings in \p ToSourceMapping and \p FromSourceMapping.  Both of
  817.   /// these relationships should have the same information, just in opposite
  818.   /// directions.
  819.   ///
  820.   /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
  821.   /// canonical numbering from.
  822.   /// \param ToSourceMapping - The mapping of value numbers from this candidate
  823.   /// to \p SourceCand.
  824.   /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand
  825.   /// to this candidate.
  826.   void createCanonicalRelationFrom(
  827.       IRSimilarityCandidate &SourceCand,
  828.       DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
  829.       DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
  830.  
  831.   /// \param [in,out] BBSet - The set to track the basic blocks.
  832.   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
  833.     for (IRInstructionData &ID : *this) {
  834.       BasicBlock *BB = ID.Inst->getParent();
  835.       BBSet.insert(BB);
  836.     }
  837.   }
  838.  
  839.   /// \param [in,out] BBSet - The set to track the basic blocks.
  840.   /// \param [in,out] BBList - A list in order of use to track the basic blocks.
  841.   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
  842.                       SmallVector<BasicBlock *> &BBList) const {
  843.     for (IRInstructionData &ID : *this) {
  844.       BasicBlock *BB = ID.Inst->getParent();
  845.       if (BBSet.insert(BB).second)
  846.         BBList.push_back(BB);
  847.     }
  848.   }
  849.  
  850.   /// Compare the start and end indices of the two IRSimilarityCandidates for
  851.   /// whether they overlap. If the start instruction of one
  852.   /// IRSimilarityCandidate is less than the end instruction of the other, and
  853.   /// the start instruction of one is greater than the start instruction of the
  854.   /// other, they overlap.
  855.   ///
  856.   /// \returns true if the IRSimilarityCandidates do not have overlapping
  857.   /// instructions.
  858.   static bool overlap(const IRSimilarityCandidate &A,
  859.                       const IRSimilarityCandidate &B);
  860.  
  861.   /// \returns the number of instructions in this Candidate.
  862.   unsigned getLength() const { return Len; }
  863.  
  864.   /// \returns the start index of this IRSimilarityCandidate.
  865.   unsigned getStartIdx() const { return StartIdx; }
  866.  
  867.   /// \returns the end index of this IRSimilarityCandidate.
  868.   unsigned getEndIdx() const { return StartIdx + Len - 1; }
  869.  
  870.   /// \returns The first IRInstructionData.
  871.   IRInstructionData *front() const { return FirstInst; }
  872.   /// \returns The last IRInstructionData.
  873.   IRInstructionData *back() const { return LastInst; }
  874.  
  875.   /// \returns The first Instruction.
  876.   Instruction *frontInstruction() { return FirstInst->Inst; }
  877.   /// \returns The last Instruction
  878.   Instruction *backInstruction() { return LastInst->Inst; }
  879.  
  880.   /// \returns The BasicBlock the IRSimilarityCandidate starts in.
  881.   BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); }
  882.   /// \returns The BasicBlock the IRSimilarityCandidate ends in.
  883.   BasicBlock *getEndBB() { return LastInst->Inst->getParent(); }
  884.  
  885.   /// \returns The Function that the IRSimilarityCandidate is located in.
  886.   Function *getFunction() { return getStartBB()->getParent(); }
  887.  
  888.   /// Finds the positive number associated with \p V if it has been mapped.
  889.   /// \param [in] V - the Value to find.
  890.   /// \returns The positive number corresponding to the value.
  891.   /// \returns std::nullopt if not present.
  892.   std::optional<unsigned> getGVN(Value *V) {
  893.     assert(V != nullptr && "Value is a nullptr?");
  894.     DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V);
  895.     if (VNIt == ValueToNumber.end())
  896.       return std::nullopt;
  897.     return VNIt->second;
  898.   }
  899.  
  900.   /// Finds the Value associate with \p Num if it exists.
  901.   /// \param [in] Num - the number to find.
  902.   /// \returns The Value associated with the number.
  903.   /// \returns std::nullopt if not present.
  904.   std::optional<Value *> fromGVN(unsigned Num) {
  905.     DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num);
  906.     if (VNIt == NumberToValue.end())
  907.       return std::nullopt;
  908.     assert(VNIt->second != nullptr && "Found value is a nullptr!");
  909.     return VNIt->second;
  910.   }
  911.  
  912.   /// Find the canonical number from the global value number \p N stored in the
  913.   /// candidate.
  914.   ///
  915.   /// \param N - The global value number to find the canonical number for.
  916.   /// \returns An optional containing the value, and std::nullopt if it could
  917.   /// not be found.
  918.   std::optional<unsigned> getCanonicalNum(unsigned N) {
  919.     DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N);
  920.     if (NCIt == NumberToCanonNum.end())
  921.       return std::nullopt;
  922.     return NCIt->second;
  923.   }
  924.  
  925.   /// Find the global value number from the canonical number \p N stored in the
  926.   /// candidate.
  927.   ///
  928.   /// \param N - The canonical number to find the global vlaue number for.
  929.   /// \returns An optional containing the value, and std::nullopt if it could
  930.   /// not be found.
  931.   std::optional<unsigned> fromCanonicalNum(unsigned N) {
  932.     DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N);
  933.     if (CNIt == CanonNumToNumber.end())
  934.       return std::nullopt;
  935.     return CNIt->second;
  936.   }
  937.  
  938.   /// \param RHS -The IRSimilarityCandidate to compare against
  939.   /// \returns true if the IRSimilarityCandidate is occurs after the
  940.   /// IRSimilarityCandidate in the program.
  941.   bool operator<(const IRSimilarityCandidate &RHS) const {
  942.     return getStartIdx() > RHS.getStartIdx();
  943.   }
  944.  
  945.   using iterator = IRInstructionDataList::iterator;
  946.   iterator begin() const { return iterator(front()); }
  947.   iterator end() const { return std::next(iterator(back())); }
  948. };
  949.  
  950. typedef DenseMap<IRSimilarityCandidate *,
  951.                  DenseMap<unsigned, DenseSet<unsigned>>>
  952.     CandidateGVNMapping;
  953. typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
  954. typedef std::vector<SimilarityGroup> SimilarityGroupList;
  955.  
  956. /// This class puts all the pieces of the IRInstructionData,
  957. /// IRInstructionMapper, IRSimilarityCandidate together.
  958. ///
  959. /// It first feeds the Module or vector of Modules into the IRInstructionMapper,
  960. /// and puts all the mapped instructions into a single long list of
  961. /// IRInstructionData.
  962. ///
  963. /// The list of unsigned integers is given to the Suffix Tree or similar data
  964. /// structure to find repeated subsequences.  We construct an
  965. /// IRSimilarityCandidate for each instance of the subsequence.  We compare them
  966. /// against one another since  These repeated subsequences can have different
  967. /// structure.  For each different kind of structure found, we create a
  968. /// similarity group.
  969. ///
  970. /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are
  971. /// structurally similar to one another, while C is different we would have two
  972. /// SimilarityGroups:
  973. ///
  974. /// SimilarityGroup 1:  SimilarityGroup 2
  975. /// A, B, D             C
  976. ///
  977. /// A list of the different similarity groups is then returned after
  978. /// analyzing the module.
  979. class IRSimilarityIdentifier {
  980. public:
  981.   IRSimilarityIdentifier(bool MatchBranches = true,
  982.                          bool MatchIndirectCalls = true,
  983.                          bool MatchCallsWithName = false,
  984.                          bool MatchIntrinsics = true,
  985.                          bool MatchMustTailCalls = true)
  986.       : Mapper(&InstDataAllocator, &InstDataListAllocator),
  987.         EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
  988.         EnableMatchingCallsByName(MatchCallsWithName),
  989.         EnableIntrinsics(MatchIntrinsics),
  990.         EnableMustTailCalls(MatchMustTailCalls) {}
  991.  
  992. private:
  993.   /// Map the instructions in the module to unsigned integers, using mapping
  994.   /// already present in the Mapper if possible.
  995.   ///
  996.   /// \param [in] M Module - To map to integers.
  997.   /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  998.   /// \param [in,out] IntegerMapping - The vector to append integers to.
  999.   void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList,
  1000.                       std::vector<unsigned> &IntegerMapping);
  1001.  
  1002.   /// Map the instructions in the modules vector to unsigned integers, using
  1003.   /// mapping already present in the mapper if possible.
  1004.   ///
  1005.   /// \param [in] Modules - The list of modules to use to populate the mapper
  1006.   /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  1007.   /// \param [in,out] IntegerMapping - The vector to append integers to.
  1008.   void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules,
  1009.                       std::vector<IRInstructionData *> &InstrList,
  1010.                       std::vector<unsigned> &IntegerMapping);
  1011.  
  1012.   /// Find the similarity candidates in \p InstrList and corresponding
  1013.   /// \p UnsignedVec
  1014.   ///
  1015.   /// \param [in,out] InstrList - The vector to append IRInstructionData to.
  1016.   /// \param [in,out] IntegerMapping - The vector to append integers to.
  1017.   /// candidates found in the program.
  1018.   void findCandidates(std::vector<IRInstructionData *> &InstrList,
  1019.                       std::vector<unsigned> &IntegerMapping);
  1020.  
  1021. public:
  1022.   // Find the IRSimilarityCandidates in the \p Modules and group by structural
  1023.   // similarity in a SimilarityGroup, each group is returned in a
  1024.   // SimilarityGroupList.
  1025.   //
  1026.   // \param [in] Modules - the modules to analyze.
  1027.   // \returns The groups of similarity ranges found in the modules.
  1028.   SimilarityGroupList &
  1029.   findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules);
  1030.  
  1031.   // Find the IRSimilarityCandidates in the given Module grouped by structural
  1032.   // similarity in a SimilarityGroup, contained inside a SimilarityGroupList.
  1033.   //
  1034.   // \param [in] M - the module to analyze.
  1035.   // \returns The groups of similarity ranges found in the module.
  1036.   SimilarityGroupList &findSimilarity(Module &M);
  1037.  
  1038.   // Clears \ref SimilarityCandidates if it is already filled by a previous run.
  1039.   void resetSimilarityCandidates() {
  1040.     // If we've already analyzed a Module or set of Modules, so we must clear
  1041.     // the SimilarityCandidates to make sure we do not have only old values
  1042.     // hanging around.
  1043.     if (SimilarityCandidates)
  1044.       SimilarityCandidates->clear();
  1045.     else
  1046.       SimilarityCandidates = SimilarityGroupList();
  1047.   }
  1048.  
  1049.   // \returns The groups of similarity ranges found in the most recently passed
  1050.   // set of modules.
  1051.   std::optional<SimilarityGroupList> &getSimilarity() {
  1052.     return SimilarityCandidates;
  1053.   }
  1054.  
  1055. private:
  1056.   /// The allocator for IRInstructionData.
  1057.   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
  1058.  
  1059.   /// The allocator for IRInstructionDataLists.
  1060.   SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator;
  1061.  
  1062.   /// Map Instructions to unsigned integers and wraps the Instruction in an
  1063.   /// instance of IRInstructionData.
  1064.   IRInstructionMapper Mapper;
  1065.  
  1066.   /// The flag variable that marks whether we should check branches for
  1067.   /// similarity, or only look within basic blocks.
  1068.   bool EnableBranches = true;
  1069.  
  1070.   /// The flag variable that marks whether we allow indirect calls to be checked
  1071.   /// for similarity, or exclude them as a legal instruction.
  1072.   bool EnableIndirectCalls = true;
  1073.  
  1074.   /// The flag variable that marks whether we allow calls to be marked as
  1075.   /// similar if they do not have the same name, only the same calling
  1076.   /// convention, attributes and type signature.
  1077.   bool EnableMatchingCallsByName = true;
  1078.  
  1079.   /// The flag variable that marks whether we should check intrinsics for
  1080.   /// similarity.
  1081.   bool EnableIntrinsics = true;
  1082.  
  1083.   // The flag variable that marks whether we should allow tailcalls
  1084.   // to be checked for similarity.
  1085.   bool EnableMustTailCalls = false;
  1086.  
  1087.   /// The SimilarityGroups found with the most recent run of \ref
  1088.   /// findSimilarity. std::nullopt if there is no recent run.
  1089.   std::optional<SimilarityGroupList> SimilarityCandidates;
  1090. };
  1091.  
  1092. } // end namespace IRSimilarity
  1093.  
  1094. /// An analysis pass based on legacy pass manager that runs and returns
  1095. /// IRSimilarityIdentifier run on the Module.
  1096. class IRSimilarityIdentifierWrapperPass : public ModulePass {
  1097.   std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI;
  1098.  
  1099. public:
  1100.   static char ID;
  1101.   IRSimilarityIdentifierWrapperPass();
  1102.  
  1103.   IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; }
  1104.   const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; }
  1105.  
  1106.   bool doInitialization(Module &M) override;
  1107.   bool doFinalization(Module &M) override;
  1108.   bool runOnModule(Module &M) override;
  1109.   void getAnalysisUsage(AnalysisUsage &AU) const override {
  1110.     AU.setPreservesAll();
  1111.   }
  1112. };
  1113.  
  1114. /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the
  1115. /// Module.
  1116. class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> {
  1117. public:
  1118.   typedef IRSimilarity::IRSimilarityIdentifier Result;
  1119.  
  1120.   Result run(Module &M, ModuleAnalysisManager &);
  1121.  
  1122. private:
  1123.   friend AnalysisInfoMixin<IRSimilarityAnalysis>;
  1124.   static AnalysisKey Key;
  1125. };
  1126.  
  1127. /// Printer pass that uses \c IRSimilarityAnalysis.
  1128. class IRSimilarityAnalysisPrinterPass
  1129.     : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> {
  1130.   raw_ostream &OS;
  1131.  
  1132. public:
  1133.   explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
  1134.   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
  1135. };
  1136.  
  1137. } // end namespace llvm
  1138.  
  1139. #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
  1140.