Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // \file |
||
| 10 | // Interface file for the IRSimilarityIdentifier for identifying similarities in |
||
| 11 | // IR including the IRInstructionMapper, which maps an Instruction to unsigned |
||
| 12 | // integers. |
||
| 13 | // |
||
| 14 | // Two sequences of instructions are called "similar" if they perform the same |
||
| 15 | // series of operations for all inputs. |
||
| 16 | // |
||
| 17 | // \code |
||
| 18 | // %1 = add i32 %a, 10 |
||
| 19 | // %2 = add i32 %a, %1 |
||
| 20 | // %3 = icmp slt icmp %1, %2 |
||
| 21 | // \endcode |
||
| 22 | // |
||
| 23 | // and |
||
| 24 | // |
||
| 25 | // \code |
||
| 26 | // %1 = add i32 11, %a |
||
| 27 | // %2 = sub i32 %a, %1 |
||
| 28 | // %3 = icmp sgt icmp %2, %1 |
||
| 29 | // \endcode |
||
| 30 | // |
||
| 31 | // ultimately have the same result, even if the inputs, and structure are |
||
| 32 | // slightly different. |
||
| 33 | // |
||
| 34 | // For instructions, we do not worry about operands that do not have fixed |
||
| 35 | // semantic meaning to the program. We consider the opcode that the instruction |
||
| 36 | // has, the types, parameters, and extra information such as the function name, |
||
| 37 | // or comparison predicate. These are used to create a hash to map instructions |
||
| 38 | // to integers to be used in similarity matching in sequences of instructions |
||
| 39 | // |
||
| 40 | // Terminology: |
||
| 41 | // An IRSimilarityCandidate is a region of IRInstructionData (wrapped |
||
| 42 | // Instructions), usually used to denote a region of similarity has been found. |
||
| 43 | // |
||
| 44 | // A SimilarityGroup is a set of IRSimilarityCandidates that are structurally |
||
| 45 | // similar to one another. |
||
| 46 | // |
||
| 47 | //===----------------------------------------------------------------------===// |
||
| 48 | |||
| 49 | #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |
||
| 50 | #define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |
||
| 51 | |||
| 52 | #include "llvm/IR/InstVisitor.h" |
||
| 53 | #include "llvm/IR/Instructions.h" |
||
| 54 | #include "llvm/IR/PassManager.h" |
||
| 55 | #include "llvm/Pass.h" |
||
| 56 | #include "llvm/Support/Allocator.h" |
||
| 57 | #include <optional> |
||
| 58 | |||
| 59 | namespace llvm { |
||
| 60 | class Module; |
||
| 61 | |||
| 62 | namespace IRSimilarity { |
||
| 63 | |||
| 64 | struct IRInstructionDataList; |
||
| 65 | |||
| 66 | /// This represents what is and is not supported when finding similarity in |
||
| 67 | /// Instructions. |
||
| 68 | /// |
||
| 69 | /// Legal Instructions are considered when looking at similarity between |
||
| 70 | /// Instructions. |
||
| 71 | /// |
||
| 72 | /// Illegal Instructions cannot be considered when looking for similarity |
||
| 73 | /// between Instructions. They act as boundaries between similarity regions. |
||
| 74 | /// |
||
| 75 | /// Invisible Instructions are skipped over during analysis. |
||
| 76 | // TODO: Shared with MachineOutliner |
||
| 77 | enum InstrType { Legal, Illegal, Invisible }; |
||
| 78 | |||
| 79 | /// This provides the utilities for hashing an Instruction to an unsigned |
||
| 80 | /// integer. Two IRInstructionDatas produce the same hash value when their |
||
| 81 | /// underlying Instructions perform the same operation (even if they don't have |
||
| 82 | /// the same input operands.) |
||
| 83 | /// As a more concrete example, consider the following: |
||
| 84 | /// |
||
| 85 | /// \code |
||
| 86 | /// %add1 = add i32 %a, %b |
||
| 87 | /// %add2 = add i32 %c, %d |
||
| 88 | /// %add3 = add i64 %e, %f |
||
| 89 | /// \endcode |
||
| 90 | /// |
||
| 91 | // Then the IRInstructionData wrappers for these Instructions may be hashed like |
||
| 92 | /// so: |
||
| 93 | /// |
||
| 94 | /// \code |
||
| 95 | /// ; These two adds have the same types and operand types, so they hash to the |
||
| 96 | /// ; same number. |
||
| 97 | /// %add1 = add i32 %a, %b ; Hash: 1 |
||
| 98 | /// %add2 = add i32 %c, %d ; Hash: 1 |
||
| 99 | /// ; This add produces an i64. This differentiates it from %add1 and %add2. So, |
||
| 100 | /// ; it hashes to a different number. |
||
| 101 | /// %add3 = add i64 %e, %f; Hash: 2 |
||
| 102 | /// \endcode |
||
| 103 | /// |
||
| 104 | /// |
||
| 105 | /// This hashing scheme will be used to represent the program as a very long |
||
| 106 | /// string. This string can then be placed in a data structure which can be used |
||
| 107 | /// for similarity queries. |
||
| 108 | /// |
||
| 109 | /// TODO: Handle types of Instructions which can be equal even with different |
||
| 110 | /// operands. (E.g. comparisons with swapped predicates.) |
||
| 111 | /// TODO: Handle CallInsts, which are only checked for function type |
||
| 112 | /// by \ref isSameOperationAs. |
||
| 113 | /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the |
||
| 114 | /// exact same, and some do not. |
||
| 115 | struct IRInstructionData |
||
| 116 | : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> { |
||
| 117 | |||
| 118 | /// The source Instruction that is being wrapped. |
||
| 119 | Instruction *Inst = nullptr; |
||
| 120 | /// The values of the operands in the Instruction. |
||
| 121 | SmallVector<Value *, 4> OperVals; |
||
| 122 | /// The legality of the wrapped instruction. This is informed by InstrType, |
||
| 123 | /// and is used when checking when two instructions are considered similar. |
||
| 124 | /// If either instruction is not legal, the instructions are automatically not |
||
| 125 | /// considered similar. |
||
| 126 | bool Legal = false; |
||
| 127 | |||
| 128 | /// This is only relevant if we are wrapping a CmpInst where we needed to |
||
| 129 | /// change the predicate of a compare instruction from a greater than form |
||
| 130 | /// to a less than form. It is None otherwise. |
||
| 131 | std::optional<CmpInst::Predicate> RevisedPredicate; |
||
| 132 | |||
| 133 | /// This is only relevant if we are wrapping a CallInst. If we are requiring |
||
| 134 | /// that the function calls have matching names as well as types, and the |
||
| 135 | /// call is not an indirect call, this will hold the name of the function. If |
||
| 136 | /// it is an indirect string, it will be the empty string. However, if this |
||
| 137 | /// requirement is not in place it will be the empty string regardless of the |
||
| 138 | /// function call type. The value held here is used to create the hash of the |
||
| 139 | /// instruction, and check to make sure two instructions are close to one |
||
| 140 | /// another. |
||
| 141 | std::optional<std::string> CalleeName; |
||
| 142 | |||
| 143 | /// This structure holds the distances of how far "ahead of" or "behind" the |
||
| 144 | /// target blocks of a branch, or the incoming blocks of a phi nodes are. |
||
| 145 | /// If the value is negative, it means that the block was registered before |
||
| 146 | /// the block of this instruction in terms of blocks in the function. |
||
| 147 | /// Code Example: |
||
| 148 | /// \code |
||
| 149 | /// block_1: |
||
| 150 | /// br i1 %0, label %block_2, label %block_3 |
||
| 151 | /// block_2: |
||
| 152 | /// br i1 %1, label %block_1, label %block_2 |
||
| 153 | /// block_3: |
||
| 154 | /// br i1 %2, label %block_2, label %block_1 |
||
| 155 | /// ; Replacing the labels with relative values, this becomes: |
||
| 156 | /// block_1: |
||
| 157 | /// br i1 %0, distance 1, distance 2 |
||
| 158 | /// block_2: |
||
| 159 | /// br i1 %1, distance -1, distance 0 |
||
| 160 | /// block_3: |
||
| 161 | /// br i1 %2, distance -1, distance -2 |
||
| 162 | /// \endcode |
||
| 163 | /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is |
||
| 164 | /// "ahead" of block_2. |
||
| 165 | SmallVector<int, 4> RelativeBlockLocations; |
||
| 166 | |||
| 167 | /// Gather the information that is difficult to gather for an Instruction, or |
||
| 168 | /// is changed. i.e. the operands of an Instruction and the Types of those |
||
| 169 | /// operands. This extra information allows for similarity matching to make |
||
| 170 | /// assertions that allow for more flexibility when checking for whether an |
||
| 171 | /// Instruction performs the same operation. |
||
| 172 | IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); |
||
| 173 | IRInstructionData(IRInstructionDataList &IDL); |
||
| 174 | |||
| 175 | /// Fills data stuctures for IRInstructionData when it is constructed from a |
||
| 176 | // reference or a pointer. |
||
| 177 | void initializeInstruction(); |
||
| 178 | |||
| 179 | /// Get the predicate that the compare instruction is using for hashing the |
||
| 180 | /// instruction. the IRInstructionData must be wrapping a CmpInst. |
||
| 181 | CmpInst::Predicate getPredicate() const; |
||
| 182 | |||
| 183 | /// Get the callee name that the call instruction is using for hashing the |
||
| 184 | /// instruction. The IRInstructionData must be wrapping a CallInst. |
||
| 185 | StringRef getCalleeName() const; |
||
| 186 | |||
| 187 | /// A function that swaps the predicates to their less than form if they are |
||
| 188 | /// in a greater than form. Otherwise, the predicate is unchanged. |
||
| 189 | /// |
||
| 190 | /// \param CI - The comparison operation to find a consistent preidcate for. |
||
| 191 | /// \return the consistent comparison predicate. |
||
| 192 | static CmpInst::Predicate predicateForConsistency(CmpInst *CI); |
||
| 193 | |||
| 194 | /// For an IRInstructionData containing a branch, finds the |
||
| 195 | /// relative distances from the source basic block to the target by taking |
||
| 196 | /// the difference of the number assigned to the current basic block and the |
||
| 197 | /// target basic block of the branch. |
||
| 198 | /// |
||
| 199 | /// \param BasicBlockToInteger - The mapping of basic blocks to their location |
||
| 200 | /// in the module. |
||
| 201 | void |
||
| 202 | setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); |
||
| 203 | |||
| 204 | /// For an IRInstructionData containing a CallInst, set the function name |
||
| 205 | /// appropriately. This will be an empty string if it is an indirect call, |
||
| 206 | /// or we are not matching by name of the called function. It will be the |
||
| 207 | /// name of the function if \p MatchByName is true and it is not an indirect |
||
| 208 | /// call. We may decide not to match by name in order to expand the |
||
| 209 | /// size of the regions we can match. If a function name has the same type |
||
| 210 | /// signature, but the different name, the region of code is still almost the |
||
| 211 | /// same. Since function names can be treated as constants, the name itself |
||
| 212 | /// could be extrapolated away. However, matching by name provides a |
||
| 213 | /// specificity and more "identical" code than not matching by name. |
||
| 214 | /// |
||
| 215 | /// \param MatchByName - A flag to mark whether we are using the called |
||
| 216 | /// function name as a differentiating parameter. |
||
| 217 | void setCalleeName(bool MatchByName = true); |
||
| 218 | |||
| 219 | /// For an IRInstructionData containing a PHINode, finds the |
||
| 220 | /// relative distances from the incoming basic block to the current block by |
||
| 221 | /// taking the difference of the number assigned to the current basic block |
||
| 222 | /// and the incoming basic block of the branch. |
||
| 223 | /// |
||
| 224 | /// \param BasicBlockToInteger - The mapping of basic blocks to their location |
||
| 225 | /// in the module. |
||
| 226 | void |
||
| 227 | setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); |
||
| 228 | |||
| 229 | /// Hashes \p Value based on its opcode, types, and operand types. |
||
| 230 | /// Two IRInstructionData instances produce the same hash when they perform |
||
| 231 | /// the same operation. |
||
| 232 | /// |
||
| 233 | /// As a simple example, consider the following instructions. |
||
| 234 | /// |
||
| 235 | /// \code |
||
| 236 | /// %add1 = add i32 %x1, %y1 |
||
| 237 | /// %add2 = add i32 %x2, %y2 |
||
| 238 | /// |
||
| 239 | /// %sub = sub i32 %x1, %y1 |
||
| 240 | /// |
||
| 241 | /// %add_i64 = add i64 %x2, %y2 |
||
| 242 | /// \endcode |
||
| 243 | /// |
||
| 244 | /// Because the first two adds operate the same types, and are performing the |
||
| 245 | /// same action, they will be hashed to the same value. |
||
| 246 | /// |
||
| 247 | /// However, the subtraction instruction is not the same as an addition, and |
||
| 248 | /// will be hashed to a different value. |
||
| 249 | /// |
||
| 250 | /// Finally, the last add has a different type compared to the first two add |
||
| 251 | /// instructions, so it will also be hashed to a different value that any of |
||
| 252 | /// the previous instructions. |
||
| 253 | /// |
||
| 254 | /// \param [in] ID - The IRInstructionData instance to be hashed. |
||
| 255 | /// \returns A hash_value of the IRInstructionData. |
||
| 256 | friend hash_code hash_value(const IRInstructionData &ID) { |
||
| 257 | SmallVector<Type *, 4> OperTypes; |
||
| 258 | for (Value *V : ID.OperVals) |
||
| 259 | OperTypes.push_back(V->getType()); |
||
| 260 | |||
| 261 | if (isa<CmpInst>(ID.Inst)) |
||
| 262 | return llvm::hash_combine( |
||
| 263 | llvm::hash_value(ID.Inst->getOpcode()), |
||
| 264 | llvm::hash_value(ID.Inst->getType()), |
||
| 265 | llvm::hash_value(ID.getPredicate()), |
||
| 266 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
| 267 | |||
| 268 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) { |
||
| 269 | // To hash intrinsics, we use the opcode, and types like the other |
||
| 270 | // instructions, but also, the Intrinsic ID, and the Name of the |
||
| 271 | // intrinsic. |
||
| 272 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
||
| 273 | return llvm::hash_combine( |
||
| 274 | llvm::hash_value(ID.Inst->getOpcode()), |
||
| 275 | llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), |
||
| 276 | llvm::hash_value(*ID.CalleeName), |
||
| 277 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
| 278 | } |
||
| 279 | |||
| 280 | if (isa<CallInst>(ID.Inst)) { |
||
| 281 | std::string FunctionName = *ID.CalleeName; |
||
| 282 | return llvm::hash_combine( |
||
| 283 | llvm::hash_value(ID.Inst->getOpcode()), |
||
| 284 | llvm::hash_value(ID.Inst->getType()), |
||
| 285 | llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), |
||
| 286 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
| 287 | } |
||
| 288 | |||
| 289 | return llvm::hash_combine( |
||
| 290 | llvm::hash_value(ID.Inst->getOpcode()), |
||
| 291 | llvm::hash_value(ID.Inst->getType()), |
||
| 292 | llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); |
||
| 293 | } |
||
| 294 | |||
| 295 | IRInstructionDataList *IDL = nullptr; |
||
| 296 | }; |
||
| 297 | |||
| 298 | struct IRInstructionDataList |
||
| 299 | : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {}; |
||
| 300 | |||
| 301 | /// Compare one IRInstructionData class to another IRInstructionData class for |
||
| 302 | /// whether they are performing a the same operation, and can mapped to the |
||
| 303 | /// same value. For regular instructions if the hash value is the same, then |
||
| 304 | /// they will also be close. |
||
| 305 | /// |
||
| 306 | /// \param A - The first IRInstructionData class to compare |
||
| 307 | /// \param B - The second IRInstructionData class to compare |
||
| 308 | /// \returns true if \p A and \p B are similar enough to be mapped to the same |
||
| 309 | /// value. |
||
| 310 | bool isClose(const IRInstructionData &A, const IRInstructionData &B); |
||
| 311 | |||
| 312 | struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> { |
||
| 313 | static inline IRInstructionData *getEmptyKey() { return nullptr; } |
||
| 314 | static inline IRInstructionData *getTombstoneKey() { |
||
| 315 | return reinterpret_cast<IRInstructionData *>(-1); |
||
| 316 | } |
||
| 317 | |||
| 318 | static unsigned getHashValue(const IRInstructionData *E) { |
||
| 319 | using llvm::hash_value; |
||
| 320 | assert(E && "IRInstructionData is a nullptr?"); |
||
| 321 | return hash_value(*E); |
||
| 322 | } |
||
| 323 | |||
| 324 | static bool isEqual(const IRInstructionData *LHS, |
||
| 325 | const IRInstructionData *RHS) { |
||
| 326 | if (RHS == getEmptyKey() || RHS == getTombstoneKey() || |
||
| 327 | LHS == getEmptyKey() || LHS == getTombstoneKey()) |
||
| 328 | return LHS == RHS; |
||
| 329 | |||
| 330 | assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); |
||
| 331 | return isClose(*LHS, *RHS); |
||
| 332 | } |
||
| 333 | }; |
||
| 334 | |||
| 335 | /// Helper struct for converting the Instructions in a Module into a vector of |
||
| 336 | /// unsigned integers. This vector of unsigned integers can be thought of as a |
||
| 337 | /// "numeric string". This numeric string can then be queried by, for example, |
||
| 338 | /// data structures that find repeated substrings. |
||
| 339 | /// |
||
| 340 | /// This hashing is done per BasicBlock in the module. To hash Instructions |
||
| 341 | /// based off of their operations, each Instruction is wrapped in an |
||
| 342 | /// IRInstructionData struct. The unsigned integer for an IRInstructionData |
||
| 343 | /// depends on: |
||
| 344 | /// - The hash provided by the IRInstructionData. |
||
| 345 | /// - Which member of InstrType the IRInstructionData is classified as. |
||
| 346 | // See InstrType for more details on the possible classifications, and how they |
||
| 347 | // manifest in the numeric string. |
||
| 348 | /// |
||
| 349 | /// The numeric string for an individual BasicBlock is terminated by an unique |
||
| 350 | /// unsigned integer. This prevents data structures which rely on repetition |
||
| 351 | /// from matching across BasicBlocks. (For example, the SuffixTree.) |
||
| 352 | /// As a concrete example, if we have the following two BasicBlocks: |
||
| 353 | /// \code |
||
| 354 | /// bb0: |
||
| 355 | /// %add1 = add i32 %a, %b |
||
| 356 | /// %add2 = add i32 %c, %d |
||
| 357 | /// %add3 = add i64 %e, %f |
||
| 358 | /// bb1: |
||
| 359 | /// %sub = sub i32 %c, %d |
||
| 360 | /// \endcode |
||
| 361 | /// We may hash the Instructions like this (via IRInstructionData): |
||
| 362 | /// \code |
||
| 363 | /// bb0: |
||
| 364 | /// %add1 = add i32 %a, %b ; Hash: 1 |
||
| 365 | /// %add2 = add i32 %c, %d; Hash: 1 |
||
| 366 | /// %add3 = add i64 %e, %f; Hash: 2 |
||
| 367 | /// bb1: |
||
| 368 | /// %sub = sub i32 %c, %d; Hash: 3 |
||
| 369 | /// %add4 = add i32 %c, %d ; Hash: 1 |
||
| 370 | /// \endcode |
||
| 371 | /// And produce a "numeric string representation" like so: |
||
| 372 | /// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 |
||
| 373 | /// |
||
| 374 | /// TODO: This is very similar to the MachineOutliner, and should be |
||
| 375 | /// consolidated into the same interface. |
||
| 376 | struct IRInstructionMapper { |
||
| 377 | /// The starting illegal instruction number to map to. |
||
| 378 | /// |
||
| 379 | /// Set to -3 for compatibility with DenseMapInfo<unsigned>. |
||
| 380 | unsigned IllegalInstrNumber = static_cast<unsigned>(-3); |
||
| 381 | |||
| 382 | /// The next available integer to assign to a legal Instruction to. |
||
| 383 | unsigned LegalInstrNumber = 0; |
||
| 384 | |||
| 385 | /// Correspondence from IRInstructionData to unsigned integers. |
||
| 386 | DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits> |
||
| 387 | InstructionIntegerMap; |
||
| 388 | |||
| 389 | /// A mapping for a basic block in a module to its assigned number/location |
||
| 390 | /// in the module. |
||
| 391 | DenseMap<BasicBlock *, unsigned> BasicBlockToInteger; |
||
| 392 | |||
| 393 | /// Set if we added an illegal number in the previous step. |
||
| 394 | /// Since each illegal number is unique, we only need one of them between |
||
| 395 | /// each range of legal numbers. This lets us make sure we don't add more |
||
| 396 | /// than one illegal number per range. |
||
| 397 | bool AddedIllegalLastTime = false; |
||
| 398 | |||
| 399 | /// Marks whether we found a illegal instruction in the previous step. |
||
| 400 | bool CanCombineWithPrevInstr = false; |
||
| 401 | |||
| 402 | /// Marks whether we have found a set of instructions that is long enough |
||
| 403 | /// to be considered for similarity. |
||
| 404 | bool HaveLegalRange = false; |
||
| 405 | |||
| 406 | /// Marks whether we should use exact function names, as well as types to |
||
| 407 | /// find similarity between calls. |
||
| 408 | bool EnableMatchCallsByName = false; |
||
| 409 | |||
| 410 | /// This allocator pointer is in charge of holding on to the IRInstructionData |
||
| 411 | /// so it is not deallocated until whatever external tool is using it is done |
||
| 412 | /// with the information. |
||
| 413 | SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr; |
||
| 414 | |||
| 415 | /// This allocator pointer is in charge of creating the IRInstructionDataList |
||
| 416 | /// so it is not deallocated until whatever external tool is using it is done |
||
| 417 | /// with the information. |
||
| 418 | SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr; |
||
| 419 | |||
| 420 | /// Get an allocated IRInstructionData struct using the InstDataAllocator. |
||
| 421 | /// |
||
| 422 | /// \param I - The Instruction to wrap with IRInstructionData. |
||
| 423 | /// \param Legality - A boolean value that is true if the instruction is to |
||
| 424 | /// be considered for similarity, and false if not. |
||
| 425 | /// \param IDL - The InstructionDataList that the IRInstructionData is |
||
| 426 | /// inserted into. |
||
| 427 | /// \returns An allocated IRInstructionData struct. |
||
| 428 | IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, |
||
| 429 | IRInstructionDataList &IDL); |
||
| 430 | |||
| 431 | /// Get an empty allocated IRInstructionData struct using the |
||
| 432 | /// InstDataAllocator. |
||
| 433 | /// |
||
| 434 | /// \param IDL - The InstructionDataList that the IRInstructionData is |
||
| 435 | /// inserted into. |
||
| 436 | /// \returns An allocated IRInstructionData struct. |
||
| 437 | IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL); |
||
| 438 | |||
| 439 | /// Get an allocated IRInstructionDataList object using the IDLAllocator. |
||
| 440 | /// |
||
| 441 | /// \returns An allocated IRInstructionDataList object. |
||
| 442 | IRInstructionDataList *allocateIRInstructionDataList(); |
||
| 443 | |||
| 444 | IRInstructionDataList *IDL = nullptr; |
||
| 445 | |||
| 446 | /// Assigns values to all the basic blocks in function \p F starting from |
||
| 447 | /// integer \p BBNumber. |
||
| 448 | /// |
||
| 449 | /// \param F - The function containing the basic blocks to assign numbers to. |
||
| 450 | /// \param BBNumber - The number to start from. |
||
| 451 | void initializeForBBs(Function &F, unsigned &BBNumber) { |
||
| 452 | for (BasicBlock &BB : F) |
||
| 453 | BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); |
||
| 454 | } |
||
| 455 | |||
| 456 | /// Assigns values to all the basic blocks in Module \p M. |
||
| 457 | /// \param M - The module containing the basic blocks to assign numbers to. |
||
| 458 | void initializeForBBs(Module &M) { |
||
| 459 | unsigned BBNumber = 0; |
||
| 460 | for (Function &F : M) |
||
| 461 | initializeForBBs(F, BBNumber); |
||
| 462 | } |
||
| 463 | |||
| 464 | /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers |
||
| 465 | /// determined by \p InstrType. Two Instructions are mapped to the same value |
||
| 466 | /// if they are close as defined by the InstructionData class above. |
||
| 467 | /// |
||
| 468 | /// \param [in] BB - The BasicBlock to be mapped to integers. |
||
| 469 | /// \param [in,out] InstrList - Vector of IRInstructionData to append to. |
||
| 470 | /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. |
||
| 471 | void convertToUnsignedVec(BasicBlock &BB, |
||
| 472 | std::vector<IRInstructionData *> &InstrList, |
||
| 473 | std::vector<unsigned> &IntegerMapping); |
||
| 474 | |||
| 475 | /// Maps an Instruction to a legal integer. |
||
| 476 | /// |
||
| 477 | /// \param [in] It - The Instruction to be mapped to an integer. |
||
| 478 | /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to |
||
| 479 | /// append to. |
||
| 480 | /// \param [in,out] InstrListForBB - Vector of InstructionData to append to. |
||
| 481 | /// \returns The integer \p It was mapped to. |
||
| 482 | unsigned mapToLegalUnsigned(BasicBlock::iterator &It, |
||
| 483 | std::vector<unsigned> &IntegerMappingForBB, |
||
| 484 | std::vector<IRInstructionData *> &InstrListForBB); |
||
| 485 | |||
| 486 | /// Maps an Instruction to an illegal integer. |
||
| 487 | /// |
||
| 488 | /// \param [in] It - The \p Instruction to be mapped to an integer. |
||
| 489 | /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to |
||
| 490 | /// append to. |
||
| 491 | /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to. |
||
| 492 | /// \param End - true if creating a dummy IRInstructionData at the end of a |
||
| 493 | /// basic block. |
||
| 494 | /// \returns The integer \p It was mapped to. |
||
| 495 | unsigned mapToIllegalUnsigned( |
||
| 496 | BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, |
||
| 497 | std::vector<IRInstructionData *> &InstrListForBB, bool End = false); |
||
| 498 | |||
| 499 | IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA, |
||
| 500 | SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA) |
||
| 501 | : InstDataAllocator(IDA), IDLAllocator(IDLA) { |
||
| 502 | // Make sure that the implementation of DenseMapInfo<unsigned> hasn't |
||
| 503 | // changed. |
||
| 504 | assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) && |
||
| 505 | "DenseMapInfo<unsigned>'s empty key isn't -1!"); |
||
| 506 | assert(DenseMapInfo<unsigned>::getTombstoneKey() == |
||
| 507 | static_cast<unsigned>(-2) && |
||
| 508 | "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); |
||
| 509 | |||
| 510 | IDL = new (IDLAllocator->Allocate()) |
||
| 511 | IRInstructionDataList(); |
||
| 512 | } |
||
| 513 | |||
| 514 | /// Custom InstVisitor to classify different instructions for whether it can |
||
| 515 | /// be analyzed for similarity. |
||
| 516 | struct InstructionClassification |
||
| 517 | : public InstVisitor<InstructionClassification, InstrType> { |
||
| 518 | InstructionClassification() = default; |
||
| 519 | |||
| 520 | // TODO: Determine a scheme to resolve when the label is similar enough. |
||
| 521 | InstrType visitBranchInst(BranchInst &BI) { |
||
| 522 | if (EnableBranches) |
||
| 523 | return Legal; |
||
| 524 | return Illegal; |
||
| 525 | } |
||
| 526 | InstrType visitPHINode(PHINode &PN) { |
||
| 527 | if (EnableBranches) |
||
| 528 | return Legal; |
||
| 529 | return Illegal; |
||
| 530 | } |
||
| 531 | // TODO: Handle allocas. |
||
| 532 | InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } |
||
| 533 | // We exclude variable argument instructions since variable arguments |
||
| 534 | // requires extra checking of the argument list. |
||
| 535 | InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } |
||
| 536 | // We exclude all exception handling cases since they are so context |
||
| 537 | // dependent. |
||
| 538 | InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } |
||
| 539 | InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } |
||
| 540 | // DebugInfo should be included in the regions, but should not be |
||
| 541 | // analyzed for similarity as it has no bearing on the outcome of the |
||
| 542 | // program. |
||
| 543 | InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } |
||
| 544 | InstrType visitIntrinsicInst(IntrinsicInst &II) { |
||
| 545 | // These are disabled due to complications in the CodeExtractor when |
||
| 546 | // outlining these instructions. For instance, It is unclear what we |
||
| 547 | // should do when moving only the start or end lifetime instruction into |
||
| 548 | // an outlined function. Also, assume-like intrinsics could be removed |
||
| 549 | // from the region, removing arguments, causing discrepencies in the |
||
| 550 | // number of inputs between different regions. |
||
| 551 | if (II.isAssumeLikeIntrinsic()) |
||
| 552 | return Illegal; |
||
| 553 | return EnableIntrinsics ? Legal : Illegal; |
||
| 554 | } |
||
| 555 | // We only allow call instructions where the function has a name and |
||
| 556 | // is not an indirect call. |
||
| 557 | InstrType visitCallInst(CallInst &CI) { |
||
| 558 | Function *F = CI.getCalledFunction(); |
||
| 559 | bool IsIndirectCall = CI.isIndirectCall(); |
||
| 560 | if (IsIndirectCall && !EnableIndirectCalls) |
||
| 561 | return Illegal; |
||
| 562 | if (!F && !IsIndirectCall) |
||
| 563 | return Illegal; |
||
| 564 | // Functions marked with the swifttailcc and tailcc calling conventions |
||
| 565 | // require special handling when outlining musttail functions. The |
||
| 566 | // calling convention must be passed down to the outlined function as |
||
| 567 | // well. Further, there is special handling for musttail calls as well, |
||
| 568 | // requiring a return call directly after. For now, the outliner does not |
||
| 569 | // support this, so we do not handle matching this case either. |
||
| 570 | if ((CI.getCallingConv() == CallingConv::SwiftTail || |
||
| 571 | CI.getCallingConv() == CallingConv::Tail) && |
||
| 572 | !EnableMustTailCalls) |
||
| 573 | return Illegal; |
||
| 574 | if (CI.isMustTailCall() && !EnableMustTailCalls) |
||
| 575 | return Illegal; |
||
| 576 | return Legal; |
||
| 577 | } |
||
| 578 | // TODO: We do not current handle similarity that changes the control flow. |
||
| 579 | InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } |
||
| 580 | // TODO: We do not current handle similarity that changes the control flow. |
||
| 581 | InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } |
||
| 582 | // TODO: Handle interblock similarity. |
||
| 583 | InstrType visitTerminator(Instruction &I) { return Illegal; } |
||
| 584 | InstrType visitInstruction(Instruction &I) { return Legal; } |
||
| 585 | |||
| 586 | // The flag variable that lets the classifier know whether we should |
||
| 587 | // allow branches to be checked for similarity. |
||
| 588 | bool EnableBranches = false; |
||
| 589 | |||
| 590 | // The flag variable that lets the classifier know whether we should |
||
| 591 | // allow indirect calls to be considered legal instructions. |
||
| 592 | bool EnableIndirectCalls = false; |
||
| 593 | |||
| 594 | // Flag that lets the classifier know whether we should allow intrinsics to |
||
| 595 | // be checked for similarity. |
||
| 596 | bool EnableIntrinsics = false; |
||
| 597 | |||
| 598 | // Flag that lets the classifier know whether we should allow tail calls to |
||
| 599 | // be checked for similarity. |
||
| 600 | bool EnableMustTailCalls = false; |
||
| 601 | }; |
||
| 602 | |||
| 603 | /// Maps an Instruction to a member of InstrType. |
||
| 604 | InstructionClassification InstClassifier; |
||
| 605 | }; |
||
| 606 | |||
| 607 | /// This is a class that wraps a range of IRInstructionData from one point to |
||
| 608 | /// another in the vector of IRInstructionData, which is a region of the |
||
| 609 | /// program. It is also responsible for defining the structure within this |
||
| 610 | /// region of instructions. |
||
| 611 | /// |
||
| 612 | /// The structure of a region is defined through a value numbering system |
||
| 613 | /// assigned to each unique value in a region at the creation of the |
||
| 614 | /// IRSimilarityCandidate. |
||
| 615 | /// |
||
| 616 | /// For example, for each Instruction we add a mapping for each new |
||
| 617 | /// value seen in that Instruction. |
||
| 618 | /// IR: Mapping Added: |
||
| 619 | /// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 |
||
| 620 | /// %add2 = add i32 %a, %1 %add2 -> 4 |
||
| 621 | /// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 |
||
| 622 | /// |
||
| 623 | /// We can compare IRSimilarityCandidates against one another. |
||
| 624 | /// The \ref isSimilar function compares each IRInstructionData against one |
||
| 625 | /// another and if we have the same sequences of IRInstructionData that would |
||
| 626 | /// create the same hash, we have similar IRSimilarityCandidates. |
||
| 627 | /// |
||
| 628 | /// We can also compare the structure of IRSimilarityCandidates. If we can |
||
| 629 | /// create a mapping of registers in the region contained by one |
||
| 630 | /// IRSimilarityCandidate to the region contained by different |
||
| 631 | /// IRSimilarityCandidate, they can be considered structurally similar. |
||
| 632 | /// |
||
| 633 | /// IRSimilarityCandidate1: IRSimilarityCandidate2: |
||
| 634 | /// %add1 = add i32 %a, %b %add1 = add i32 %d, %e |
||
| 635 | /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f |
||
| 636 | /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 |
||
| 637 | /// |
||
| 638 | /// Can have the following mapping from candidate to candidate of: |
||
| 639 | /// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 |
||
| 640 | /// and can be considered similar. |
||
| 641 | /// |
||
| 642 | /// IRSimilarityCandidate1: IRSimilarityCandidate2: |
||
| 643 | /// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 |
||
| 644 | /// %add2 = add i32 %a, %c %add2 = add i32 %d, %f |
||
| 645 | /// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 |
||
| 646 | /// |
||
| 647 | /// We cannot create the same mapping since the use of c4 is not used in the |
||
| 648 | /// same way as %b or c2. |
||
| 649 | class IRSimilarityCandidate { |
||
| 650 | private: |
||
| 651 | /// The start index of this IRSimilarityCandidate in the instruction list. |
||
| 652 | unsigned StartIdx = 0; |
||
| 653 | |||
| 654 | /// The number of instructions in this IRSimilarityCandidate. |
||
| 655 | unsigned Len = 0; |
||
| 656 | |||
| 657 | /// The first instruction in this IRSimilarityCandidate. |
||
| 658 | IRInstructionData *FirstInst = nullptr; |
||
| 659 | |||
| 660 | /// The last instruction in this IRSimilarityCandidate. |
||
| 661 | IRInstructionData *LastInst = nullptr; |
||
| 662 | |||
| 663 | /// Global Value Numbering structures |
||
| 664 | /// @{ |
||
| 665 | /// Stores the mapping of the value to the number assigned to it in the |
||
| 666 | /// IRSimilarityCandidate. |
||
| 667 | DenseMap<Value *, unsigned> ValueToNumber; |
||
| 668 | /// Stores the mapping of the number to the value assigned this number. |
||
| 669 | DenseMap<unsigned, Value *> NumberToValue; |
||
| 670 | /// Stores the mapping of a value's number to canonical numbering in the |
||
| 671 | /// candidate's respective similarity group. |
||
| 672 | DenseMap<unsigned, unsigned> NumberToCanonNum; |
||
| 673 | /// Stores the mapping of canonical number in the candidate's respective |
||
| 674 | /// similarity group to a value number. |
||
| 675 | DenseMap<unsigned, unsigned> CanonNumToNumber; |
||
| 676 | /// @} |
||
| 677 | |||
| 678 | public: |
||
| 679 | /// \param StartIdx - The starting location of the region. |
||
| 680 | /// \param Len - The length of the region. |
||
| 681 | /// \param FirstInstIt - The starting IRInstructionData of the region. |
||
| 682 | /// \param LastInstIt - The ending IRInstructionData of the region. |
||
| 683 | IRSimilarityCandidate(unsigned StartIdx, unsigned Len, |
||
| 684 | IRInstructionData *FirstInstIt, |
||
| 685 | IRInstructionData *LastInstIt); |
||
| 686 | |||
| 687 | /// \param A - The first IRInstructionCandidate to compare. |
||
| 688 | /// \param B - The second IRInstructionCandidate to compare. |
||
| 689 | /// \returns True when every IRInstructionData in \p A is similar to every |
||
| 690 | /// IRInstructionData in \p B. |
||
| 691 | static bool isSimilar(const IRSimilarityCandidate &A, |
||
| 692 | const IRSimilarityCandidate &B); |
||
| 693 | |||
| 694 | /// \param [in] A - The first IRInstructionCandidate to compare. |
||
| 695 | /// \param [in] B - The second IRInstructionCandidate to compare. |
||
| 696 | /// \returns True when every IRInstructionData in \p A is structurally similar |
||
| 697 | /// to \p B. |
||
| 698 | static bool compareStructure(const IRSimilarityCandidate &A, |
||
| 699 | const IRSimilarityCandidate &B); |
||
| 700 | |||
| 701 | /// \param [in] A - The first IRInstructionCandidate to compare. |
||
| 702 | /// \param [in] B - The second IRInstructionCandidate to compare. |
||
| 703 | /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from |
||
| 704 | /// candidate \p A to candidate \B. |
||
| 705 | /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from |
||
| 706 | /// candidate \p B to candidate \A. |
||
| 707 | /// \returns True when every IRInstructionData in \p A is structurally similar |
||
| 708 | /// to \p B. |
||
| 709 | static bool |
||
| 710 | compareStructure(const IRSimilarityCandidate &A, |
||
| 711 | const IRSimilarityCandidate &B, |
||
| 712 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, |
||
| 713 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB); |
||
| 714 | |||
| 715 | struct OperandMapping { |
||
| 716 | /// The IRSimilarityCandidate that holds the instruction the OperVals were |
||
| 717 | /// pulled from. |
||
| 718 | const IRSimilarityCandidate &IRSC; |
||
| 719 | |||
| 720 | /// The operand values to be analyzed. |
||
| 721 | ArrayRef<Value *> &OperVals; |
||
| 722 | |||
| 723 | /// The current mapping of global value numbers from one IRSimilarityCandidate |
||
| 724 | /// to another IRSimilarityCandidate. |
||
| 725 | DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping; |
||
| 726 | }; |
||
| 727 | |||
| 728 | /// A helper struct to hold the candidate, for a branch instruction, the |
||
| 729 | /// relative location of a label, and the label itself. This is mostly to |
||
| 730 | /// group the values together before passing them as a bundle to a function. |
||
| 731 | struct RelativeLocMapping { |
||
| 732 | /// The IRSimilarityCandidate that holds the instruction the relative |
||
| 733 | /// location was pulled from. |
||
| 734 | const IRSimilarityCandidate &IRSC; |
||
| 735 | |||
| 736 | /// The relative location to be analyzed. |
||
| 737 | int RelativeLocation; |
||
| 738 | |||
| 739 | /// The corresponding value. |
||
| 740 | Value *OperVal; |
||
| 741 | }; |
||
| 742 | |||
| 743 | /// Compare the operands in \p A and \p B and check that the current mapping |
||
| 744 | /// of global value numbers from \p A to \p B and \p B to \A is consistent. |
||
| 745 | /// |
||
| 746 | /// \param A - The first IRInstructionCandidate, operand values, and current |
||
| 747 | /// operand mappings to compare. |
||
| 748 | /// \param B - The second IRInstructionCandidate, operand values, and current |
||
| 749 | /// operand mappings to compare. |
||
| 750 | /// \returns true if the IRSimilarityCandidates operands are compatible. |
||
| 751 | static bool compareNonCommutativeOperandMapping(OperandMapping A, |
||
| 752 | OperandMapping B); |
||
| 753 | |||
| 754 | /// Compare the operands in \p A and \p B and check that the current mapping |
||
| 755 | /// of global value numbers from \p A to \p B and \p B to \A is consistent |
||
| 756 | /// given that the operands are commutative. |
||
| 757 | /// |
||
| 758 | /// \param A - The first IRInstructionCandidate, operand values, and current |
||
| 759 | /// operand mappings to compare. |
||
| 760 | /// \param B - The second IRInstructionCandidate, operand values, and current |
||
| 761 | /// operand mappings to compare. |
||
| 762 | /// \returns true if the IRSimilarityCandidates operands are compatible. |
||
| 763 | static bool compareCommutativeOperandMapping(OperandMapping A, |
||
| 764 | OperandMapping B); |
||
| 765 | |||
| 766 | /// Compare the relative locations in \p A and \p B and check that the |
||
| 767 | /// distances match if both locations are contained in the region, and that |
||
| 768 | /// the branches both point outside the region if they do not. |
||
| 769 | /// Example Region: |
||
| 770 | /// \code |
||
| 771 | /// entry: |
||
| 772 | /// br i1 %0, label %block_1, label %block_3 |
||
| 773 | /// block_0: |
||
| 774 | /// br i1 %0, label %block_1, label %block_2 |
||
| 775 | /// block_1: |
||
| 776 | /// br i1 %0, label %block_2, label %block_3 |
||
| 777 | /// block_2: |
||
| 778 | /// br i1 %1, label %block_1, label %block_4 |
||
| 779 | /// block_3: |
||
| 780 | /// br i1 %2, label %block_2, label %block_5 |
||
| 781 | /// \endcode |
||
| 782 | /// If we compare the branches in block_0 and block_1 the relative values are |
||
| 783 | /// 1 and 2 for both, so we consider this a match. |
||
| 784 | /// |
||
| 785 | /// If we compare the branches in entry and block_0 the relative values are |
||
| 786 | /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not |
||
| 787 | /// consider them a match. |
||
| 788 | /// |
||
| 789 | /// If we compare the branches in block_1 and block_2 the relative values are |
||
| 790 | /// 1 and 2, and -1 and None respectively. As a result we do not consider |
||
| 791 | /// these to be the same |
||
| 792 | /// |
||
| 793 | /// If we compare the branches in block_2 and block_3 the relative values are |
||
| 794 | /// -1 and None for both. We do consider these to be a match. |
||
| 795 | /// |
||
| 796 | /// \param A - The first IRInstructionCandidate, relative location value, |
||
| 797 | /// and incoming block. |
||
| 798 | /// \param B - The second IRInstructionCandidate, relative location value, |
||
| 799 | /// and incoming block. |
||
| 800 | /// \returns true if the relative locations match. |
||
| 801 | static bool checkRelativeLocations(RelativeLocMapping A, |
||
| 802 | RelativeLocMapping B); |
||
| 803 | |||
| 804 | /// Create a mapping from the value numbering to a different separate set of |
||
| 805 | /// numbers. This will serve as a guide for relating one candidate to another. |
||
| 806 | /// The canonical number gives use the ability identify which global value |
||
| 807 | /// number in one candidate relates to the global value number in the other. |
||
| 808 | /// |
||
| 809 | /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a |
||
| 810 | /// canonical numbering for. |
||
| 811 | static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand); |
||
| 812 | |||
| 813 | /// Create a mapping for the value numbering of the calling |
||
| 814 | /// IRSimilarityCandidate, to a different separate set of numbers, based on |
||
| 815 | /// the canonical ordering in \p SourceCand. These are defined based on the |
||
| 816 | /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of |
||
| 817 | /// these relationships should have the same information, just in opposite |
||
| 818 | /// directions. |
||
| 819 | /// |
||
| 820 | /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a |
||
| 821 | /// canonical numbering from. |
||
| 822 | /// \param ToSourceMapping - The mapping of value numbers from this candidate |
||
| 823 | /// to \p SourceCand. |
||
| 824 | /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand |
||
| 825 | /// to this candidate. |
||
| 826 | void createCanonicalRelationFrom( |
||
| 827 | IRSimilarityCandidate &SourceCand, |
||
| 828 | DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, |
||
| 829 | DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping); |
||
| 830 | |||
| 831 | /// \param [in,out] BBSet - The set to track the basic blocks. |
||
| 832 | void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const { |
||
| 833 | for (IRInstructionData &ID : *this) { |
||
| 834 | BasicBlock *BB = ID.Inst->getParent(); |
||
| 835 | BBSet.insert(BB); |
||
| 836 | } |
||
| 837 | } |
||
| 838 | |||
| 839 | /// \param [in,out] BBSet - The set to track the basic blocks. |
||
| 840 | /// \param [in,out] BBList - A list in order of use to track the basic blocks. |
||
| 841 | void getBasicBlocks(DenseSet<BasicBlock *> &BBSet, |
||
| 842 | SmallVector<BasicBlock *> &BBList) const { |
||
| 843 | for (IRInstructionData &ID : *this) { |
||
| 844 | BasicBlock *BB = ID.Inst->getParent(); |
||
| 845 | if (BBSet.insert(BB).second) |
||
| 846 | BBList.push_back(BB); |
||
| 847 | } |
||
| 848 | } |
||
| 849 | |||
| 850 | /// Compare the start and end indices of the two IRSimilarityCandidates for |
||
| 851 | /// whether they overlap. If the start instruction of one |
||
| 852 | /// IRSimilarityCandidate is less than the end instruction of the other, and |
||
| 853 | /// the start instruction of one is greater than the start instruction of the |
||
| 854 | /// other, they overlap. |
||
| 855 | /// |
||
| 856 | /// \returns true if the IRSimilarityCandidates do not have overlapping |
||
| 857 | /// instructions. |
||
| 858 | static bool overlap(const IRSimilarityCandidate &A, |
||
| 859 | const IRSimilarityCandidate &B); |
||
| 860 | |||
| 861 | /// \returns the number of instructions in this Candidate. |
||
| 862 | unsigned getLength() const { return Len; } |
||
| 863 | |||
| 864 | /// \returns the start index of this IRSimilarityCandidate. |
||
| 865 | unsigned getStartIdx() const { return StartIdx; } |
||
| 866 | |||
| 867 | /// \returns the end index of this IRSimilarityCandidate. |
||
| 868 | unsigned getEndIdx() const { return StartIdx + Len - 1; } |
||
| 869 | |||
| 870 | /// \returns The first IRInstructionData. |
||
| 871 | IRInstructionData *front() const { return FirstInst; } |
||
| 872 | /// \returns The last IRInstructionData. |
||
| 873 | IRInstructionData *back() const { return LastInst; } |
||
| 874 | |||
| 875 | /// \returns The first Instruction. |
||
| 876 | Instruction *frontInstruction() { return FirstInst->Inst; } |
||
| 877 | /// \returns The last Instruction |
||
| 878 | Instruction *backInstruction() { return LastInst->Inst; } |
||
| 879 | |||
| 880 | /// \returns The BasicBlock the IRSimilarityCandidate starts in. |
||
| 881 | BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } |
||
| 882 | /// \returns The BasicBlock the IRSimilarityCandidate ends in. |
||
| 883 | BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } |
||
| 884 | |||
| 885 | /// \returns The Function that the IRSimilarityCandidate is located in. |
||
| 886 | Function *getFunction() { return getStartBB()->getParent(); } |
||
| 887 | |||
| 888 | /// Finds the positive number associated with \p V if it has been mapped. |
||
| 889 | /// \param [in] V - the Value to find. |
||
| 890 | /// \returns The positive number corresponding to the value. |
||
| 891 | /// \returns std::nullopt if not present. |
||
| 892 | std::optional<unsigned> getGVN(Value *V) { |
||
| 893 | assert(V != nullptr && "Value is a nullptr?"); |
||
| 894 | DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V); |
||
| 895 | if (VNIt == ValueToNumber.end()) |
||
| 896 | return std::nullopt; |
||
| 897 | return VNIt->second; |
||
| 898 | } |
||
| 899 | |||
| 900 | /// Finds the Value associate with \p Num if it exists. |
||
| 901 | /// \param [in] Num - the number to find. |
||
| 902 | /// \returns The Value associated with the number. |
||
| 903 | /// \returns std::nullopt if not present. |
||
| 904 | std::optional<Value *> fromGVN(unsigned Num) { |
||
| 905 | DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num); |
||
| 906 | if (VNIt == NumberToValue.end()) |
||
| 907 | return std::nullopt; |
||
| 908 | assert(VNIt->second != nullptr && "Found value is a nullptr!"); |
||
| 909 | return VNIt->second; |
||
| 910 | } |
||
| 911 | |||
| 912 | /// Find the canonical number from the global value number \p N stored in the |
||
| 913 | /// candidate. |
||
| 914 | /// |
||
| 915 | /// \param N - The global value number to find the canonical number for. |
||
| 916 | /// \returns An optional containing the value, and std::nullopt if it could |
||
| 917 | /// not be found. |
||
| 918 | std::optional<unsigned> getCanonicalNum(unsigned N) { |
||
| 919 | DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N); |
||
| 920 | if (NCIt == NumberToCanonNum.end()) |
||
| 921 | return std::nullopt; |
||
| 922 | return NCIt->second; |
||
| 923 | } |
||
| 924 | |||
| 925 | /// Find the global value number from the canonical number \p N stored in the |
||
| 926 | /// candidate. |
||
| 927 | /// |
||
| 928 | /// \param N - The canonical number to find the global vlaue number for. |
||
| 929 | /// \returns An optional containing the value, and std::nullopt if it could |
||
| 930 | /// not be found. |
||
| 931 | std::optional<unsigned> fromCanonicalNum(unsigned N) { |
||
| 932 | DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N); |
||
| 933 | if (CNIt == CanonNumToNumber.end()) |
||
| 934 | return std::nullopt; |
||
| 935 | return CNIt->second; |
||
| 936 | } |
||
| 937 | |||
| 938 | /// \param RHS -The IRSimilarityCandidate to compare against |
||
| 939 | /// \returns true if the IRSimilarityCandidate is occurs after the |
||
| 940 | /// IRSimilarityCandidate in the program. |
||
| 941 | bool operator<(const IRSimilarityCandidate &RHS) const { |
||
| 942 | return getStartIdx() > RHS.getStartIdx(); |
||
| 943 | } |
||
| 944 | |||
| 945 | using iterator = IRInstructionDataList::iterator; |
||
| 946 | iterator begin() const { return iterator(front()); } |
||
| 947 | iterator end() const { return std::next(iterator(back())); } |
||
| 948 | }; |
||
| 949 | |||
| 950 | typedef DenseMap<IRSimilarityCandidate *, |
||
| 951 | DenseMap<unsigned, DenseSet<unsigned>>> |
||
| 952 | CandidateGVNMapping; |
||
| 953 | typedef std::vector<IRSimilarityCandidate> SimilarityGroup; |
||
| 954 | typedef std::vector<SimilarityGroup> SimilarityGroupList; |
||
| 955 | |||
| 956 | /// This class puts all the pieces of the IRInstructionData, |
||
| 957 | /// IRInstructionMapper, IRSimilarityCandidate together. |
||
| 958 | /// |
||
| 959 | /// It first feeds the Module or vector of Modules into the IRInstructionMapper, |
||
| 960 | /// and puts all the mapped instructions into a single long list of |
||
| 961 | /// IRInstructionData. |
||
| 962 | /// |
||
| 963 | /// The list of unsigned integers is given to the Suffix Tree or similar data |
||
| 964 | /// structure to find repeated subsequences. We construct an |
||
| 965 | /// IRSimilarityCandidate for each instance of the subsequence. We compare them |
||
| 966 | /// against one another since These repeated subsequences can have different |
||
| 967 | /// structure. For each different kind of structure found, we create a |
||
| 968 | /// similarity group. |
||
| 969 | /// |
||
| 970 | /// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are |
||
| 971 | /// structurally similar to one another, while C is different we would have two |
||
| 972 | /// SimilarityGroups: |
||
| 973 | /// |
||
| 974 | /// SimilarityGroup 1: SimilarityGroup 2 |
||
| 975 | /// A, B, D C |
||
| 976 | /// |
||
| 977 | /// A list of the different similarity groups is then returned after |
||
| 978 | /// analyzing the module. |
||
| 979 | class IRSimilarityIdentifier { |
||
| 980 | public: |
||
| 981 | IRSimilarityIdentifier(bool MatchBranches = true, |
||
| 982 | bool MatchIndirectCalls = true, |
||
| 983 | bool MatchCallsWithName = false, |
||
| 984 | bool MatchIntrinsics = true, |
||
| 985 | bool MatchMustTailCalls = true) |
||
| 986 | : Mapper(&InstDataAllocator, &InstDataListAllocator), |
||
| 987 | EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), |
||
| 988 | EnableMatchingCallsByName(MatchCallsWithName), |
||
| 989 | EnableIntrinsics(MatchIntrinsics), |
||
| 990 | EnableMustTailCalls(MatchMustTailCalls) {} |
||
| 991 | |||
| 992 | private: |
||
| 993 | /// Map the instructions in the module to unsigned integers, using mapping |
||
| 994 | /// already present in the Mapper if possible. |
||
| 995 | /// |
||
| 996 | /// \param [in] M Module - To map to integers. |
||
| 997 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
| 998 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
| 999 | void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList, |
||
| 1000 | std::vector<unsigned> &IntegerMapping); |
||
| 1001 | |||
| 1002 | /// Map the instructions in the modules vector to unsigned integers, using |
||
| 1003 | /// mapping already present in the mapper if possible. |
||
| 1004 | /// |
||
| 1005 | /// \param [in] Modules - The list of modules to use to populate the mapper |
||
| 1006 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
| 1007 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
| 1008 | void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules, |
||
| 1009 | std::vector<IRInstructionData *> &InstrList, |
||
| 1010 | std::vector<unsigned> &IntegerMapping); |
||
| 1011 | |||
| 1012 | /// Find the similarity candidates in \p InstrList and corresponding |
||
| 1013 | /// \p UnsignedVec |
||
| 1014 | /// |
||
| 1015 | /// \param [in,out] InstrList - The vector to append IRInstructionData to. |
||
| 1016 | /// \param [in,out] IntegerMapping - The vector to append integers to. |
||
| 1017 | /// candidates found in the program. |
||
| 1018 | void findCandidates(std::vector<IRInstructionData *> &InstrList, |
||
| 1019 | std::vector<unsigned> &IntegerMapping); |
||
| 1020 | |||
| 1021 | public: |
||
| 1022 | // Find the IRSimilarityCandidates in the \p Modules and group by structural |
||
| 1023 | // similarity in a SimilarityGroup, each group is returned in a |
||
| 1024 | // SimilarityGroupList. |
||
| 1025 | // |
||
| 1026 | // \param [in] Modules - the modules to analyze. |
||
| 1027 | // \returns The groups of similarity ranges found in the modules. |
||
| 1028 | SimilarityGroupList & |
||
| 1029 | findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules); |
||
| 1030 | |||
| 1031 | // Find the IRSimilarityCandidates in the given Module grouped by structural |
||
| 1032 | // similarity in a SimilarityGroup, contained inside a SimilarityGroupList. |
||
| 1033 | // |
||
| 1034 | // \param [in] M - the module to analyze. |
||
| 1035 | // \returns The groups of similarity ranges found in the module. |
||
| 1036 | SimilarityGroupList &findSimilarity(Module &M); |
||
| 1037 | |||
| 1038 | // Clears \ref SimilarityCandidates if it is already filled by a previous run. |
||
| 1039 | void resetSimilarityCandidates() { |
||
| 1040 | // If we've already analyzed a Module or set of Modules, so we must clear |
||
| 1041 | // the SimilarityCandidates to make sure we do not have only old values |
||
| 1042 | // hanging around. |
||
| 1043 | if (SimilarityCandidates) |
||
| 1044 | SimilarityCandidates->clear(); |
||
| 1045 | else |
||
| 1046 | SimilarityCandidates = SimilarityGroupList(); |
||
| 1047 | } |
||
| 1048 | |||
| 1049 | // \returns The groups of similarity ranges found in the most recently passed |
||
| 1050 | // set of modules. |
||
| 1051 | std::optional<SimilarityGroupList> &getSimilarity() { |
||
| 1052 | return SimilarityCandidates; |
||
| 1053 | } |
||
| 1054 | |||
| 1055 | private: |
||
| 1056 | /// The allocator for IRInstructionData. |
||
| 1057 | SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; |
||
| 1058 | |||
| 1059 | /// The allocator for IRInstructionDataLists. |
||
| 1060 | SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator; |
||
| 1061 | |||
| 1062 | /// Map Instructions to unsigned integers and wraps the Instruction in an |
||
| 1063 | /// instance of IRInstructionData. |
||
| 1064 | IRInstructionMapper Mapper; |
||
| 1065 | |||
| 1066 | /// The flag variable that marks whether we should check branches for |
||
| 1067 | /// similarity, or only look within basic blocks. |
||
| 1068 | bool EnableBranches = true; |
||
| 1069 | |||
| 1070 | /// The flag variable that marks whether we allow indirect calls to be checked |
||
| 1071 | /// for similarity, or exclude them as a legal instruction. |
||
| 1072 | bool EnableIndirectCalls = true; |
||
| 1073 | |||
| 1074 | /// The flag variable that marks whether we allow calls to be marked as |
||
| 1075 | /// similar if they do not have the same name, only the same calling |
||
| 1076 | /// convention, attributes and type signature. |
||
| 1077 | bool EnableMatchingCallsByName = true; |
||
| 1078 | |||
| 1079 | /// The flag variable that marks whether we should check intrinsics for |
||
| 1080 | /// similarity. |
||
| 1081 | bool EnableIntrinsics = true; |
||
| 1082 | |||
| 1083 | // The flag variable that marks whether we should allow tailcalls |
||
| 1084 | // to be checked for similarity. |
||
| 1085 | bool EnableMustTailCalls = false; |
||
| 1086 | |||
| 1087 | /// The SimilarityGroups found with the most recent run of \ref |
||
| 1088 | /// findSimilarity. std::nullopt if there is no recent run. |
||
| 1089 | std::optional<SimilarityGroupList> SimilarityCandidates; |
||
| 1090 | }; |
||
| 1091 | |||
| 1092 | } // end namespace IRSimilarity |
||
| 1093 | |||
| 1094 | /// An analysis pass based on legacy pass manager that runs and returns |
||
| 1095 | /// IRSimilarityIdentifier run on the Module. |
||
| 1096 | class IRSimilarityIdentifierWrapperPass : public ModulePass { |
||
| 1097 | std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI; |
||
| 1098 | |||
| 1099 | public: |
||
| 1100 | static char ID; |
||
| 1101 | IRSimilarityIdentifierWrapperPass(); |
||
| 1102 | |||
| 1103 | IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; } |
||
| 1104 | const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; } |
||
| 1105 | |||
| 1106 | bool doInitialization(Module &M) override; |
||
| 1107 | bool doFinalization(Module &M) override; |
||
| 1108 | bool runOnModule(Module &M) override; |
||
| 1109 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
||
| 1110 | AU.setPreservesAll(); |
||
| 1111 | } |
||
| 1112 | }; |
||
| 1113 | |||
| 1114 | /// An analysis pass that runs and returns the IRSimilarityIdentifier run on the |
||
| 1115 | /// Module. |
||
| 1116 | class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> { |
||
| 1117 | public: |
||
| 1118 | typedef IRSimilarity::IRSimilarityIdentifier Result; |
||
| 1119 | |||
| 1120 | Result run(Module &M, ModuleAnalysisManager &); |
||
| 1121 | |||
| 1122 | private: |
||
| 1123 | friend AnalysisInfoMixin<IRSimilarityAnalysis>; |
||
| 1124 | static AnalysisKey Key; |
||
| 1125 | }; |
||
| 1126 | |||
| 1127 | /// Printer pass that uses \c IRSimilarityAnalysis. |
||
| 1128 | class IRSimilarityAnalysisPrinterPass |
||
| 1129 | : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> { |
||
| 1130 | raw_ostream &OS; |
||
| 1131 | |||
| 1132 | public: |
||
| 1133 | explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} |
||
| 1134 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); |
||
| 1135 | }; |
||
| 1136 | |||
| 1137 | } // end namespace llvm |
||
| 1138 | |||
| 1139 | #endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H |