Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file defines classes for searching and analyzing source code clones.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13.  
  14. #ifndef LLVM_CLANG_ANALYSIS_CLONEDETECTION_H
  15. #define LLVM_CLANG_ANALYSIS_CLONEDETECTION_H
  16.  
  17. #include "clang/AST/StmtVisitor.h"
  18. #include "llvm/Support/Regex.h"
  19. #include <vector>
  20.  
  21. namespace clang {
  22.  
  23. class Stmt;
  24. class Decl;
  25. class VarDecl;
  26. class ASTContext;
  27. class CompoundStmt;
  28.  
  29. /// Identifies a list of statements.
  30. ///
  31. /// Can either identify a single arbitrary Stmt object, a continuous sequence of
  32. /// child statements inside a CompoundStmt or no statements at all.
  33. class StmtSequence {
  34.   /// If this object identifies a sequence of statements inside a CompoundStmt,
  35.   /// S points to this CompoundStmt. If this object only identifies a single
  36.   /// Stmt, then S is a pointer to this Stmt.
  37.   const Stmt *S;
  38.  
  39.   /// The declaration that contains the statements.
  40.   const Decl *D;
  41.  
  42.   /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
  43.   /// instance is representing the CompoundStmt children inside the array
  44.   /// [StartIndex, EndIndex).
  45.   unsigned StartIndex;
  46.   unsigned EndIndex;
  47.  
  48. public:
  49.   /// Constructs a StmtSequence holding multiple statements.
  50.   ///
  51.   /// The resulting StmtSequence identifies a continuous sequence of statements
  52.   /// in the body of the given CompoundStmt. Which statements of the body should
  53.   /// be identified needs to be specified by providing a start and end index
  54.   /// that describe a non-empty sub-array in the body of the given CompoundStmt.
  55.   ///
  56.   /// \param Stmt A CompoundStmt that contains all statements in its body.
  57.   /// \param D The Decl containing this Stmt.
  58.   /// \param StartIndex The inclusive start index in the children array of
  59.   ///                   \p Stmt
  60.   /// \param EndIndex The exclusive end index in the children array of \p Stmt.
  61.   StmtSequence(const CompoundStmt *Stmt, const Decl *D, unsigned StartIndex,
  62.                unsigned EndIndex);
  63.  
  64.   /// Constructs a StmtSequence holding a single statement.
  65.   ///
  66.   /// \param Stmt An arbitrary Stmt.
  67.   /// \param D The Decl containing this Stmt.
  68.   StmtSequence(const Stmt *Stmt, const Decl *D);
  69.  
  70.   /// Constructs an empty StmtSequence.
  71.   StmtSequence();
  72.  
  73.   typedef const Stmt *const *iterator;
  74.  
  75.   /// Returns an iterator pointing to the first statement in this sequence.
  76.   iterator begin() const;
  77.  
  78.   /// Returns an iterator pointing behind the last statement in this sequence.
  79.   iterator end() const;
  80.  
  81.   /// Returns the first statement in this sequence.
  82.   ///
  83.   /// This method should only be called on a non-empty StmtSequence object.
  84.   const Stmt *front() const {
  85.     assert(!empty());
  86.     return begin()[0];
  87.   }
  88.  
  89.   /// Returns the last statement in this sequence.
  90.   ///
  91.   /// This method should only be called on a non-empty StmtSequence object.
  92.   const Stmt *back() const {
  93.     assert(!empty());
  94.     return begin()[size() - 1];
  95.   }
  96.  
  97.   /// Returns the number of statements this object holds.
  98.   unsigned size() const {
  99.     if (holdsSequence())
  100.       return EndIndex - StartIndex;
  101.     if (S == nullptr)
  102.       return 0;
  103.     return 1;
  104.   }
  105.  
  106.   /// Returns true if and only if this StmtSequence contains no statements.
  107.   bool empty() const { return size() == 0; }
  108.  
  109.   /// Returns the related ASTContext for the stored Stmts.
  110.   ASTContext &getASTContext() const;
  111.  
  112.   /// Returns the declaration that contains the stored Stmts.
  113.   const Decl *getContainingDecl() const {
  114.     assert(D);
  115.     return D;
  116.   }
  117.  
  118.   /// Returns true if this objects holds a list of statements.
  119.   bool holdsSequence() const { return EndIndex != 0; }
  120.  
  121.   /// Returns the start sourcelocation of the first statement in this sequence.
  122.   ///
  123.   /// This method should only be called on a non-empty StmtSequence object.
  124.   SourceLocation getBeginLoc() const;
  125.  
  126.   /// Returns the end sourcelocation of the last statement in this sequence.
  127.   ///
  128.   /// This method should only be called on a non-empty StmtSequence object.
  129.   SourceLocation getEndLoc() const;
  130.  
  131.   /// Returns the source range of the whole sequence - from the beginning
  132.   /// of the first statement to the end of the last statement.
  133.   SourceRange getSourceRange() const;
  134.  
  135.   bool operator==(const StmtSequence &Other) const {
  136.     return std::tie(S, StartIndex, EndIndex) ==
  137.            std::tie(Other.S, Other.StartIndex, Other.EndIndex);
  138.   }
  139.  
  140.   bool operator!=(const StmtSequence &Other) const {
  141.     return std::tie(S, StartIndex, EndIndex) !=
  142.            std::tie(Other.S, Other.StartIndex, Other.EndIndex);
  143.   }
  144.  
  145.   /// Returns true if and only if this sequence covers a source range that
  146.   /// contains the source range of the given sequence \p Other.
  147.   ///
  148.   /// This method should only be called on a non-empty StmtSequence object
  149.   /// and passed a non-empty StmtSequence object.
  150.   bool contains(const StmtSequence &Other) const;
  151. };
  152.  
  153. /// Searches for similar subtrees in the AST.
  154. ///
  155. /// First, this class needs several declarations with statement bodies which
  156. /// can be passed via analyzeCodeBody. Afterwards all statements can be
  157. /// searched for clones by calling findClones with a given list of constraints
  158. /// that should specify the wanted properties of the clones.
  159. ///
  160. /// The result of findClones can be further constrained with the constrainClones
  161. /// method.
  162. ///
  163. /// This class only searches for clones in executable source code
  164. /// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
  165. /// are not supported.
  166. class CloneDetector {
  167.  
  168. public:
  169.   /// A collection of StmtSequences that share an arbitrary property.
  170.   typedef llvm::SmallVector<StmtSequence, 8> CloneGroup;
  171.  
  172.   /// Generates and stores search data for all statements in the body of
  173.   /// the given Decl.
  174.   void analyzeCodeBody(const Decl *D);
  175.  
  176.   /// Constrains the given list of clone groups with the given constraint.
  177.   ///
  178.   /// The constraint is expected to have a method with the signature
  179.   ///     `void constrain(std::vector<CloneDetector::CloneGroup> &Sequences)`
  180.   /// as this is the interface that the CloneDetector uses for applying the
  181.   /// constraint. The constraint is supposed to directly modify the passed list
  182.   /// so that all clones in the list fulfill the specific property this
  183.   /// constraint ensures.
  184.   template <typename T>
  185.   static void constrainClones(std::vector<CloneGroup> &CloneGroups, T C) {
  186.     C.constrain(CloneGroups);
  187.   }
  188.  
  189.   /// Constrains the given list of clone groups with the given list of
  190.   /// constraints.
  191.   ///
  192.   /// The constraints are applied in sequence in the order in which they are
  193.   /// passed to this function.
  194.   template <typename T1, typename... Ts>
  195.   static void constrainClones(std::vector<CloneGroup> &CloneGroups, T1 C,
  196.                               Ts... ConstraintList) {
  197.     constrainClones(CloneGroups, C);
  198.     constrainClones(CloneGroups, ConstraintList...);
  199.   }
  200.  
  201.   /// Searches for clones in all previously passed statements.
  202.   /// \param Result Output parameter to which all created clone groups are
  203.   ///               added.
  204.   /// \param ConstraintList The constraints that should be applied to the
  205.   //         result.
  206.   template <typename... Ts>
  207.   void findClones(std::vector<CloneGroup> &Result, Ts... ConstraintList) {
  208.     // The initial assumption is that there is only one clone group and every
  209.     // statement is a clone of the others. This clone group will then be
  210.     // split up with the help of the constraints.
  211.     Result.push_back(Sequences);
  212.  
  213.     constrainClones(Result, ConstraintList...);
  214.   }
  215.  
  216. private:
  217.   CloneGroup Sequences;
  218. };
  219.  
  220. /// This class is a utility class that contains utility functions for building
  221. /// custom constraints.
  222. class CloneConstraint {
  223. public:
  224.   /// Removes all groups by using a filter function.
  225.   /// \param CloneGroups The list of CloneGroups that is supposed to be
  226.   ///                    filtered.
  227.   /// \param Filter The filter function that should return true for all groups
  228.   ///               that should be removed from the list.
  229.   static void filterGroups(
  230.       std::vector<CloneDetector::CloneGroup> &CloneGroups,
  231.       llvm::function_ref<bool(const CloneDetector::CloneGroup &)> Filter) {
  232.     llvm::erase_if(CloneGroups, Filter);
  233.   }
  234.  
  235.   /// Splits the given CloneGroups until the given Compare function returns true
  236.   /// for all clones in a single group.
  237.   /// \param CloneGroups A list of CloneGroups that should be modified.
  238.   /// \param Compare The comparison function that all clones are supposed to
  239.   ///                pass. Should return true if and only if two clones belong
  240.   ///                to the same CloneGroup.
  241.   static void splitCloneGroups(
  242.       std::vector<CloneDetector::CloneGroup> &CloneGroups,
  243.       llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)>
  244.           Compare);
  245. };
  246.  
  247. /// This constraint moves clones into clone groups of type II via hashing.
  248. ///
  249. /// Clones with different hash values are moved into separate clone groups.
  250. /// Collisions are possible, and this constraint does nothing to address this
  251. /// them. Add the slower RecursiveCloneTypeIIVerifyConstraint later in the
  252. /// constraint chain, not necessarily immediately, to eliminate hash collisions
  253. /// through a more detailed analysis.
  254. class RecursiveCloneTypeIIHashConstraint {
  255. public:
  256.   void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
  257. };
  258.  
  259. /// This constraint moves clones into clone groups of type II by comparing them.
  260. ///
  261. /// Clones that aren't type II clones are moved into separate clone groups.
  262. /// In contrast to the RecursiveCloneTypeIIHashConstraint, all clones in a clone
  263. /// group are guaranteed to be type II clones of each other, but it is too
  264. /// slow to efficiently handle large amounts of clones.
  265. class RecursiveCloneTypeIIVerifyConstraint {
  266. public:
  267.   void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
  268. };
  269.  
  270. /// Ensures that every clone has at least the given complexity.
  271. ///
  272. /// Complexity is here defined as the total amount of children of a statement.
  273. /// This constraint assumes the first statement in the group is representative
  274. /// for all other statements in the group in terms of complexity.
  275. class MinComplexityConstraint {
  276.   unsigned MinComplexity;
  277.  
  278. public:
  279.   MinComplexityConstraint(unsigned MinComplexity)
  280.       : MinComplexity(MinComplexity) {}
  281.  
  282.   /// Calculates the complexity of the given StmtSequence.
  283.   /// \param Limit The limit of complexity we probe for. After reaching
  284.   ///              this limit during calculation, this method is exiting
  285.   ///              early to improve performance and returns this limit.
  286.   size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit,
  287.                                  const std::string &ParentMacroStack = "");
  288.  
  289.   void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
  290.     CloneConstraint::filterGroups(
  291.         CloneGroups, [this](const CloneDetector::CloneGroup &A) {
  292.           if (!A.empty())
  293.             return calculateStmtComplexity(A.front(), MinComplexity) <
  294.                    MinComplexity;
  295.           else
  296.             return false;
  297.         });
  298.   }
  299. };
  300.  
  301. /// Ensures that all clone groups contain at least the given amount of clones.
  302. class MinGroupSizeConstraint {
  303.   unsigned MinGroupSize;
  304.  
  305. public:
  306.   MinGroupSizeConstraint(unsigned MinGroupSize = 2)
  307.       : MinGroupSize(MinGroupSize) {}
  308.  
  309.   void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
  310.     CloneConstraint::filterGroups(CloneGroups,
  311.                                   [this](const CloneDetector::CloneGroup &A) {
  312.                                     return A.size() < MinGroupSize;
  313.                                   });
  314.   }
  315. };
  316.  
  317. /// Ensures that no clone group fully contains another clone group.
  318. struct OnlyLargestCloneConstraint {
  319.   void constrain(std::vector<CloneDetector::CloneGroup> &Result);
  320. };
  321.  
  322. struct FilenamePatternConstraint {
  323.   StringRef IgnoredFilesPattern;
  324.   std::shared_ptr<llvm::Regex> IgnoredFilesRegex;
  325.  
  326.   FilenamePatternConstraint(StringRef IgnoredFilesPattern)
  327.       : IgnoredFilesPattern(IgnoredFilesPattern) {
  328.     IgnoredFilesRegex = std::make_shared<llvm::Regex>("^(" +
  329.         IgnoredFilesPattern.str() + "$)");
  330.   }
  331.  
  332.   bool isAutoGenerated(const CloneDetector::CloneGroup &Group);
  333.  
  334.   void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
  335.     CloneConstraint::filterGroups(
  336.         CloneGroups, [this](const CloneDetector::CloneGroup &Group) {
  337.           return isAutoGenerated(Group);
  338.         });
  339.   }
  340. };
  341.  
  342. /// Analyzes the pattern of the referenced variables in a statement.
  343. class VariablePattern {
  344.  
  345.   /// Describes an occurrence of a variable reference in a statement.
  346.   struct VariableOccurence {
  347.     /// The index of the associated VarDecl in the Variables vector.
  348.     size_t KindID;
  349.     /// The statement in the code where the variable was referenced.
  350.     const Stmt *Mention;
  351.  
  352.     VariableOccurence(size_t KindID, const Stmt *Mention)
  353.         : KindID(KindID), Mention(Mention) {}
  354.   };
  355.  
  356.   /// All occurrences of referenced variables in the order of appearance.
  357.   std::vector<VariableOccurence> Occurences;
  358.   /// List of referenced variables in the order of appearance.
  359.   /// Every item in this list is unique.
  360.   std::vector<const VarDecl *> Variables;
  361.  
  362.   /// Adds a new variable referenced to this pattern.
  363.   /// \param VarDecl The declaration of the variable that is referenced.
  364.   /// \param Mention The SourceRange where this variable is referenced.
  365.   void addVariableOccurence(const VarDecl *VarDecl, const Stmt *Mention);
  366.  
  367.   /// Adds each referenced variable from the given statement.
  368.   void addVariables(const Stmt *S);
  369.  
  370. public:
  371.   /// Creates an VariablePattern object with information about the given
  372.   /// StmtSequence.
  373.   VariablePattern(const StmtSequence &Sequence) {
  374.     for (const Stmt *S : Sequence)
  375.       addVariables(S);
  376.   }
  377.  
  378.   /// Describes two clones that reference their variables in a different pattern
  379.   /// which could indicate a programming error.
  380.   struct SuspiciousClonePair {
  381.     /// Utility class holding the relevant information about a single
  382.     /// clone in this pair.
  383.     struct SuspiciousCloneInfo {
  384.       /// The variable which referencing in this clone was against the pattern.
  385.       const VarDecl *Variable;
  386.       /// Where the variable was referenced.
  387.       const Stmt *Mention;
  388.       /// The variable that should have been referenced to follow the pattern.
  389.       /// If Suggestion is a nullptr then it's not possible to fix the pattern
  390.       /// by referencing a different variable in this clone.
  391.       const VarDecl *Suggestion;
  392.       SuspiciousCloneInfo(const VarDecl *Variable, const Stmt *Mention,
  393.                           const VarDecl *Suggestion)
  394.           : Variable(Variable), Mention(Mention), Suggestion(Suggestion) {}
  395.       SuspiciousCloneInfo() {}
  396.     };
  397.     /// The first clone in the pair which always has a suggested variable.
  398.     SuspiciousCloneInfo FirstCloneInfo;
  399.     /// This other clone in the pair which can have a suggested variable.
  400.     SuspiciousCloneInfo SecondCloneInfo;
  401.   };
  402.  
  403.   /// Counts the differences between this pattern and the given one.
  404.   /// \param Other The given VariablePattern to compare with.
  405.   /// \param FirstMismatch Output parameter that will be filled with information
  406.   ///        about the first difference between the two patterns. This parameter
  407.   ///        can be a nullptr, in which case it will be ignored.
  408.   /// \return Returns the number of differences between the pattern this object
  409.   ///         is following and the given VariablePattern.
  410.   ///
  411.   /// For example, the following statements all have the same pattern and this
  412.   /// function would return zero:
  413.   ///
  414.   ///   if (a < b) return a; return b;
  415.   ///   if (x < y) return x; return y;
  416.   ///   if (u2 < u1) return u2; return u1;
  417.   ///
  418.   /// But the following statement has a different pattern (note the changed
  419.   /// variables in the return statements) and would have two differences when
  420.   /// compared with one of the statements above.
  421.   ///
  422.   ///   if (a < b) return b; return a;
  423.   ///
  424.   /// This function should only be called if the related statements of the given
  425.   /// pattern and the statements of this objects are clones of each other.
  426.   unsigned countPatternDifferences(
  427.       const VariablePattern &Other,
  428.       VariablePattern::SuspiciousClonePair *FirstMismatch = nullptr);
  429. };
  430.  
  431. /// Ensures that all clones reference variables in the same pattern.
  432. struct MatchingVariablePatternConstraint {
  433.   void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups);
  434. };
  435.  
  436. } // end namespace clang
  437.  
  438. #endif // LLVM_CLANG_ANALYSIS_CLONEDETECTION_H
  439.