Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===--- CloneDetection.h - Finds code clones in an AST ---------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file defines classes for searching and analyzing source code clones.
11
///
12
//===----------------------------------------------------------------------===//
13
 
14
#ifndef LLVM_CLANG_ANALYSIS_CLONEDETECTION_H
15
#define LLVM_CLANG_ANALYSIS_CLONEDETECTION_H
16
 
17
#include "clang/AST/StmtVisitor.h"
18
#include "llvm/Support/Regex.h"
19
#include <vector>
20
 
21
namespace clang {
22
 
23
class Stmt;
24
class Decl;
25
class VarDecl;
26
class ASTContext;
27
class CompoundStmt;
28
 
29
/// Identifies a list of statements.
30
///
31
/// Can either identify a single arbitrary Stmt object, a continuous sequence of
32
/// child statements inside a CompoundStmt or no statements at all.
33
class StmtSequence {
34
  /// If this object identifies a sequence of statements inside a CompoundStmt,
35
  /// S points to this CompoundStmt. If this object only identifies a single
36
  /// Stmt, then S is a pointer to this Stmt.
37
  const Stmt *S;
38
 
39
  /// The declaration that contains the statements.
40
  const Decl *D;
41
 
42
  /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
43
  /// instance is representing the CompoundStmt children inside the array
44
  /// [StartIndex, EndIndex).
45
  unsigned StartIndex;
46
  unsigned EndIndex;
47
 
48
public:
49
  /// Constructs a StmtSequence holding multiple statements.
50
  ///
51
  /// The resulting StmtSequence identifies a continuous sequence of statements
52
  /// in the body of the given CompoundStmt. Which statements of the body should
53
  /// be identified needs to be specified by providing a start and end index
54
  /// that describe a non-empty sub-array in the body of the given CompoundStmt.
55
  ///
56
  /// \param Stmt A CompoundStmt that contains all statements in its body.
57
  /// \param D The Decl containing this Stmt.
58
  /// \param StartIndex The inclusive start index in the children array of
59
  ///                   \p Stmt
60
  /// \param EndIndex The exclusive end index in the children array of \p Stmt.
61
  StmtSequence(const CompoundStmt *Stmt, const Decl *D, unsigned StartIndex,
62
               unsigned EndIndex);
63
 
64
  /// Constructs a StmtSequence holding a single statement.
65
  ///
66
  /// \param Stmt An arbitrary Stmt.
67
  /// \param D The Decl containing this Stmt.
68
  StmtSequence(const Stmt *Stmt, const Decl *D);
69
 
70
  /// Constructs an empty StmtSequence.
71
  StmtSequence();
72
 
73
  typedef const Stmt *const *iterator;
74
 
75
  /// Returns an iterator pointing to the first statement in this sequence.
76
  iterator begin() const;
77
 
78
  /// Returns an iterator pointing behind the last statement in this sequence.
79
  iterator end() const;
80
 
81
  /// Returns the first statement in this sequence.
82
  ///
83
  /// This method should only be called on a non-empty StmtSequence object.
84
  const Stmt *front() const {
85
    assert(!empty());
86
    return begin()[0];
87
  }
88
 
89
  /// Returns the last statement in this sequence.
90
  ///
91
  /// This method should only be called on a non-empty StmtSequence object.
92
  const Stmt *back() const {
93
    assert(!empty());
94
    return begin()[size() - 1];
95
  }
96
 
97
  /// Returns the number of statements this object holds.
98
  unsigned size() const {
99
    if (holdsSequence())
100
      return EndIndex - StartIndex;
101
    if (S == nullptr)
102
      return 0;
103
    return 1;
104
  }
105
 
106
  /// Returns true if and only if this StmtSequence contains no statements.
107
  bool empty() const { return size() == 0; }
108
 
109
  /// Returns the related ASTContext for the stored Stmts.
110
  ASTContext &getASTContext() const;
111
 
112
  /// Returns the declaration that contains the stored Stmts.
113
  const Decl *getContainingDecl() const {
114
    assert(D);
115
    return D;
116
  }
117
 
118
  /// Returns true if this objects holds a list of statements.
119
  bool holdsSequence() const { return EndIndex != 0; }
120
 
121
  /// Returns the start sourcelocation of the first statement in this sequence.
122
  ///
123
  /// This method should only be called on a non-empty StmtSequence object.
124
  SourceLocation getBeginLoc() const;
125
 
126
  /// Returns the end sourcelocation of the last statement in this sequence.
127
  ///
128
  /// This method should only be called on a non-empty StmtSequence object.
129
  SourceLocation getEndLoc() const;
130
 
131
  /// Returns the source range of the whole sequence - from the beginning
132
  /// of the first statement to the end of the last statement.
133
  SourceRange getSourceRange() const;
134
 
135
  bool operator==(const StmtSequence &Other) const {
136
    return std::tie(S, StartIndex, EndIndex) ==
137
           std::tie(Other.S, Other.StartIndex, Other.EndIndex);
138
  }
139
 
140
  bool operator!=(const StmtSequence &Other) const {
141
    return std::tie(S, StartIndex, EndIndex) !=
142
           std::tie(Other.S, Other.StartIndex, Other.EndIndex);
143
  }
144
 
145
  /// Returns true if and only if this sequence covers a source range that
146
  /// contains the source range of the given sequence \p Other.
147
  ///
148
  /// This method should only be called on a non-empty StmtSequence object
149
  /// and passed a non-empty StmtSequence object.
150
  bool contains(const StmtSequence &Other) const;
151
};
152
 
153
/// Searches for similar subtrees in the AST.
154
///
155
/// First, this class needs several declarations with statement bodies which
156
/// can be passed via analyzeCodeBody. Afterwards all statements can be
157
/// searched for clones by calling findClones with a given list of constraints
158
/// that should specify the wanted properties of the clones.
159
///
160
/// The result of findClones can be further constrained with the constrainClones
161
/// method.
162
///
163
/// This class only searches for clones in executable source code
164
/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
165
/// are not supported.
166
class CloneDetector {
167
 
168
public:
169
  /// A collection of StmtSequences that share an arbitrary property.
170
  typedef llvm::SmallVector<StmtSequence, 8> CloneGroup;
171
 
172
  /// Generates and stores search data for all statements in the body of
173
  /// the given Decl.
174
  void analyzeCodeBody(const Decl *D);
175
 
176
  /// Constrains the given list of clone groups with the given constraint.
177
  ///
178
  /// The constraint is expected to have a method with the signature
179
  ///     `void constrain(std::vector<CloneDetector::CloneGroup> &Sequences)`
180
  /// as this is the interface that the CloneDetector uses for applying the
181
  /// constraint. The constraint is supposed to directly modify the passed list
182
  /// so that all clones in the list fulfill the specific property this
183
  /// constraint ensures.
184
  template <typename T>
185
  static void constrainClones(std::vector<CloneGroup> &CloneGroups, T C) {
186
    C.constrain(CloneGroups);
187
  }
188
 
189
  /// Constrains the given list of clone groups with the given list of
190
  /// constraints.
191
  ///
192
  /// The constraints are applied in sequence in the order in which they are
193
  /// passed to this function.
194
  template <typename T1, typename... Ts>
195
  static void constrainClones(std::vector<CloneGroup> &CloneGroups, T1 C,
196
                              Ts... ConstraintList) {
197
    constrainClones(CloneGroups, C);
198
    constrainClones(CloneGroups, ConstraintList...);
199
  }
200
 
201
  /// Searches for clones in all previously passed statements.
202
  /// \param Result Output parameter to which all created clone groups are
203
  ///               added.
204
  /// \param ConstraintList The constraints that should be applied to the
205
  //         result.
206
  template <typename... Ts>
207
  void findClones(std::vector<CloneGroup> &Result, Ts... ConstraintList) {
208
    // The initial assumption is that there is only one clone group and every
209
    // statement is a clone of the others. This clone group will then be
210
    // split up with the help of the constraints.
211
    Result.push_back(Sequences);
212
 
213
    constrainClones(Result, ConstraintList...);
214
  }
215
 
216
private:
217
  CloneGroup Sequences;
218
};
219
 
220
/// This class is a utility class that contains utility functions for building
221
/// custom constraints.
222
class CloneConstraint {
223
public:
224
  /// Removes all groups by using a filter function.
225
  /// \param CloneGroups The list of CloneGroups that is supposed to be
226
  ///                    filtered.
227
  /// \param Filter The filter function that should return true for all groups
228
  ///               that should be removed from the list.
229
  static void filterGroups(
230
      std::vector<CloneDetector::CloneGroup> &CloneGroups,
231
      llvm::function_ref<bool(const CloneDetector::CloneGroup &)> Filter) {
232
    llvm::erase_if(CloneGroups, Filter);
233
  }
234
 
235
  /// Splits the given CloneGroups until the given Compare function returns true
236
  /// for all clones in a single group.
237
  /// \param CloneGroups A list of CloneGroups that should be modified.
238
  /// \param Compare The comparison function that all clones are supposed to
239
  ///                pass. Should return true if and only if two clones belong
240
  ///                to the same CloneGroup.
241
  static void splitCloneGroups(
242
      std::vector<CloneDetector::CloneGroup> &CloneGroups,
243
      llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)>
244
          Compare);
245
};
246
 
247
/// This constraint moves clones into clone groups of type II via hashing.
248
///
249
/// Clones with different hash values are moved into separate clone groups.
250
/// Collisions are possible, and this constraint does nothing to address this
251
/// them. Add the slower RecursiveCloneTypeIIVerifyConstraint later in the
252
/// constraint chain, not necessarily immediately, to eliminate hash collisions
253
/// through a more detailed analysis.
254
class RecursiveCloneTypeIIHashConstraint {
255
public:
256
  void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
257
};
258
 
259
/// This constraint moves clones into clone groups of type II by comparing them.
260
///
261
/// Clones that aren't type II clones are moved into separate clone groups.
262
/// In contrast to the RecursiveCloneTypeIIHashConstraint, all clones in a clone
263
/// group are guaranteed to be type II clones of each other, but it is too
264
/// slow to efficiently handle large amounts of clones.
265
class RecursiveCloneTypeIIVerifyConstraint {
266
public:
267
  void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
268
};
269
 
270
/// Ensures that every clone has at least the given complexity.
271
///
272
/// Complexity is here defined as the total amount of children of a statement.
273
/// This constraint assumes the first statement in the group is representative
274
/// for all other statements in the group in terms of complexity.
275
class MinComplexityConstraint {
276
  unsigned MinComplexity;
277
 
278
public:
279
  MinComplexityConstraint(unsigned MinComplexity)
280
      : MinComplexity(MinComplexity) {}
281
 
282
  /// Calculates the complexity of the given StmtSequence.
283
  /// \param Limit The limit of complexity we probe for. After reaching
284
  ///              this limit during calculation, this method is exiting
285
  ///              early to improve performance and returns this limit.
286
  size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit,
287
                                 const std::string &ParentMacroStack = "");
288
 
289
  void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
290
    CloneConstraint::filterGroups(
291
        CloneGroups, [this](const CloneDetector::CloneGroup &A) {
292
          if (!A.empty())
293
            return calculateStmtComplexity(A.front(), MinComplexity) <
294
                   MinComplexity;
295
          else
296
            return false;
297
        });
298
  }
299
};
300
 
301
/// Ensures that all clone groups contain at least the given amount of clones.
302
class MinGroupSizeConstraint {
303
  unsigned MinGroupSize;
304
 
305
public:
306
  MinGroupSizeConstraint(unsigned MinGroupSize = 2)
307
      : MinGroupSize(MinGroupSize) {}
308
 
309
  void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
310
    CloneConstraint::filterGroups(CloneGroups,
311
                                  [this](const CloneDetector::CloneGroup &A) {
312
                                    return A.size() < MinGroupSize;
313
                                  });
314
  }
315
};
316
 
317
/// Ensures that no clone group fully contains another clone group.
318
struct OnlyLargestCloneConstraint {
319
  void constrain(std::vector<CloneDetector::CloneGroup> &Result);
320
};
321
 
322
struct FilenamePatternConstraint {
323
  StringRef IgnoredFilesPattern;
324
  std::shared_ptr<llvm::Regex> IgnoredFilesRegex;
325
 
326
  FilenamePatternConstraint(StringRef IgnoredFilesPattern)
327
      : IgnoredFilesPattern(IgnoredFilesPattern) {
328
    IgnoredFilesRegex = std::make_shared<llvm::Regex>("^(" +
329
        IgnoredFilesPattern.str() + "$)");
330
  }
331
 
332
  bool isAutoGenerated(const CloneDetector::CloneGroup &Group);
333
 
334
  void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups) {
335
    CloneConstraint::filterGroups(
336
        CloneGroups, [this](const CloneDetector::CloneGroup &Group) {
337
          return isAutoGenerated(Group);
338
        });
339
  }
340
};
341
 
342
/// Analyzes the pattern of the referenced variables in a statement.
343
class VariablePattern {
344
 
345
  /// Describes an occurrence of a variable reference in a statement.
346
  struct VariableOccurence {
347
    /// The index of the associated VarDecl in the Variables vector.
348
    size_t KindID;
349
    /// The statement in the code where the variable was referenced.
350
    const Stmt *Mention;
351
 
352
    VariableOccurence(size_t KindID, const Stmt *Mention)
353
        : KindID(KindID), Mention(Mention) {}
354
  };
355
 
356
  /// All occurrences of referenced variables in the order of appearance.
357
  std::vector<VariableOccurence> Occurences;
358
  /// List of referenced variables in the order of appearance.
359
  /// Every item in this list is unique.
360
  std::vector<const VarDecl *> Variables;
361
 
362
  /// Adds a new variable referenced to this pattern.
363
  /// \param VarDecl The declaration of the variable that is referenced.
364
  /// \param Mention The SourceRange where this variable is referenced.
365
  void addVariableOccurence(const VarDecl *VarDecl, const Stmt *Mention);
366
 
367
  /// Adds each referenced variable from the given statement.
368
  void addVariables(const Stmt *S);
369
 
370
public:
371
  /// Creates an VariablePattern object with information about the given
372
  /// StmtSequence.
373
  VariablePattern(const StmtSequence &Sequence) {
374
    for (const Stmt *S : Sequence)
375
      addVariables(S);
376
  }
377
 
378
  /// Describes two clones that reference their variables in a different pattern
379
  /// which could indicate a programming error.
380
  struct SuspiciousClonePair {
381
    /// Utility class holding the relevant information about a single
382
    /// clone in this pair.
383
    struct SuspiciousCloneInfo {
384
      /// The variable which referencing in this clone was against the pattern.
385
      const VarDecl *Variable;
386
      /// Where the variable was referenced.
387
      const Stmt *Mention;
388
      /// The variable that should have been referenced to follow the pattern.
389
      /// If Suggestion is a nullptr then it's not possible to fix the pattern
390
      /// by referencing a different variable in this clone.
391
      const VarDecl *Suggestion;
392
      SuspiciousCloneInfo(const VarDecl *Variable, const Stmt *Mention,
393
                          const VarDecl *Suggestion)
394
          : Variable(Variable), Mention(Mention), Suggestion(Suggestion) {}
395
      SuspiciousCloneInfo() {}
396
    };
397
    /// The first clone in the pair which always has a suggested variable.
398
    SuspiciousCloneInfo FirstCloneInfo;
399
    /// This other clone in the pair which can have a suggested variable.
400
    SuspiciousCloneInfo SecondCloneInfo;
401
  };
402
 
403
  /// Counts the differences between this pattern and the given one.
404
  /// \param Other The given VariablePattern to compare with.
405
  /// \param FirstMismatch Output parameter that will be filled with information
406
  ///        about the first difference between the two patterns. This parameter
407
  ///        can be a nullptr, in which case it will be ignored.
408
  /// \return Returns the number of differences between the pattern this object
409
  ///         is following and the given VariablePattern.
410
  ///
411
  /// For example, the following statements all have the same pattern and this
412
  /// function would return zero:
413
  ///
414
  ///   if (a < b) return a; return b;
415
  ///   if (x < y) return x; return y;
416
  ///   if (u2 < u1) return u2; return u1;
417
  ///
418
  /// But the following statement has a different pattern (note the changed
419
  /// variables in the return statements) and would have two differences when
420
  /// compared with one of the statements above.
421
  ///
422
  ///   if (a < b) return b; return a;
423
  ///
424
  /// This function should only be called if the related statements of the given
425
  /// pattern and the statements of this objects are clones of each other.
426
  unsigned countPatternDifferences(
427
      const VariablePattern &Other,
428
      VariablePattern::SuspiciousClonePair *FirstMismatch = nullptr);
429
};
430
 
431
/// Ensures that all clones reference variables in the same pattern.
432
struct MatchingVariablePatternConstraint {
433
  void constrain(std::vector<CloneDetector::CloneGroup> &CloneGroups);
434
};
435
 
436
} // end namespace clang
437
 
438
#endif // LLVM_CLANG_ANALYSIS_CLONEDETECTION_H