Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// Defines the basic structure of the syntax tree. There are two kinds of nodes:
9
//   - leaf nodes correspond to tokens,
10
//   - tree nodes correspond to language grammar constructs.
11
//
12
// The tree is initially built from an AST. Each node of a newly built tree
13
// covers a continuous subrange of expanded tokens (i.e. tokens after
14
// preprocessing), the specific tokens coverered are stored in the leaf nodes of
15
// a tree. A post-order traversal of a tree will visit leaf nodes in an order
16
// corresponding the original order of expanded tokens.
17
//
18
// This is still work in progress and highly experimental, we leave room for
19
// ourselves to completely change the design and/or implementation.
20
//===----------------------------------------------------------------------===//
21
#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
22
#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
23
 
24
#include "clang/Basic/TokenKinds.h"
25
#include "clang/Tooling/Syntax/TokenManager.h"
26
#include "llvm/ADT/iterator.h"
27
#include "llvm/Support/Allocator.h"
28
#include <cstdint>
29
#include <vector>
30
 
31
namespace clang {
32
namespace syntax {
33
 
34
/// A memory arena for syntax trees.
35
// FIXME: use BumpPtrAllocator directly.
36
class Arena {
37
public:
38
  llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
39
private:
40
  /// Keeps all the allocated nodes and their intermediate data structures.
41
  llvm::BumpPtrAllocator Allocator;
42
};
43
 
44
class Tree;
45
class TreeBuilder;
46
class FactoryImpl;
47
class MutationsImpl;
48
 
49
enum class NodeKind : uint16_t;
50
enum class NodeRole : uint8_t;
51
 
52
/// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
53
/// a Tree (representing language constructrs).
54
class Node {
55
protected:
56
  /// Newly created nodes are detached from a tree, parent and sibling links are
57
  /// set when the node is added as a child to another one.
58
  Node(NodeKind Kind);
59
  /// Nodes are allocated on Arenas; the destructor is never called.
60
  ~Node() = default;
61
 
62
public:
63
  /// Nodes cannot simply be copied without violating tree invariants.
64
  Node(const Node &) = delete;
65
  Node &operator=(const Node &) = delete;
66
  /// Idiomatically, nodes are allocated on an Arena and never moved.
67
  Node(Node &&) = delete;
68
  Node &operator=(Node &&) = delete;
69
 
70
  NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
71
  NodeRole getRole() const { return static_cast<NodeRole>(Role); }
72
 
73
  /// Whether the node is detached from a tree, i.e. does not have a parent.
74
  bool isDetached() const;
75
  /// Whether the node was created from the AST backed by the source code
76
  /// rather than added later through mutation APIs or created with factory
77
  /// functions.
78
  /// When this flag is true, all subtrees are also original.
79
  /// This flag is set to false on any modifications to the node or any of its
80
  /// subtrees, even if this simply involves swapping existing subtrees.
81
  bool isOriginal() const { return Original; }
82
  /// If this function return false, the tree cannot be modified because there
83
  /// is no reasonable way to produce the corresponding textual replacements.
84
  /// This can happen when the node crosses macro expansion boundaries.
85
  ///
86
  /// Note that even if the node is not modifiable, its child nodes can be
87
  /// modifiable.
88
  bool canModify() const { return CanModify; }
89
 
90
  const Tree *getParent() const { return Parent; }
91
  Tree *getParent() { return Parent; }
92
 
93
  const Node *getNextSibling() const { return NextSibling; }
94
  Node *getNextSibling() { return NextSibling; }
95
  const Node *getPreviousSibling() const { return PreviousSibling; }
96
  Node *getPreviousSibling() { return PreviousSibling; }
97
 
98
  /// Dumps the structure of a subtree. For debugging and testing purposes.
99
  std::string dump(const TokenManager &SM) const;
100
  /// Dumps the tokens forming this subtree.
101
  std::string dumpTokens(const TokenManager &SM) const;
102
 
103
  /// Asserts invariants on this node of the tree and its immediate children.
104
  /// Will not recurse into the subtree. No-op if NDEBUG is set.
105
  void assertInvariants() const;
106
  /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
107
  void assertInvariantsRecursive() const;
108
 
109
private:
110
  // Tree is allowed to change the Parent link and Role.
111
  friend class Tree;
112
  // TreeBuilder is allowed to set the Original and CanModify flags.
113
  friend class TreeBuilder;
114
  // MutationsImpl sets roles and CanModify flag.
115
  friend class MutationsImpl;
116
  // FactoryImpl sets CanModify flag.
117
  friend class FactoryImpl;
118
 
119
  void setRole(NodeRole NR);
120
 
121
  Tree *Parent;
122
  Node *NextSibling;
123
  Node *PreviousSibling;
124
  unsigned Kind : 16;
125
  unsigned Role : 8;
126
  unsigned Original : 1;
127
  unsigned CanModify : 1;
128
};
129
 
130
/// A leaf node points to a single token.
131
// FIXME: add TokenKind field (borrow some bits from the Node::kind).
132
class Leaf final : public Node {
133
public:
134
  Leaf(TokenManager::Key K);
135
  static bool classof(const Node *N);
136
 
137
  TokenManager::Key getTokenKey() const { return K; }
138
 
139
private:
140
  TokenManager::Key K;
141
};
142
 
143
/// A node that has children and represents a syntactic language construct.
144
class Tree : public Node {
145
  /// Iterator over children (common base for const/non-const).
146
  /// Not invalidated by tree mutations (holds a stable node pointer).
147
  template <typename DerivedT, typename NodeT>
148
  class ChildIteratorBase
149
      : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
150
                                          NodeT> {
151
  protected:
152
    NodeT *N = nullptr;
153
    using Base = ChildIteratorBase;
154
 
155
  public:
156
    ChildIteratorBase() = default;
157
    explicit ChildIteratorBase(NodeT *N) : N(N) {}
158
 
159
    friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
160
      return LHS.N == RHS.N;
161
    }
162
 
163
    NodeT &operator*() const { return *N; }
164
    DerivedT &operator++() {
165
      N = N->getNextSibling();
166
      return *static_cast<DerivedT *>(this);
167
    }
168
 
169
    /// Truthy if valid (not past-the-end).
170
    /// This allows: if (auto It = find_if(N.children(), ...) )
171
    explicit operator bool() const { return N != nullptr; }
172
    /// The element, or nullptr if past-the-end.
173
    NodeT *asPointer() const { return N; }
174
  };
175
 
176
public:
177
  static bool classof(const Node *N);
178
 
179
  Node *getFirstChild() { return FirstChild; }
180
  const Node *getFirstChild() const { return FirstChild; }
181
  Node *getLastChild() { return LastChild; }
182
  const Node *getLastChild() const { return LastChild; }
183
 
184
  const Leaf *findFirstLeaf() const;
185
  Leaf *findFirstLeaf() {
186
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
187
  }
188
 
189
  const Leaf *findLastLeaf() const;
190
  Leaf *findLastLeaf() {
191
    return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
192
  }
193
 
194
  /// child_iterator is not invalidated by mutations.
195
  struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
196
    using Base::ChildIteratorBase;
197
  };
198
  struct ConstChildIterator
199
      : ChildIteratorBase<ConstChildIterator, const Node> {
200
    using Base::ChildIteratorBase;
201
    ConstChildIterator() = default;
202
    ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
203
  };
204
 
205
  llvm::iterator_range<ChildIterator> getChildren() {
206
    return {ChildIterator(getFirstChild()), ChildIterator()};
207
  }
208
  llvm::iterator_range<ConstChildIterator> getChildren() const {
209
    return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
210
  }
211
 
212
  /// Find the first node with a corresponding role.
213
  const Node *findChild(NodeRole R) const;
214
  Node *findChild(NodeRole R) {
215
    return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
216
  }
217
 
218
protected:
219
  using Node::Node;
220
 
221
private:
222
  /// Append \p Child to the list of children and sets the parent pointer.
223
  /// A very low-level operation that does not check any invariants, only used
224
  /// by TreeBuilder and FactoryImpl.
225
  /// EXPECTS: Role != Detached.
226
  void appendChildLowLevel(Node *Child, NodeRole Role);
227
  /// Similar but prepends.
228
  void prependChildLowLevel(Node *Child, NodeRole Role);
229
 
230
  /// Like the previous overloads, but does not set role for \p Child.
231
  /// EXPECTS: Child->Role != Detached
232
  void appendChildLowLevel(Node *Child);
233
  void prependChildLowLevel(Node *Child);
234
  friend class TreeBuilder;
235
  friend class FactoryImpl;
236
 
237
  /// Replace a range of children [Begin, End) with a list of
238
  /// new nodes starting at \p New.
239
  /// Only used by MutationsImpl to implement higher-level mutation operations.
240
  /// (!) \p New can be null to model removal of the child range.
241
  /// (!) \p End can be null to model one past the end.
242
  /// (!) \p Begin can be null to model an append.
243
  void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
244
  friend class MutationsImpl;
245
 
246
  Node *FirstChild = nullptr;
247
  Node *LastChild = nullptr;
248
};
249
 
250
/// A list of Elements separated or terminated by a fixed token.
251
///
252
/// This type models the following grammar construct:
253
/// delimited-list(element, delimiter, termination, canBeEmpty)
254
class List : public Tree {
255
public:
256
  template <typename Element> struct ElementAndDelimiter {
257
    Element *element;
258
    Leaf *delimiter;
259
  };
260
 
261
  enum class TerminationKind {
262
    Terminated,
263
    MaybeTerminated,
264
    Separated,
265
  };
266
 
267
  using Tree::Tree;
268
  static bool classof(const Node *N);
269
  /// Returns the elements and corresponding delimiters. Missing elements
270
  /// and delimiters are represented as null pointers.
271
  ///
272
  /// For example, in a separated list:
273
  /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
274
  /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
275
  /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
276
  /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
277
  ///
278
  /// In a terminated or maybe-terminated list:
279
  /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
280
  /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
281
  /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
282
  /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
283
  std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
284
 
285
  /// Returns the elements of the list. Missing elements are represented
286
  /// as null pointers in the same way as in the return value of
287
  /// `getElementsAsNodesAndDelimiters()`.
288
  std::vector<Node *> getElementsAsNodes();
289
 
290
  // These can't be implemented with the information we have!
291
 
292
  /// Returns the appropriate delimiter for this list.
293
  ///
294
  /// Useful for discovering the correct delimiter to use when adding
295
  /// elements to empty or one-element lists.
296
  clang::tok::TokenKind getDelimiterTokenKind() const;
297
 
298
  TerminationKind getTerminationKind() const;
299
 
300
  /// Whether this list can be empty in syntactically and semantically correct
301
  /// code.
302
  ///
303
  /// This list may be empty when the source code has errors even if
304
  /// canBeEmpty() returns false.
305
  bool canBeEmpty() const;
306
};
307
 
308
} // namespace syntax
309
} // namespace clang
310
 
311
#endif