Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- Tree.h - structure of the syntax tree ------------------*- C++ -*-=====//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // Defines the basic structure of the syntax tree. There are two kinds of nodes:
  9. //   - leaf nodes correspond to tokens,
  10. //   - tree nodes correspond to language grammar constructs.
  11. //
  12. // The tree is initially built from an AST. Each node of a newly built tree
  13. // covers a continuous subrange of expanded tokens (i.e. tokens after
  14. // preprocessing), the specific tokens coverered are stored in the leaf nodes of
  15. // a tree. A post-order traversal of a tree will visit leaf nodes in an order
  16. // corresponding the original order of expanded tokens.
  17. //
  18. // This is still work in progress and highly experimental, we leave room for
  19. // ourselves to completely change the design and/or implementation.
  20. //===----------------------------------------------------------------------===//
  21. #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
  22. #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
  23.  
  24. #include "clang/Basic/TokenKinds.h"
  25. #include "clang/Tooling/Syntax/TokenManager.h"
  26. #include "llvm/ADT/iterator.h"
  27. #include "llvm/Support/Allocator.h"
  28. #include <cstdint>
  29. #include <vector>
  30.  
  31. namespace clang {
  32. namespace syntax {
  33.  
  34. /// A memory arena for syntax trees.
  35. // FIXME: use BumpPtrAllocator directly.
  36. class Arena {
  37. public:
  38.   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
  39. private:
  40.   /// Keeps all the allocated nodes and their intermediate data structures.
  41.   llvm::BumpPtrAllocator Allocator;
  42. };
  43.  
  44. class Tree;
  45. class TreeBuilder;
  46. class FactoryImpl;
  47. class MutationsImpl;
  48.  
  49. enum class NodeKind : uint16_t;
  50. enum class NodeRole : uint8_t;
  51.  
  52. /// A node in a syntax tree. Each node is either a Leaf (representing tokens) or
  53. /// a Tree (representing language constructrs).
  54. class Node {
  55. protected:
  56.   /// Newly created nodes are detached from a tree, parent and sibling links are
  57.   /// set when the node is added as a child to another one.
  58.   Node(NodeKind Kind);
  59.   /// Nodes are allocated on Arenas; the destructor is never called.
  60.   ~Node() = default;
  61.  
  62. public:
  63.   /// Nodes cannot simply be copied without violating tree invariants.
  64.   Node(const Node &) = delete;
  65.   Node &operator=(const Node &) = delete;
  66.   /// Idiomatically, nodes are allocated on an Arena and never moved.
  67.   Node(Node &&) = delete;
  68.   Node &operator=(Node &&) = delete;
  69.  
  70.   NodeKind getKind() const { return static_cast<NodeKind>(Kind); }
  71.   NodeRole getRole() const { return static_cast<NodeRole>(Role); }
  72.  
  73.   /// Whether the node is detached from a tree, i.e. does not have a parent.
  74.   bool isDetached() const;
  75.   /// Whether the node was created from the AST backed by the source code
  76.   /// rather than added later through mutation APIs or created with factory
  77.   /// functions.
  78.   /// When this flag is true, all subtrees are also original.
  79.   /// This flag is set to false on any modifications to the node or any of its
  80.   /// subtrees, even if this simply involves swapping existing subtrees.
  81.   bool isOriginal() const { return Original; }
  82.   /// If this function return false, the tree cannot be modified because there
  83.   /// is no reasonable way to produce the corresponding textual replacements.
  84.   /// This can happen when the node crosses macro expansion boundaries.
  85.   ///
  86.   /// Note that even if the node is not modifiable, its child nodes can be
  87.   /// modifiable.
  88.   bool canModify() const { return CanModify; }
  89.  
  90.   const Tree *getParent() const { return Parent; }
  91.   Tree *getParent() { return Parent; }
  92.  
  93.   const Node *getNextSibling() const { return NextSibling; }
  94.   Node *getNextSibling() { return NextSibling; }
  95.   const Node *getPreviousSibling() const { return PreviousSibling; }
  96.   Node *getPreviousSibling() { return PreviousSibling; }
  97.  
  98.   /// Dumps the structure of a subtree. For debugging and testing purposes.
  99.   std::string dump(const TokenManager &SM) const;
  100.   /// Dumps the tokens forming this subtree.
  101.   std::string dumpTokens(const TokenManager &SM) const;
  102.  
  103.   /// Asserts invariants on this node of the tree and its immediate children.
  104.   /// Will not recurse into the subtree. No-op if NDEBUG is set.
  105.   void assertInvariants() const;
  106.   /// Runs checkInvariants on all nodes in the subtree. No-op if NDEBUG is set.
  107.   void assertInvariantsRecursive() const;
  108.  
  109. private:
  110.   // Tree is allowed to change the Parent link and Role.
  111.   friend class Tree;
  112.   // TreeBuilder is allowed to set the Original and CanModify flags.
  113.   friend class TreeBuilder;
  114.   // MutationsImpl sets roles and CanModify flag.
  115.   friend class MutationsImpl;
  116.   // FactoryImpl sets CanModify flag.
  117.   friend class FactoryImpl;
  118.  
  119.   void setRole(NodeRole NR);
  120.  
  121.   Tree *Parent;
  122.   Node *NextSibling;
  123.   Node *PreviousSibling;
  124.   unsigned Kind : 16;
  125.   unsigned Role : 8;
  126.   unsigned Original : 1;
  127.   unsigned CanModify : 1;
  128. };
  129.  
  130. /// A leaf node points to a single token.
  131. // FIXME: add TokenKind field (borrow some bits from the Node::kind).
  132. class Leaf final : public Node {
  133. public:
  134.   Leaf(TokenManager::Key K);
  135.   static bool classof(const Node *N);
  136.  
  137.   TokenManager::Key getTokenKey() const { return K; }
  138.  
  139. private:
  140.   TokenManager::Key K;
  141. };
  142.  
  143. /// A node that has children and represents a syntactic language construct.
  144. class Tree : public Node {
  145.   /// Iterator over children (common base for const/non-const).
  146.   /// Not invalidated by tree mutations (holds a stable node pointer).
  147.   template <typename DerivedT, typename NodeT>
  148.   class ChildIteratorBase
  149.       : public llvm::iterator_facade_base<DerivedT, std::forward_iterator_tag,
  150.                                           NodeT> {
  151.   protected:
  152.     NodeT *N = nullptr;
  153.     using Base = ChildIteratorBase;
  154.  
  155.   public:
  156.     ChildIteratorBase() = default;
  157.     explicit ChildIteratorBase(NodeT *N) : N(N) {}
  158.  
  159.     friend bool operator==(const DerivedT &LHS, const DerivedT &RHS) {
  160.       return LHS.N == RHS.N;
  161.     }
  162.  
  163.     NodeT &operator*() const { return *N; }
  164.     DerivedT &operator++() {
  165.       N = N->getNextSibling();
  166.       return *static_cast<DerivedT *>(this);
  167.     }
  168.  
  169.     /// Truthy if valid (not past-the-end).
  170.     /// This allows: if (auto It = find_if(N.children(), ...) )
  171.     explicit operator bool() const { return N != nullptr; }
  172.     /// The element, or nullptr if past-the-end.
  173.     NodeT *asPointer() const { return N; }
  174.   };
  175.  
  176. public:
  177.   static bool classof(const Node *N);
  178.  
  179.   Node *getFirstChild() { return FirstChild; }
  180.   const Node *getFirstChild() const { return FirstChild; }
  181.   Node *getLastChild() { return LastChild; }
  182.   const Node *getLastChild() const { return LastChild; }
  183.  
  184.   const Leaf *findFirstLeaf() const;
  185.   Leaf *findFirstLeaf() {
  186.     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findFirstLeaf());
  187.   }
  188.  
  189.   const Leaf *findLastLeaf() const;
  190.   Leaf *findLastLeaf() {
  191.     return const_cast<Leaf *>(const_cast<const Tree *>(this)->findLastLeaf());
  192.   }
  193.  
  194.   /// child_iterator is not invalidated by mutations.
  195.   struct ChildIterator : ChildIteratorBase<ChildIterator, Node> {
  196.     using Base::ChildIteratorBase;
  197.   };
  198.   struct ConstChildIterator
  199.       : ChildIteratorBase<ConstChildIterator, const Node> {
  200.     using Base::ChildIteratorBase;
  201.     ConstChildIterator() = default;
  202.     ConstChildIterator(const ChildIterator &I) : Base(I.asPointer()) {}
  203.   };
  204.  
  205.   llvm::iterator_range<ChildIterator> getChildren() {
  206.     return {ChildIterator(getFirstChild()), ChildIterator()};
  207.   }
  208.   llvm::iterator_range<ConstChildIterator> getChildren() const {
  209.     return {ConstChildIterator(getFirstChild()), ConstChildIterator()};
  210.   }
  211.  
  212.   /// Find the first node with a corresponding role.
  213.   const Node *findChild(NodeRole R) const;
  214.   Node *findChild(NodeRole R) {
  215.     return const_cast<Node *>(const_cast<const Tree *>(this)->findChild(R));
  216.   }
  217.  
  218. protected:
  219.   using Node::Node;
  220.  
  221. private:
  222.   /// Append \p Child to the list of children and sets the parent pointer.
  223.   /// A very low-level operation that does not check any invariants, only used
  224.   /// by TreeBuilder and FactoryImpl.
  225.   /// EXPECTS: Role != Detached.
  226.   void appendChildLowLevel(Node *Child, NodeRole Role);
  227.   /// Similar but prepends.
  228.   void prependChildLowLevel(Node *Child, NodeRole Role);
  229.  
  230.   /// Like the previous overloads, but does not set role for \p Child.
  231.   /// EXPECTS: Child->Role != Detached
  232.   void appendChildLowLevel(Node *Child);
  233.   void prependChildLowLevel(Node *Child);
  234.   friend class TreeBuilder;
  235.   friend class FactoryImpl;
  236.  
  237.   /// Replace a range of children [Begin, End) with a list of
  238.   /// new nodes starting at \p New.
  239.   /// Only used by MutationsImpl to implement higher-level mutation operations.
  240.   /// (!) \p New can be null to model removal of the child range.
  241.   /// (!) \p End can be null to model one past the end.
  242.   /// (!) \p Begin can be null to model an append.
  243.   void replaceChildRangeLowLevel(Node *Begin, Node *End, Node *New);
  244.   friend class MutationsImpl;
  245.  
  246.   Node *FirstChild = nullptr;
  247.   Node *LastChild = nullptr;
  248. };
  249.  
  250. /// A list of Elements separated or terminated by a fixed token.
  251. ///
  252. /// This type models the following grammar construct:
  253. /// delimited-list(element, delimiter, termination, canBeEmpty)
  254. class List : public Tree {
  255. public:
  256.   template <typename Element> struct ElementAndDelimiter {
  257.     Element *element;
  258.     Leaf *delimiter;
  259.   };
  260.  
  261.   enum class TerminationKind {
  262.     Terminated,
  263.     MaybeTerminated,
  264.     Separated,
  265.   };
  266.  
  267.   using Tree::Tree;
  268.   static bool classof(const Node *N);
  269.   /// Returns the elements and corresponding delimiters. Missing elements
  270.   /// and delimiters are represented as null pointers.
  271.   ///
  272.   /// For example, in a separated list:
  273.   /// "a, b, c"  <=> [("a" , ","), ("b" , "," ), ("c" , null)]
  274.   /// "a,  , c"  <=> [("a" , ","), (null, "," ), ("c" , null)]
  275.   /// "a, b  c"  <=> [("a" , ","), ("b" , null), ("c" , null)]
  276.   /// "a, b,"    <=> [("a" , ","), ("b" , "," ), (null, null)]
  277.   ///
  278.   /// In a terminated or maybe-terminated list:
  279.   /// "a; b; c;" <=> [("a" , ";"), ("b" , ";" ), ("c" , ";" )]
  280.   /// "a;  ; c;" <=> [("a" , ";"), (null, ";" ), ("c" , ";" )]
  281.   /// "a; b  c;" <=> [("a" , ";"), ("b" , null), ("c" , ";" )]
  282.   /// "a; b; c"  <=> [("a" , ";"), ("b" , ";" ), ("c" , null)]
  283.   std::vector<ElementAndDelimiter<Node>> getElementsAsNodesAndDelimiters();
  284.  
  285.   /// Returns the elements of the list. Missing elements are represented
  286.   /// as null pointers in the same way as in the return value of
  287.   /// `getElementsAsNodesAndDelimiters()`.
  288.   std::vector<Node *> getElementsAsNodes();
  289.  
  290.   // These can't be implemented with the information we have!
  291.  
  292.   /// Returns the appropriate delimiter for this list.
  293.   ///
  294.   /// Useful for discovering the correct delimiter to use when adding
  295.   /// elements to empty or one-element lists.
  296.   clang::tok::TokenKind getDelimiterTokenKind() const;
  297.  
  298.   TerminationKind getTerminationKind() const;
  299.  
  300.   /// Whether this list can be empty in syntactically and semantically correct
  301.   /// code.
  302.   ///
  303.   /// This list may be empty when the source code has errors even if
  304.   /// canBeEmpty() returns false.
  305.   bool canBeEmpty() const;
  306. };
  307.  
  308. } // namespace syntax
  309. } // namespace clang
  310.  
  311. #endif
  312.