Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. //  This is a YAML 1.2 parser.
  10. //
  11. //  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
  12. //
  13. //  This currently does not implement the following:
  14. //    * Tag resolution.
  15. //    * UTF-16.
  16. //    * BOMs anywhere other than the first Unicode scalar value in the file.
  17. //
  18. //  The most important class here is Stream. This represents a YAML stream with
  19. //  0, 1, or many documents.
  20. //
  21. //  SourceMgr sm;
  22. //  StringRef input = getInput();
  23. //  yaml::Stream stream(input, sm);
  24. //
  25. //  for (yaml::document_iterator di = stream.begin(), de = stream.end();
  26. //       di != de; ++di) {
  27. //    yaml::Node *n = di->getRoot();
  28. //    if (n) {
  29. //      // Do something with n...
  30. //    } else
  31. //      break;
  32. //  }
  33. //
  34. //===----------------------------------------------------------------------===//
  35.  
  36. #ifndef LLVM_SUPPORT_YAMLPARSER_H
  37. #define LLVM_SUPPORT_YAMLPARSER_H
  38.  
  39. #include "llvm/ADT/StringRef.h"
  40. #include "llvm/Support/Allocator.h"
  41. #include "llvm/Support/SMLoc.h"
  42. #include "llvm/Support/SourceMgr.h"
  43. #include <cassert>
  44. #include <cstddef>
  45. #include <iterator>
  46. #include <map>
  47. #include <memory>
  48. #include <optional>
  49. #include <string>
  50. #include <system_error>
  51.  
  52. namespace llvm {
  53.  
  54. class MemoryBufferRef;
  55. class raw_ostream;
  56. class Twine;
  57.  
  58. namespace yaml {
  59.  
  60. class Document;
  61. class document_iterator;
  62. class Node;
  63. class Scanner;
  64. struct Token;
  65.  
  66. /// Dump all the tokens in this stream to OS.
  67. /// \returns true if there was an error, false otherwise.
  68. bool dumpTokens(StringRef Input, raw_ostream &);
  69.  
  70. /// Scans all tokens in input without outputting anything. This is used
  71. ///        for benchmarking the tokenizer.
  72. /// \returns true if there was an error, false otherwise.
  73. bool scanTokens(StringRef Input);
  74.  
  75. /// Escape \a Input for a double quoted scalar; if \p EscapePrintable
  76. /// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
  77. /// false, those UTF8 sequences encoding printable unicode scalars will not be
  78. /// escaped, but emitted verbatim.
  79. std::string escape(StringRef Input, bool EscapePrintable = true);
  80.  
  81. /// Parse \p S as a bool according to https://yaml.org/type/bool.html.
  82. std::optional<bool> parseBool(StringRef S);
  83.  
  84. /// This class represents a YAML stream potentially containing multiple
  85. ///        documents.
  86. class Stream {
  87. public:
  88.   /// This keeps a reference to the string referenced by \p Input.
  89.   Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
  90.          std::error_code *EC = nullptr);
  91.  
  92.   Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
  93.          std::error_code *EC = nullptr);
  94.   ~Stream();
  95.  
  96.   document_iterator begin();
  97.   document_iterator end();
  98.   void skip();
  99.   bool failed();
  100.  
  101.   bool validate() {
  102.     skip();
  103.     return !failed();
  104.   }
  105.  
  106.   void printError(Node *N, const Twine &Msg,
  107.                   SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
  108.   void printError(const SMRange &Range, const Twine &Msg,
  109.                   SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
  110.  
  111. private:
  112.   friend class Document;
  113.  
  114.   std::unique_ptr<Scanner> scanner;
  115.   std::unique_ptr<Document> CurrentDoc;
  116. };
  117.  
  118. /// Abstract base class for all Nodes.
  119. class Node {
  120.   virtual void anchor();
  121.  
  122. public:
  123.   enum NodeKind {
  124.     NK_Null,
  125.     NK_Scalar,
  126.     NK_BlockScalar,
  127.     NK_KeyValue,
  128.     NK_Mapping,
  129.     NK_Sequence,
  130.     NK_Alias
  131.   };
  132.  
  133.   Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
  134.        StringRef Tag);
  135.  
  136.   // It's not safe to copy YAML nodes; the document is streamed and the position
  137.   // is part of the state.
  138.   Node(const Node &) = delete;
  139.   void operator=(const Node &) = delete;
  140.  
  141.   void *operator new(size_t Size, BumpPtrAllocator &Alloc,
  142.                      size_t Alignment = 16) noexcept {
  143.     return Alloc.Allocate(Size, Alignment);
  144.   }
  145.  
  146.   void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
  147.                        size_t Size) noexcept {
  148.     Alloc.Deallocate(Ptr, Size, 0);
  149.   }
  150.  
  151.   void operator delete(void *) noexcept = delete;
  152.  
  153.   /// Get the value of the anchor attached to this node. If it does not
  154.   ///        have one, getAnchor().size() will be 0.
  155.   StringRef getAnchor() const { return Anchor; }
  156.  
  157.   /// Get the tag as it was written in the document. This does not
  158.   ///   perform tag resolution.
  159.   StringRef getRawTag() const { return Tag; }
  160.  
  161.   /// Get the verbatium tag for a given Node. This performs tag resoluton
  162.   ///   and substitution.
  163.   std::string getVerbatimTag() const;
  164.  
  165.   SMRange getSourceRange() const { return SourceRange; }
  166.   void setSourceRange(SMRange SR) { SourceRange = SR; }
  167.  
  168.   // These functions forward to Document and Scanner.
  169.   Token &peekNext();
  170.   Token getNext();
  171.   Node *parseBlockNode();
  172.   BumpPtrAllocator &getAllocator();
  173.   void setError(const Twine &Message, Token &Location) const;
  174.   bool failed() const;
  175.  
  176.   virtual void skip() {}
  177.  
  178.   unsigned int getType() const { return TypeID; }
  179.  
  180. protected:
  181.   std::unique_ptr<Document> &Doc;
  182.   SMRange SourceRange;
  183.  
  184.   ~Node() = default;
  185.  
  186. private:
  187.   unsigned int TypeID;
  188.   StringRef Anchor;
  189.   /// The tag as typed in the document.
  190.   StringRef Tag;
  191. };
  192.  
  193. /// A null value.
  194. ///
  195. /// Example:
  196. ///   !!null null
  197. class NullNode final : public Node {
  198.   void anchor() override;
  199.  
  200. public:
  201.   NullNode(std::unique_ptr<Document> &D)
  202.       : Node(NK_Null, D, StringRef(), StringRef()) {}
  203.  
  204.   static bool classof(const Node *N) { return N->getType() == NK_Null; }
  205. };
  206.  
  207. /// A scalar node is an opaque datum that can be presented as a
  208. ///        series of zero or more Unicode scalar values.
  209. ///
  210. /// Example:
  211. ///   Adena
  212. class ScalarNode final : public Node {
  213.   void anchor() override;
  214.  
  215. public:
  216.   ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  217.              StringRef Val)
  218.       : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
  219.     SMLoc Start = SMLoc::getFromPointer(Val.begin());
  220.     SMLoc End = SMLoc::getFromPointer(Val.end());
  221.     SourceRange = SMRange(Start, End);
  222.   }
  223.  
  224.   // Return Value without any escaping or folding or other fun YAML stuff. This
  225.   // is the exact bytes that are contained in the file (after conversion to
  226.   // utf8).
  227.   StringRef getRawValue() const { return Value; }
  228.  
  229.   /// Gets the value of this node as a StringRef.
  230.   ///
  231.   /// \param Storage is used to store the content of the returned StringRef if
  232.   ///        it requires any modification from how it appeared in the source.
  233.   ///        This happens with escaped characters and multi-line literals.
  234.   StringRef getValue(SmallVectorImpl<char> &Storage) const;
  235.  
  236.   static bool classof(const Node *N) {
  237.     return N->getType() == NK_Scalar;
  238.   }
  239.  
  240. private:
  241.   StringRef Value;
  242.  
  243.   StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
  244.                                  StringRef::size_type Start,
  245.                                  SmallVectorImpl<char> &Storage) const;
  246. };
  247.  
  248. /// A block scalar node is an opaque datum that can be presented as a
  249. ///        series of zero or more Unicode scalar values.
  250. ///
  251. /// Example:
  252. ///   |
  253. ///     Hello
  254. ///     World
  255. class BlockScalarNode final : public Node {
  256.   void anchor() override;
  257.  
  258. public:
  259.   BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  260.                   StringRef Value, StringRef RawVal)
  261.       : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
  262.     SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
  263.     SMLoc End = SMLoc::getFromPointer(RawVal.end());
  264.     SourceRange = SMRange(Start, End);
  265.   }
  266.  
  267.   /// Gets the value of this node as a StringRef.
  268.   StringRef getValue() const { return Value; }
  269.  
  270.   static bool classof(const Node *N) {
  271.     return N->getType() == NK_BlockScalar;
  272.   }
  273.  
  274. private:
  275.   StringRef Value;
  276. };
  277.  
  278. /// A key and value pair. While not technically a Node under the YAML
  279. ///        representation graph, it is easier to treat them this way.
  280. ///
  281. /// TODO: Consider making this not a child of Node.
  282. ///
  283. /// Example:
  284. ///   Section: .text
  285. class KeyValueNode final : public Node {
  286.   void anchor() override;
  287.  
  288. public:
  289.   KeyValueNode(std::unique_ptr<Document> &D)
  290.       : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
  291.  
  292.   /// Parse and return the key.
  293.   ///
  294.   /// This may be called multiple times.
  295.   ///
  296.   /// \returns The key, or nullptr if failed() == true.
  297.   Node *getKey();
  298.  
  299.   /// Parse and return the value.
  300.   ///
  301.   /// This may be called multiple times.
  302.   ///
  303.   /// \returns The value, or nullptr if failed() == true.
  304.   Node *getValue();
  305.  
  306.   void skip() override {
  307.     if (Node *Key = getKey()) {
  308.       Key->skip();
  309.       if (Node *Val = getValue())
  310.         Val->skip();
  311.     }
  312.   }
  313.  
  314.   static bool classof(const Node *N) {
  315.     return N->getType() == NK_KeyValue;
  316.   }
  317.  
  318. private:
  319.   Node *Key = nullptr;
  320.   Node *Value = nullptr;
  321. };
  322.  
  323. /// This is an iterator abstraction over YAML collections shared by both
  324. ///        sequences and maps.
  325. ///
  326. /// BaseT must have a ValueT* member named CurrentEntry and a member function
  327. /// increment() which must set CurrentEntry to 0 to create an end iterator.
  328. template <class BaseT, class ValueT> class basic_collection_iterator {
  329. public:
  330.   using iterator_category = std::input_iterator_tag;
  331.   using value_type = ValueT;
  332.   using difference_type = std::ptrdiff_t;
  333.   using pointer = value_type *;
  334.   using reference = value_type &;
  335.  
  336.   basic_collection_iterator() = default;
  337.   basic_collection_iterator(BaseT *B) : Base(B) {}
  338.  
  339.   ValueT *operator->() const {
  340.     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
  341.     return Base->CurrentEntry;
  342.   }
  343.  
  344.   ValueT &operator*() const {
  345.     assert(Base && Base->CurrentEntry &&
  346.            "Attempted to dereference end iterator!");
  347.     return *Base->CurrentEntry;
  348.   }
  349.  
  350.   operator ValueT *() const {
  351.     assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
  352.     return Base->CurrentEntry;
  353.   }
  354.  
  355.   /// Note on EqualityComparable:
  356.   ///
  357.   /// The iterator is not re-entrant,
  358.   /// it is meant to be used for parsing YAML on-demand
  359.   /// Once iteration started - it can point only to one entry at a time
  360.   /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
  361.   /// iff Base and Other.Base are equal.
  362.   bool operator==(const basic_collection_iterator &Other) const {
  363.     if (Base && (Base == Other.Base)) {
  364.       assert((Base->CurrentEntry == Other.Base->CurrentEntry)
  365.              && "Equal Bases expected to point to equal Entries");
  366.     }
  367.  
  368.     return Base == Other.Base;
  369.   }
  370.  
  371.   bool operator!=(const basic_collection_iterator &Other) const {
  372.     return !(Base == Other.Base);
  373.   }
  374.  
  375.   basic_collection_iterator &operator++() {
  376.     assert(Base && "Attempted to advance iterator past end!");
  377.     Base->increment();
  378.     // Create an end iterator.
  379.     if (!Base->CurrentEntry)
  380.       Base = nullptr;
  381.     return *this;
  382.   }
  383.  
  384. private:
  385.   BaseT *Base = nullptr;
  386. };
  387.  
  388. // The following two templates are used for both MappingNode and Sequence Node.
  389. template <class CollectionType>
  390. typename CollectionType::iterator begin(CollectionType &C) {
  391.   assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  392.   C.IsAtBeginning = false;
  393.   typename CollectionType::iterator ret(&C);
  394.   ++ret;
  395.   return ret;
  396. }
  397.  
  398. template <class CollectionType> void skip(CollectionType &C) {
  399.   // TODO: support skipping from the middle of a parsed collection ;/
  400.   assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  401.   if (C.IsAtBeginning)
  402.     for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
  403.          ++i)
  404.       i->skip();
  405. }
  406.  
  407. /// Represents a YAML map created from either a block map for a flow map.
  408. ///
  409. /// This parses the YAML stream as increment() is called.
  410. ///
  411. /// Example:
  412. ///   Name: _main
  413. ///   Scope: Global
  414. class MappingNode final : public Node {
  415.   void anchor() override;
  416.  
  417. public:
  418.   enum MappingType {
  419.     MT_Block,
  420.     MT_Flow,
  421.     MT_Inline ///< An inline mapping node is used for "[key: value]".
  422.   };
  423.  
  424.   MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  425.               MappingType MT)
  426.       : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
  427.  
  428.   friend class basic_collection_iterator<MappingNode, KeyValueNode>;
  429.  
  430.   using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
  431.  
  432.   template <class T> friend typename T::iterator yaml::begin(T &);
  433.   template <class T> friend void yaml::skip(T &);
  434.  
  435.   iterator begin() { return yaml::begin(*this); }
  436.  
  437.   iterator end() { return iterator(); }
  438.  
  439.   void skip() override { yaml::skip(*this); }
  440.  
  441.   static bool classof(const Node *N) {
  442.     return N->getType() == NK_Mapping;
  443.   }
  444.  
  445. private:
  446.   MappingType Type;
  447.   bool IsAtBeginning = true;
  448.   bool IsAtEnd = false;
  449.   KeyValueNode *CurrentEntry = nullptr;
  450.  
  451.   void increment();
  452. };
  453.  
  454. /// Represents a YAML sequence created from either a block sequence for a
  455. ///        flow sequence.
  456. ///
  457. /// This parses the YAML stream as increment() is called.
  458. ///
  459. /// Example:
  460. ///   - Hello
  461. ///   - World
  462. class SequenceNode final : public Node {
  463.   void anchor() override;
  464.  
  465. public:
  466.   enum SequenceType {
  467.     ST_Block,
  468.     ST_Flow,
  469.     // Use for:
  470.     //
  471.     // key:
  472.     // - val1
  473.     // - val2
  474.     //
  475.     // As a BlockMappingEntry and BlockEnd are not created in this case.
  476.     ST_Indentless
  477.   };
  478.  
  479.   SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
  480.                SequenceType ST)
  481.       : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
  482.  
  483.   friend class basic_collection_iterator<SequenceNode, Node>;
  484.  
  485.   using iterator = basic_collection_iterator<SequenceNode, Node>;
  486.  
  487.   template <class T> friend typename T::iterator yaml::begin(T &);
  488.   template <class T> friend void yaml::skip(T &);
  489.  
  490.   void increment();
  491.  
  492.   iterator begin() { return yaml::begin(*this); }
  493.  
  494.   iterator end() { return iterator(); }
  495.  
  496.   void skip() override { yaml::skip(*this); }
  497.  
  498.   static bool classof(const Node *N) {
  499.     return N->getType() == NK_Sequence;
  500.   }
  501.  
  502. private:
  503.   SequenceType SeqType;
  504.   bool IsAtBeginning = true;
  505.   bool IsAtEnd = false;
  506.   bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
  507.   Node *CurrentEntry = nullptr;
  508. };
  509.  
  510. /// Represents an alias to a Node with an anchor.
  511. ///
  512. /// Example:
  513. ///   *AnchorName
  514. class AliasNode final : public Node {
  515.   void anchor() override;
  516.  
  517. public:
  518.   AliasNode(std::unique_ptr<Document> &D, StringRef Val)
  519.       : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
  520.  
  521.   StringRef getName() const { return Name; }
  522.  
  523.   static bool classof(const Node *N) { return N->getType() == NK_Alias; }
  524.  
  525. private:
  526.   StringRef Name;
  527. };
  528.  
  529. /// A YAML Stream is a sequence of Documents. A document contains a root
  530. ///        node.
  531. class Document {
  532. public:
  533.   Document(Stream &ParentStream);
  534.  
  535.   /// Root for parsing a node. Returns a single node.
  536.   Node *parseBlockNode();
  537.  
  538.   /// Finish parsing the current document and return true if there are
  539.   ///        more. Return false otherwise.
  540.   bool skip();
  541.  
  542.   /// Parse and return the root level node.
  543.   Node *getRoot() {
  544.     if (Root)
  545.       return Root;
  546.     return Root = parseBlockNode();
  547.   }
  548.  
  549.   const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
  550.  
  551. private:
  552.   friend class Node;
  553.   friend class document_iterator;
  554.  
  555.   /// Stream to read tokens from.
  556.   Stream &stream;
  557.  
  558.   /// Used to allocate nodes to. All are destroyed without calling their
  559.   ///        destructor when the document is destroyed.
  560.   BumpPtrAllocator NodeAllocator;
  561.  
  562.   /// The root node. Used to support skipping a partially parsed
  563.   ///        document.
  564.   Node *Root;
  565.  
  566.   /// Maps tag prefixes to their expansion.
  567.   std::map<StringRef, StringRef> TagMap;
  568.  
  569.   Token &peekNext();
  570.   Token getNext();
  571.   void setError(const Twine &Message, Token &Location) const;
  572.   bool failed() const;
  573.  
  574.   /// Parse %BLAH directives and return true if any were encountered.
  575.   bool parseDirectives();
  576.  
  577.   /// Parse %YAML
  578.   void parseYAMLDirective();
  579.  
  580.   /// Parse %TAG
  581.   void parseTAGDirective();
  582.  
  583.   /// Consume the next token and error if it is not \a TK.
  584.   bool expectToken(int TK);
  585. };
  586.  
  587. /// Iterator abstraction for Documents over a Stream.
  588. class document_iterator {
  589. public:
  590.   document_iterator() = default;
  591.   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
  592.  
  593.   bool operator==(const document_iterator &Other) const {
  594.     if (isAtEnd() || Other.isAtEnd())
  595.       return isAtEnd() && Other.isAtEnd();
  596.  
  597.     return Doc == Other.Doc;
  598.   }
  599.   bool operator!=(const document_iterator &Other) const {
  600.     return !(*this == Other);
  601.   }
  602.  
  603.   document_iterator operator++() {
  604.     assert(Doc && "incrementing iterator past the end.");
  605.     if (!(*Doc)->skip()) {
  606.       Doc->reset(nullptr);
  607.     } else {
  608.       Stream &S = (*Doc)->stream;
  609.       Doc->reset(new Document(S));
  610.     }
  611.     return *this;
  612.   }
  613.  
  614.   Document &operator*() { return **Doc; }
  615.  
  616.   std::unique_ptr<Document> &operator->() { return *Doc; }
  617.  
  618. private:
  619.   bool isAtEnd() const { return !Doc || !*Doc; }
  620.  
  621.   std::unique_ptr<Document> *Doc = nullptr;
  622. };
  623.  
  624. } // end namespace yaml
  625.  
  626. } // end namespace llvm
  627.  
  628. #endif // LLVM_SUPPORT_YAMLPARSER_H
  629.