Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- YAMLParser.h - Simple YAML parser ------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This is a YAML 1.2 parser.
10
//
11
//  See http://www.yaml.org/spec/1.2/spec.html for the full standard.
12
//
13
//  This currently does not implement the following:
14
//    * Tag resolution.
15
//    * UTF-16.
16
//    * BOMs anywhere other than the first Unicode scalar value in the file.
17
//
18
//  The most important class here is Stream. This represents a YAML stream with
19
//  0, 1, or many documents.
20
//
21
//  SourceMgr sm;
22
//  StringRef input = getInput();
23
//  yaml::Stream stream(input, sm);
24
//
25
//  for (yaml::document_iterator di = stream.begin(), de = stream.end();
26
//       di != de; ++di) {
27
//    yaml::Node *n = di->getRoot();
28
//    if (n) {
29
//      // Do something with n...
30
//    } else
31
//      break;
32
//  }
33
//
34
//===----------------------------------------------------------------------===//
35
 
36
#ifndef LLVM_SUPPORT_YAMLPARSER_H
37
#define LLVM_SUPPORT_YAMLPARSER_H
38
 
39
#include "llvm/ADT/StringRef.h"
40
#include "llvm/Support/Allocator.h"
41
#include "llvm/Support/SMLoc.h"
42
#include "llvm/Support/SourceMgr.h"
43
#include <cassert>
44
#include <cstddef>
45
#include <iterator>
46
#include <map>
47
#include <memory>
48
#include <optional>
49
#include <string>
50
#include <system_error>
51
 
52
namespace llvm {
53
 
54
class MemoryBufferRef;
55
class raw_ostream;
56
class Twine;
57
 
58
namespace yaml {
59
 
60
class Document;
61
class document_iterator;
62
class Node;
63
class Scanner;
64
struct Token;
65
 
66
/// Dump all the tokens in this stream to OS.
67
/// \returns true if there was an error, false otherwise.
68
bool dumpTokens(StringRef Input, raw_ostream &);
69
 
70
/// Scans all tokens in input without outputting anything. This is used
71
///        for benchmarking the tokenizer.
72
/// \returns true if there was an error, false otherwise.
73
bool scanTokens(StringRef Input);
74
 
75
/// Escape \a Input for a double quoted scalar; if \p EscapePrintable
76
/// is true, all UTF8 sequences will be escaped, if \p EscapePrintable is
77
/// false, those UTF8 sequences encoding printable unicode scalars will not be
78
/// escaped, but emitted verbatim.
79
std::string escape(StringRef Input, bool EscapePrintable = true);
80
 
81
/// Parse \p S as a bool according to https://yaml.org/type/bool.html.
82
std::optional<bool> parseBool(StringRef S);
83
 
84
/// This class represents a YAML stream potentially containing multiple
85
///        documents.
86
class Stream {
87
public:
88
  /// This keeps a reference to the string referenced by \p Input.
89
  Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
90
         std::error_code *EC = nullptr);
91
 
92
  Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
93
         std::error_code *EC = nullptr);
94
  ~Stream();
95
 
96
  document_iterator begin();
97
  document_iterator end();
98
  void skip();
99
  bool failed();
100
 
101
  bool validate() {
102
    skip();
103
    return !failed();
104
  }
105
 
106
  void printError(Node *N, const Twine &Msg,
107
                  SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
108
  void printError(const SMRange &Range, const Twine &Msg,
109
                  SourceMgr::DiagKind Kind = SourceMgr::DK_Error);
110
 
111
private:
112
  friend class Document;
113
 
114
  std::unique_ptr<Scanner> scanner;
115
  std::unique_ptr<Document> CurrentDoc;
116
};
117
 
118
/// Abstract base class for all Nodes.
119
class Node {
120
  virtual void anchor();
121
 
122
public:
123
  enum NodeKind {
124
    NK_Null,
125
    NK_Scalar,
126
    NK_BlockScalar,
127
    NK_KeyValue,
128
    NK_Mapping,
129
    NK_Sequence,
130
    NK_Alias
131
  };
132
 
133
  Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
134
       StringRef Tag);
135
 
136
  // It's not safe to copy YAML nodes; the document is streamed and the position
137
  // is part of the state.
138
  Node(const Node &) = delete;
139
  void operator=(const Node &) = delete;
140
 
141
  void *operator new(size_t Size, BumpPtrAllocator &Alloc,
142
                     size_t Alignment = 16) noexcept {
143
    return Alloc.Allocate(Size, Alignment);
144
  }
145
 
146
  void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
147
                       size_t Size) noexcept {
148
    Alloc.Deallocate(Ptr, Size, 0);
149
  }
150
 
151
  void operator delete(void *) noexcept = delete;
152
 
153
  /// Get the value of the anchor attached to this node. If it does not
154
  ///        have one, getAnchor().size() will be 0.
155
  StringRef getAnchor() const { return Anchor; }
156
 
157
  /// Get the tag as it was written in the document. This does not
158
  ///   perform tag resolution.
159
  StringRef getRawTag() const { return Tag; }
160
 
161
  /// Get the verbatium tag for a given Node. This performs tag resoluton
162
  ///   and substitution.
163
  std::string getVerbatimTag() const;
164
 
165
  SMRange getSourceRange() const { return SourceRange; }
166
  void setSourceRange(SMRange SR) { SourceRange = SR; }
167
 
168
  // These functions forward to Document and Scanner.
169
  Token &peekNext();
170
  Token getNext();
171
  Node *parseBlockNode();
172
  BumpPtrAllocator &getAllocator();
173
  void setError(const Twine &Message, Token &Location) const;
174
  bool failed() const;
175
 
176
  virtual void skip() {}
177
 
178
  unsigned int getType() const { return TypeID; }
179
 
180
protected:
181
  std::unique_ptr<Document> &Doc;
182
  SMRange SourceRange;
183
 
184
  ~Node() = default;
185
 
186
private:
187
  unsigned int TypeID;
188
  StringRef Anchor;
189
  /// The tag as typed in the document.
190
  StringRef Tag;
191
};
192
 
193
/// A null value.
194
///
195
/// Example:
196
///   !!null null
197
class NullNode final : public Node {
198
  void anchor() override;
199
 
200
public:
201
  NullNode(std::unique_ptr<Document> &D)
202
      : Node(NK_Null, D, StringRef(), StringRef()) {}
203
 
204
  static bool classof(const Node *N) { return N->getType() == NK_Null; }
205
};
206
 
207
/// A scalar node is an opaque datum that can be presented as a
208
///        series of zero or more Unicode scalar values.
209
///
210
/// Example:
211
///   Adena
212
class ScalarNode final : public Node {
213
  void anchor() override;
214
 
215
public:
216
  ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
217
             StringRef Val)
218
      : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
219
    SMLoc Start = SMLoc::getFromPointer(Val.begin());
220
    SMLoc End = SMLoc::getFromPointer(Val.end());
221
    SourceRange = SMRange(Start, End);
222
  }
223
 
224
  // Return Value without any escaping or folding or other fun YAML stuff. This
225
  // is the exact bytes that are contained in the file (after conversion to
226
  // utf8).
227
  StringRef getRawValue() const { return Value; }
228
 
229
  /// Gets the value of this node as a StringRef.
230
  ///
231
  /// \param Storage is used to store the content of the returned StringRef if
232
  ///        it requires any modification from how it appeared in the source.
233
  ///        This happens with escaped characters and multi-line literals.
234
  StringRef getValue(SmallVectorImpl<char> &Storage) const;
235
 
236
  static bool classof(const Node *N) {
237
    return N->getType() == NK_Scalar;
238
  }
239
 
240
private:
241
  StringRef Value;
242
 
243
  StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
244
                                 StringRef::size_type Start,
245
                                 SmallVectorImpl<char> &Storage) const;
246
};
247
 
248
/// A block scalar node is an opaque datum that can be presented as a
249
///        series of zero or more Unicode scalar values.
250
///
251
/// Example:
252
///   |
253
///     Hello
254
///     World
255
class BlockScalarNode final : public Node {
256
  void anchor() override;
257
 
258
public:
259
  BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
260
                  StringRef Value, StringRef RawVal)
261
      : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
262
    SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
263
    SMLoc End = SMLoc::getFromPointer(RawVal.end());
264
    SourceRange = SMRange(Start, End);
265
  }
266
 
267
  /// Gets the value of this node as a StringRef.
268
  StringRef getValue() const { return Value; }
269
 
270
  static bool classof(const Node *N) {
271
    return N->getType() == NK_BlockScalar;
272
  }
273
 
274
private:
275
  StringRef Value;
276
};
277
 
278
/// A key and value pair. While not technically a Node under the YAML
279
///        representation graph, it is easier to treat them this way.
280
///
281
/// TODO: Consider making this not a child of Node.
282
///
283
/// Example:
284
///   Section: .text
285
class KeyValueNode final : public Node {
286
  void anchor() override;
287
 
288
public:
289
  KeyValueNode(std::unique_ptr<Document> &D)
290
      : Node(NK_KeyValue, D, StringRef(), StringRef()) {}
291
 
292
  /// Parse and return the key.
293
  ///
294
  /// This may be called multiple times.
295
  ///
296
  /// \returns The key, or nullptr if failed() == true.
297
  Node *getKey();
298
 
299
  /// Parse and return the value.
300
  ///
301
  /// This may be called multiple times.
302
  ///
303
  /// \returns The value, or nullptr if failed() == true.
304
  Node *getValue();
305
 
306
  void skip() override {
307
    if (Node *Key = getKey()) {
308
      Key->skip();
309
      if (Node *Val = getValue())
310
        Val->skip();
311
    }
312
  }
313
 
314
  static bool classof(const Node *N) {
315
    return N->getType() == NK_KeyValue;
316
  }
317
 
318
private:
319
  Node *Key = nullptr;
320
  Node *Value = nullptr;
321
};
322
 
323
/// This is an iterator abstraction over YAML collections shared by both
/// sequences and maps.
///
/// BaseT must have a ValueT* member named CurrentEntry and a member function
/// increment() which must set CurrentEntry to null to create an end iterator.
///
/// A default-constructed iterator (null Base) is the end iterator.
template <class BaseT, class ValueT> class basic_collection_iterator {
public:
  using iterator_category = std::input_iterator_tag;
  using value_type = ValueT;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type *;
  using reference = value_type &;

  basic_collection_iterator() = default;
  basic_collection_iterator(BaseT *B) : Base(B) {}

  ValueT *operator->() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  ValueT &operator*() const {
    assert(Base && Base->CurrentEntry &&
           "Attempted to dereference end iterator!");
    return *Base->CurrentEntry;
  }

  /// Implicit conversion to a pointer to the current entry.
  operator ValueT *() const {
    assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
    return Base->CurrentEntry;
  }

  /// Note on EqualityComparable:
  ///
  /// The iterator is not re-entrant,
  /// it is meant to be used for parsing YAML on-demand
  /// Once iteration started - it can point only to one entry at a time
  /// hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
  /// iff Base and Other.Base are equal.
  bool operator==(const basic_collection_iterator &Other) const {
    if (Base && (Base == Other.Base)) {
      assert((Base->CurrentEntry == Other.Base->CurrentEntry)
             && "Equal Bases expected to point to equal Entries");
    }

    return Base == Other.Base;
  }

  /// Defined as the negation of operator== so the two can never disagree.
  bool operator!=(const basic_collection_iterator &Other) const {
    return !(*this == Other);
  }

  /// Advances the underlying collection via increment(). If that leaves
  /// CurrentEntry null, this iterator becomes the end iterator.
  basic_collection_iterator &operator++() {
    assert(Base && "Attempted to advance iterator past end!");
    Base->increment();
    // Create an end iterator.
    if (!Base->CurrentEntry)
      Base = nullptr;
    return *this;
  }

private:
  BaseT *Base = nullptr;
};
387
 
388
// The following two templates are used for both MappingNode and Sequence Node.
389
/// Starts iteration over \p C. Asserts that C.IsAtBeginning is still set,
/// clears it, then builds an iterator on \p C and advances it once so it
/// refers to the first entry.
template <class CollectionType>
typename CollectionType::iterator begin(CollectionType &C) {
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  C.IsAtBeginning = false;
  typename CollectionType::iterator ret(&C);
  ++ret;
  return ret;
}
397
 
398
/// Skips every entry of \p C. Only does work when \p C has not been iterated
/// yet (IsAtBeginning); in that case it walks the collection from begin(C)
/// to C.end() and calls skip() on each entry. Asserts if \p C is neither at
/// its beginning nor at its end.
template <class CollectionType> void skip(CollectionType &C) {
  // TODO: support skipping from the middle of a parsed collection ;/
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  if (C.IsAtBeginning)
    for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
         ++i)
      i->skip();
}
406
 
407
/// Represents a YAML map created from either a block map for a flow map.
408
///
409
/// This parses the YAML stream as increment() is called.
410
///
411
/// Example:
412
///   Name: _main
413
///   Scope: Global
414
class MappingNode final : public Node {
415
  void anchor() override;
416
 
417
public:
418
  enum MappingType {
419
    MT_Block,
420
    MT_Flow,
421
    MT_Inline ///< An inline mapping node is used for "[key: value]".
422
  };
423
 
424
  MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
425
              MappingType MT)
426
      : Node(NK_Mapping, D, Anchor, Tag), Type(MT) {}
427
 
428
  friend class basic_collection_iterator<MappingNode, KeyValueNode>;
429
 
430
  using iterator = basic_collection_iterator<MappingNode, KeyValueNode>;
431
 
432
  template <class T> friend typename T::iterator yaml::begin(T &);
433
  template <class T> friend void yaml::skip(T &);
434
 
435
  iterator begin() { return yaml::begin(*this); }
436
 
437
  iterator end() { return iterator(); }
438
 
439
  void skip() override { yaml::skip(*this); }
440
 
441
  static bool classof(const Node *N) {
442
    return N->getType() == NK_Mapping;
443
  }
444
 
445
private:
446
  MappingType Type;
447
  bool IsAtBeginning = true;
448
  bool IsAtEnd = false;
449
  KeyValueNode *CurrentEntry = nullptr;
450
 
451
  void increment();
452
};
453
 
454
/// Represents a YAML sequence created from either a block sequence for a
455
///        flow sequence.
456
///
457
/// This parses the YAML stream as increment() is called.
458
///
459
/// Example:
460
///   - Hello
461
///   - World
462
class SequenceNode final : public Node {
463
  void anchor() override;
464
 
465
public:
466
  enum SequenceType {
467
    ST_Block,
468
    ST_Flow,
469
    // Use for:
470
    //
471
    // key:
472
    // - val1
473
    // - val2
474
    //
475
    // As a BlockMappingEntry and BlockEnd are not created in this case.
476
    ST_Indentless
477
  };
478
 
479
  SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
480
               SequenceType ST)
481
      : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST) {}
482
 
483
  friend class basic_collection_iterator<SequenceNode, Node>;
484
 
485
  using iterator = basic_collection_iterator<SequenceNode, Node>;
486
 
487
  template <class T> friend typename T::iterator yaml::begin(T &);
488
  template <class T> friend void yaml::skip(T &);
489
 
490
  void increment();
491
 
492
  iterator begin() { return yaml::begin(*this); }
493
 
494
  iterator end() { return iterator(); }
495
 
496
  void skip() override { yaml::skip(*this); }
497
 
498
  static bool classof(const Node *N) {
499
    return N->getType() == NK_Sequence;
500
  }
501
 
502
private:
503
  SequenceType SeqType;
504
  bool IsAtBeginning = true;
505
  bool IsAtEnd = false;
506
  bool WasPreviousTokenFlowEntry = true; // Start with an imaginary ','.
507
  Node *CurrentEntry = nullptr;
508
};
509
 
510
/// Represents an alias to a Node with an anchor.
511
///
512
/// Example:
513
///   *AnchorName
514
class AliasNode final : public Node {
515
  void anchor() override;
516
 
517
public:
518
  AliasNode(std::unique_ptr<Document> &D, StringRef Val)
519
      : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
520
 
521
  StringRef getName() const { return Name; }
522
 
523
  static bool classof(const Node *N) { return N->getType() == NK_Alias; }
524
 
525
private:
526
  StringRef Name;
527
};
528
 
529
/// A YAML Stream is a sequence of Documents. A document contains a root
530
///        node.
531
class Document {
532
public:
533
  Document(Stream &ParentStream);
534
 
535
  /// Root for parsing a node. Returns a single node.
536
  Node *parseBlockNode();
537
 
538
  /// Finish parsing the current document and return true if there are
539
  ///        more. Return false otherwise.
540
  bool skip();
541
 
542
  /// Parse and return the root level node.
543
  Node *getRoot() {
544
    if (Root)
545
      return Root;
546
    return Root = parseBlockNode();
547
  }
548
 
549
  const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
550
 
551
private:
552
  friend class Node;
553
  friend class document_iterator;
554
 
555
  /// Stream to read tokens from.
556
  Stream &stream;
557
 
558
  /// Used to allocate nodes to. All are destroyed without calling their
559
  ///        destructor when the document is destroyed.
560
  BumpPtrAllocator NodeAllocator;
561
 
562
  /// The root node. Used to support skipping a partially parsed
563
  ///        document.
564
  Node *Root;
565
 
566
  /// Maps tag prefixes to their expansion.
567
  std::map<StringRef, StringRef> TagMap;
568
 
569
  Token &peekNext();
570
  Token getNext();
571
  void setError(const Twine &Message, Token &Location) const;
572
  bool failed() const;
573
 
574
  /// Parse %BLAH directives and return true if any were encountered.
575
  bool parseDirectives();
576
 
577
  /// Parse %YAML
578
  void parseYAMLDirective();
579
 
580
  /// Parse %TAG
581
  void parseTAGDirective();
582
 
583
  /// Consume the next token and error if it is not \a TK.
584
  bool expectToken(int TK);
585
};
586
 
587
/// Iterator abstraction for Documents over a Stream.
588
class document_iterator {
589
public:
590
  document_iterator() = default;
591
  document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
592
 
593
  bool operator==(const document_iterator &Other) const {
594
    if (isAtEnd() || Other.isAtEnd())
595
      return isAtEnd() && Other.isAtEnd();
596
 
597
    return Doc == Other.Doc;
598
  }
599
  bool operator!=(const document_iterator &Other) const {
600
    return !(*this == Other);
601
  }
602
 
603
  document_iterator operator++() {
604
    assert(Doc && "incrementing iterator past the end.");
605
    if (!(*Doc)->skip()) {
606
      Doc->reset(nullptr);
607
    } else {
608
      Stream &S = (*Doc)->stream;
609
      Doc->reset(new Document(S));
610
    }
611
    return *this;
612
  }
613
 
614
  Document &operator*() { return **Doc; }
615
 
616
  std::unique_ptr<Document> &operator->() { return *Doc; }
617
 
618
private:
619
  bool isAtEnd() const { return !Doc || !*Doc; }
620
 
621
  std::unique_ptr<Document> *Doc = nullptr;
622
};
623
 
624
} // end namespace yaml
625
 
626
} // end namespace llvm
627
 
628
#endif // LLVM_SUPPORT_YAMLPARSER_H