Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- Markup.h -------------------------------------------------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | /// |
||
| 9 | /// \file |
||
| 10 | /// This file declares the log symbolizer markup data model and parser. |
||
| 11 | /// |
||
| 12 | /// See https://llvm.org/docs/SymbolizerMarkupFormat.html |
||
| 13 | /// |
||
| 14 | //===----------------------------------------------------------------------===// |
||
| 15 | |||
| 16 | #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H |
||
| 17 | #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H |
||
| 18 | |||
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Regex.h"
#include <cstddef>
#include <optional>
#include <string>
||
| 23 | |||
| 24 | namespace llvm { |
||
| 25 | namespace symbolize { |
||
| 26 | |||
| 27 | /// A node of symbolizer markup. |
||
| 28 | /// |
||
| 29 | /// If only the Text field is set, this represents a region of text outside a |
||
| 30 | /// markup element. ANSI SGR control codes are also reported this way; if |
||
| 31 | /// detected, then the control code will be the entirety of the Text field, and |
||
| 32 | /// any surrounding text will be reported as preceding and following nodes. |
||
| 33 | struct MarkupNode { |
||
| 34 | /// The full text of this node in the input. |
||
| 35 | StringRef Text; |
||
| 36 | |||
| 37 | /// If this represents an element, the tag. Otherwise, empty. |
||
| 38 | StringRef Tag; |
||
| 39 | |||
| 40 | /// If this represents an element with fields, a list of the field contents. |
||
| 41 | /// Otherwise, empty. |
||
| 42 | SmallVector<StringRef> Fields; |
||
| 43 | |||
| 44 | bool operator==(const MarkupNode &Other) const { |
||
| 45 | return Text == Other.Text && Tag == Other.Tag && Fields == Other.Fields; |
||
| 46 | } |
||
| 47 | bool operator!=(const MarkupNode &Other) const { return !(*this == Other); } |
||
| 48 | }; |
||
| 49 | |||
| 50 | /// Parses a log containing symbolizer markup into a sequence of nodes. |
||
| 51 | class MarkupParser { |
||
| 52 | public: |
||
| 53 | MarkupParser(StringSet<> MultilineTags = {}); |
||
| 54 | |||
| 55 | /// Parses an individual \p Line of input. |
||
| 56 | /// |
||
| 57 | /// Nodes from the previous parseLine() call that haven't yet been extracted |
||
| 58 | /// by nextNode() are discarded. The nodes returned by nextNode() may |
||
| 59 | /// reference the input string, so it must be retained by the caller until the |
||
| 60 | /// last use. |
||
| 61 | /// |
||
| 62 | /// Note that some elements may span multiple lines. If a line ends with the |
||
| 63 | /// start of one of these elements, then no nodes will be produced until the |
||
| 64 | /// either the end or something that cannot be part of an element is |
||
| 65 | /// encountered. This may only occur after multiple calls to parseLine(), |
||
| 66 | /// corresponding to the lines of the multi-line element. |
||
| 67 | void parseLine(StringRef Line); |
||
| 68 | |||
| 69 | /// Inform the parser of that the input stream has ended. |
||
| 70 | /// |
||
| 71 | /// This allows the parser to finish any deferred processing (e.g., an |
||
| 72 | /// in-progress multi-line element) and may cause nextNode() to return |
||
| 73 | /// additional nodes. |
||
| 74 | void flush(); |
||
| 75 | |||
| 76 | /// Returns the next node in the input sequence. |
||
| 77 | /// |
||
| 78 | /// Calling nextNode() may invalidate the contents of the node returned by the |
||
| 79 | /// previous call. |
||
| 80 | /// |
||
| 81 | /// \returns the next markup node or std::nullopt if none remain. |
||
| 82 | std::optional<MarkupNode> nextNode(); |
||
| 83 | |||
| 84 | bool isSGR(const MarkupNode &Node) const { |
||
| 85 | return SGRSyntax.match(Node.Text); |
||
| 86 | } |
||
| 87 | |||
| 88 | private: |
||
| 89 | std::optional<MarkupNode> parseElement(StringRef Line); |
||
| 90 | void parseTextOutsideMarkup(StringRef Text); |
||
| 91 | std::optional<StringRef> parseMultiLineBegin(StringRef Line); |
||
| 92 | std::optional<StringRef> parseMultiLineEnd(StringRef Line); |
||
| 93 | |||
| 94 | // Tags of elements that can span multiple lines. |
||
| 95 | const StringSet<> MultilineTags; |
||
| 96 | |||
| 97 | // Contents of a multi-line element that has finished being parsed. Retained |
||
| 98 | // to keep returned StringRefs for the contents valid. |
||
| 99 | std::string FinishedMultiline; |
||
| 100 | |||
| 101 | // Contents of a multi-line element that is still in the process of receiving |
||
| 102 | // lines. |
||
| 103 | std::string InProgressMultiline; |
||
| 104 | |||
| 105 | // The line currently being parsed. |
||
| 106 | StringRef Line; |
||
| 107 | |||
| 108 | // Buffer for nodes parsed from the current line. |
||
| 109 | SmallVector<MarkupNode> Buffer; |
||
| 110 | |||
| 111 | // Next buffer index to return. |
||
| 112 | size_t NextIdx; |
||
| 113 | |||
| 114 | // Regular expression matching supported ANSI SGR escape sequences. |
||
| 115 | const Regex SGRSyntax; |
||
| 116 | }; |
||
| 117 | |||
| 118 | } // end namespace symbolize |
||
| 119 | } // end namespace llvm |
||
| 120 | |||
| 121 | #endif // LLVM_DEBUGINFO_SYMBOLIZE_MARKUP_H |