- //===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// 
- // 
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
- // See https://llvm.org/LICENSE.txt for license information. 
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 
- // 
- //===----------------------------------------------------------------------===// 
- // 
- //  This file defines the lexer for structured comments and the supporting
- //  token class.
- // 
- //===----------------------------------------------------------------------===// 
-   
- #ifndef LLVM_CLANG_AST_COMMENTLEXER_H 
- #define LLVM_CLANG_AST_COMMENTLEXER_H 
-   
- #include "clang/Basic/Diagnostic.h" 
- #include "clang/Basic/SourceManager.h" 
- #include "llvm/ADT/SmallString.h" 
- #include "llvm/ADT/StringRef.h" 
- #include "llvm/Support/Allocator.h" 
- #include "llvm/Support/raw_ostream.h" 
-   
- namespace clang { 
- namespace comments { 
-   
- // Forward declarations used by the classes in this header.
- //
- // Lexer is defined further down; Token names it as a friend before that point.
- class Lexer;
- // Befriended by Token; no definition appears in this header.
- class TextTokenRetokenizer;
- // Command metadata types: this header only uses them through a reference
- // (CommandTraits) or a pointer (CommandInfo), so declarations suffice.
- class CommandTraits;
- struct CommandInfo;
-   
- namespace tok {
- // Kinds of tokens handled by the comment Token class.
- //
- // No enumerator has an explicit value, so the numeric values follow the
- // declaration order below; do not reorder.  The notes on each enumerator say
- // which Token accessor applies to that kind.
- enum TokenKind {
-   eof,
-   newline,
-   text,              // Carries a string (Token::getText()).
-   unknown_command,   // Command that does not have an ID; carries its name
-                      // (Token::getUnknownCommandName()).
-   backslash_command, // Command with an ID, that used backslash marker.
-   at_command,        // Command with an ID, that used 'at' marker.
-   verbatim_block_begin, // Carries an ID (Token::getVerbatimBlockID()).
-   verbatim_block_line,  // Carries a string (Token::getVerbatimBlockText()).
-   verbatim_block_end,   // Carries an ID (Token::getVerbatimBlockID()).
-   verbatim_line_name,   // Carries an ID (Token::getVerbatimLineID()).
-   verbatim_line_text,   // Carries a string (Token::getVerbatimLineText()).
-   html_start_tag,     // <tag
-   html_ident,         // attr
-   html_equals,        // =
-   html_quoted_string, // "blah\"blah" or 'blah\'blah'
-   html_greater,       // >
-   html_slash_greater, // />
-   html_end_tag        // </tag
- };
- } // end namespace tok
-   
- /// Comment token. 
- class Token { 
-   friend class Lexer; 
-   friend class TextTokenRetokenizer; 
-   
-   /// The location of the token. 
-   SourceLocation Loc; 
-   
-   /// The actual kind of the token. 
-   tok::TokenKind Kind; 
-   
-   /// Integer value associated with a token. 
-   /// 
-   /// If the token is a known command, contains command ID and TextPtr is 
-   /// unused (command spelling can be found with CommandTraits).  Otherwise, 
-   /// contains the length of the string that starts at TextPtr. 
-   unsigned IntVal; 
-   
-   /// Length of the token spelling in comment.  Can be 0 for synthenized 
-   /// tokens. 
-   unsigned Length; 
-   
-   /// Contains text value associated with a token. 
-   const char *TextPtr; 
-   
- public: 
-   SourceLocation getLocation() const LLVM_READONLY { return Loc; } 
-   void setLocation(SourceLocation SL) { Loc = SL; } 
-   
-   SourceLocation getEndLocation() const LLVM_READONLY { 
-     if (Length == 0 || Length == 1) 
-       return Loc; 
-     return Loc.getLocWithOffset(Length - 1); 
-   } 
-   
-   tok::TokenKind getKind() const LLVM_READONLY { return Kind; } 
-   void setKind(tok::TokenKind K) { Kind = K; } 
-   
-   bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; } 
-   bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; } 
-   
-   unsigned getLength() const LLVM_READONLY { return Length; } 
-   void setLength(unsigned L) { Length = L; } 
-   
-   StringRef getText() const LLVM_READONLY { 
-     assert(is(tok::text)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setText(StringRef Text) { 
-     assert(is(tok::text)); 
-     TextPtr = Text.data(); 
-     IntVal = Text.size(); 
-   } 
-   
-   StringRef getUnknownCommandName() const LLVM_READONLY { 
-     assert(is(tok::unknown_command)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setUnknownCommandName(StringRef Name) { 
-     assert(is(tok::unknown_command)); 
-     TextPtr = Name.data(); 
-     IntVal = Name.size(); 
-   } 
-   
-   unsigned getCommandID() const LLVM_READONLY { 
-     assert(is(tok::backslash_command) || is(tok::at_command)); 
-     return IntVal; 
-   } 
-   
-   void setCommandID(unsigned ID) { 
-     assert(is(tok::backslash_command) || is(tok::at_command)); 
-     IntVal = ID; 
-   } 
-   
-   unsigned getVerbatimBlockID() const LLVM_READONLY { 
-     assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); 
-     return IntVal; 
-   } 
-   
-   void setVerbatimBlockID(unsigned ID) { 
-     assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); 
-     IntVal = ID; 
-   } 
-   
-   StringRef getVerbatimBlockText() const LLVM_READONLY { 
-     assert(is(tok::verbatim_block_line)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setVerbatimBlockText(StringRef Text) { 
-     assert(is(tok::verbatim_block_line)); 
-     TextPtr = Text.data(); 
-     IntVal = Text.size(); 
-   } 
-   
-   unsigned getVerbatimLineID() const LLVM_READONLY { 
-     assert(is(tok::verbatim_line_name)); 
-     return IntVal; 
-   } 
-   
-   void setVerbatimLineID(unsigned ID) { 
-     assert(is(tok::verbatim_line_name)); 
-     IntVal = ID; 
-   } 
-   
-   StringRef getVerbatimLineText() const LLVM_READONLY { 
-     assert(is(tok::verbatim_line_text)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setVerbatimLineText(StringRef Text) { 
-     assert(is(tok::verbatim_line_text)); 
-     TextPtr = Text.data(); 
-     IntVal = Text.size(); 
-   } 
-   
-   StringRef getHTMLTagStartName() const LLVM_READONLY { 
-     assert(is(tok::html_start_tag)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setHTMLTagStartName(StringRef Name) { 
-     assert(is(tok::html_start_tag)); 
-     TextPtr = Name.data(); 
-     IntVal = Name.size(); 
-   } 
-   
-   StringRef getHTMLIdent() const LLVM_READONLY { 
-     assert(is(tok::html_ident)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setHTMLIdent(StringRef Name) { 
-     assert(is(tok::html_ident)); 
-     TextPtr = Name.data(); 
-     IntVal = Name.size(); 
-   } 
-   
-   StringRef getHTMLQuotedString() const LLVM_READONLY { 
-     assert(is(tok::html_quoted_string)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setHTMLQuotedString(StringRef Str) { 
-     assert(is(tok::html_quoted_string)); 
-     TextPtr = Str.data(); 
-     IntVal = Str.size(); 
-   } 
-   
-   StringRef getHTMLTagEndName() const LLVM_READONLY { 
-     assert(is(tok::html_end_tag)); 
-     return StringRef(TextPtr, IntVal); 
-   } 
-   
-   void setHTMLTagEndName(StringRef Name) { 
-     assert(is(tok::html_end_tag)); 
-     TextPtr = Name.data(); 
-     IntVal = Name.size(); 
-   } 
-   
-   void dump(const Lexer &L, const SourceManager &SM) const; 
- }; 
-   
- /// Comment lexer. 
- class Lexer { 
- private: 
-   Lexer(const Lexer &) = delete; 
-   void operator=(const Lexer &) = delete; 
-   
-   /// Allocator for strings that are semantic values of tokens and have to be 
-   /// computed (for example, resolved decimal character references). 
-   llvm::BumpPtrAllocator &Allocator; 
-   
-   DiagnosticsEngine &Diags; 
-   
-   const CommandTraits &Traits; 
-   
-   const char *const BufferStart; 
-   const char *const BufferEnd; 
-   
-   const char *BufferPtr; 
-   
-   /// One past end pointer for the current comment.  For BCPL comments points 
-   /// to newline or BufferEnd, for C comments points to star in '*/'. 
-   const char *CommentEnd; 
-   
-   SourceLocation FileLoc; 
-   
-   /// If true, the commands, html tags, etc will be parsed and reported as 
-   /// separate tokens inside the comment body. If false, the comment text will 
-   /// be parsed into text and newline tokens. 
-   bool ParseCommands; 
-   
-   enum LexerCommentState : uint8_t { 
-     LCS_BeforeComment, 
-     LCS_InsideBCPLComment, 
-     LCS_InsideCComment, 
-     LCS_BetweenComments 
-   }; 
-   
-   /// Low-level lexer state, track if we are inside or outside of comment. 
-   LexerCommentState CommentState; 
-   
-   enum LexerState : uint8_t { 
-     /// Lexing normal comment text 
-     LS_Normal, 
-   
-     /// Finished lexing verbatim block beginning command, will lex first body 
-     /// line. 
-     LS_VerbatimBlockFirstLine, 
-   
-     /// Lexing verbatim block body line-by-line, skipping line-starting 
-     /// decorations. 
-     LS_VerbatimBlockBody, 
-   
-     /// Finished lexing verbatim line beginning command, will lex text (one 
-     /// line). 
-     LS_VerbatimLineText, 
-   
-     /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes. 
-     LS_HTMLStartTag, 
-   
-     /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'. 
-     LS_HTMLEndTag 
-   }; 
-   
-   /// Current lexing mode. 
-   LexerState State; 
-   
-   /// If State is LS_VerbatimBlock, contains the name of verbatim end 
-   /// command, including command marker. 
-   SmallString<16> VerbatimBlockEndCommandName; 
-   
-   /// Given a character reference name (e.g., "lt"), return the character that 
-   /// it stands for (e.g., "<"). 
-   StringRef resolveHTMLNamedCharacterReference(StringRef Name) const; 
-   
-   /// Given a Unicode codepoint as base-10 integer, return the character. 
-   StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const; 
-   
-   /// Given a Unicode codepoint as base-16 integer, return the character. 
-   StringRef resolveHTMLHexCharacterReference(StringRef Name) const; 
-   
-   void formTokenWithChars(Token &Result, const char *TokEnd, 
-                           tok::TokenKind Kind); 
-   
-   void formTextToken(Token &Result, const char *TokEnd) { 
-     StringRef Text(BufferPtr, TokEnd - BufferPtr); 
-     formTokenWithChars(Result, TokEnd, tok::text); 
-     Result.setText(Text); 
-   } 
-   
-   SourceLocation getSourceLocation(const char *Loc) const { 
-     assert(Loc >= BufferStart && Loc <= BufferEnd && 
-            "Location out of range for this buffer!"); 
-   
-     const unsigned CharNo = Loc - BufferStart; 
-     return FileLoc.getLocWithOffset(CharNo); 
-   } 
-   
-   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { 
-     return Diags.Report(Loc, DiagID); 
-   } 
-   
-   /// Eat string matching regexp \code \s*\* \endcode. 
-   void skipLineStartingDecorations(); 
-   
-   /// Skip over pure text. 
-   const char *skipTextToken(); 
-   
-   /// Lex comment text, including commands if ParseCommands is set to true. 
-   void lexCommentText(Token &T); 
-   
-   void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker, 
-                                 const CommandInfo *Info); 
-   
-   void lexVerbatimBlockFirstLine(Token &T); 
-   
-   void lexVerbatimBlockBody(Token &T); 
-   
-   void setupAndLexVerbatimLine(Token &T, const char *TextBegin, 
-                                const CommandInfo *Info); 
-   
-   void lexVerbatimLineText(Token &T); 
-   
-   void lexHTMLCharacterReference(Token &T); 
-   
-   void setupAndLexHTMLStartTag(Token &T); 
-   
-   void lexHTMLStartTag(Token &T); 
-   
-   void setupAndLexHTMLEndTag(Token &T); 
-   
-   void lexHTMLEndTag(Token &T); 
-   
- public: 
-   Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, 
-         const CommandTraits &Traits, SourceLocation FileLoc, 
-         const char *BufferStart, const char *BufferEnd, 
-         bool ParseCommands = true); 
-   
-   void lex(Token &T); 
-   
-   StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const; 
- }; 
-   
- } // end namespace comments 
- } // end namespace clang 
-   
- #endif 
-   
-