WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – //llvm-build/x86_64/include/clang/AST/CommentLexer.h

Rev	Author	Line No.	Line
14	pmbaty	1	//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
		2	//
		3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	// See https://llvm.org/LICENSE.txt for license information.
		5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	//
		7	//===----------------------------------------------------------------------===//
		8	//
		9	// This file defines the lexer for structured comments and the supporting token class.
		10	//
		11	//===----------------------------------------------------------------------===//
		12
		13	#ifndef LLVM_CLANG_AST_COMMENTLEXER_H
		14	#define LLVM_CLANG_AST_COMMENTLEXER_H
		15
		16	#include "clang/Basic/Diagnostic.h"
		17	#include "clang/Basic/SourceManager.h"
		18	#include "llvm/ADT/SmallString.h"
		19	#include "llvm/ADT/StringRef.h"
		20	#include "llvm/Support/Allocator.h"
		21	#include "llvm/Support/raw_ostream.h"
		22
		23	namespace clang {
		24	namespace comments {
		25
		26	class Lexer;
		27	class TextTokenRetokenizer;
		28	struct CommandInfo;
		29	class CommandTraits;
		30
		31	namespace tok {
		32	enum TokenKind {
		33	eof,
		34	newline,
		35	text,
		36	unknown_command, // Command that does not have an ID.
		37	backslash_command, // Command with an ID, that used backslash marker.
		38	at_command, // Command with an ID, that used 'at' marker.
		39	verbatim_block_begin,
		40	verbatim_block_line,
		41	verbatim_block_end,
		42	verbatim_line_name,
		43	verbatim_line_text,
		44	html_start_tag, // <tag
		45	html_ident, // attr
		46	html_equals, // =
		47	html_quoted_string, // "blah\"blah" or 'blah\'blah'
		48	html_greater, // >
		49	html_slash_greater, // />
		50	html_end_tag // </tag
		51	};
		52	} // end namespace tok
		53
		54	/// Comment token.
		55	class Token {
		56	friend class Lexer;
		57	friend class TextTokenRetokenizer;
		58
		59	/// The location of the token.
		60	SourceLocation Loc;
		61
		62	/// The actual kind of the token.
		63	tok::TokenKind Kind;
		64
		65	/// Integer value associated with a token.
		66	///
		67	/// If the token is a known command, contains command ID and TextPtr is
		68	/// unused (command spelling can be found with CommandTraits). Otherwise,
		69	/// contains the length of the string that starts at TextPtr.
		70	unsigned IntVal;
		71
		72	/// Length of the token spelling in comment. Can be 0 for synthenized
		73	/// tokens.
		74	unsigned Length;
		75
		76	/// Contains text value associated with a token.
		77	const char *TextPtr;
		78
		79	public:
		80	SourceLocation getLocation() const LLVM_READONLY { return Loc; }
		81	void setLocation(SourceLocation SL) { Loc = SL; }
		82
		83	SourceLocation getEndLocation() const LLVM_READONLY {
		84	if (Length == 0 \|\| Length == 1)
		85	return Loc;
		86	return Loc.getLocWithOffset(Length - 1);
		87	}
		88
		89	tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
		90	void setKind(tok::TokenKind K) { Kind = K; }
		91
		92	bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
		93	bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
		94
		95	unsigned getLength() const LLVM_READONLY { return Length; }
		96	void setLength(unsigned L) { Length = L; }
		97
		98	StringRef getText() const LLVM_READONLY {
		99	assert(is(tok::text));
		100	return StringRef(TextPtr, IntVal);
		101	}
		102
		103	void setText(StringRef Text) {
		104	assert(is(tok::text));
		105	TextPtr = Text.data();
		106	IntVal = Text.size();
		107	}
		108
		109	StringRef getUnknownCommandName() const LLVM_READONLY {
		110	assert(is(tok::unknown_command));
		111	return StringRef(TextPtr, IntVal);
		112	}
		113
		114	void setUnknownCommandName(StringRef Name) {
		115	assert(is(tok::unknown_command));
		116	TextPtr = Name.data();
		117	IntVal = Name.size();
		118	}
		119
		120	unsigned getCommandID() const LLVM_READONLY {
		121	assert(is(tok::backslash_command) \|\| is(tok::at_command));
		122	return IntVal;
		123	}
		124
		125	void setCommandID(unsigned ID) {
		126	assert(is(tok::backslash_command) \|\| is(tok::at_command));
		127	IntVal = ID;
		128	}
		129
		130	unsigned getVerbatimBlockID() const LLVM_READONLY {
		131	assert(is(tok::verbatim_block_begin) \|\| is(tok::verbatim_block_end));
		132	return IntVal;
		133	}
		134
		135	void setVerbatimBlockID(unsigned ID) {
		136	assert(is(tok::verbatim_block_begin) \|\| is(tok::verbatim_block_end));
		137	IntVal = ID;
		138	}
		139
		140	StringRef getVerbatimBlockText() const LLVM_READONLY {
		141	assert(is(tok::verbatim_block_line));
		142	return StringRef(TextPtr, IntVal);
		143	}
		144
		145	void setVerbatimBlockText(StringRef Text) {
		146	assert(is(tok::verbatim_block_line));
		147	TextPtr = Text.data();
		148	IntVal = Text.size();
		149	}
		150
		151	unsigned getVerbatimLineID() const LLVM_READONLY {
		152	assert(is(tok::verbatim_line_name));
		153	return IntVal;
		154	}
		155
		156	void setVerbatimLineID(unsigned ID) {
		157	assert(is(tok::verbatim_line_name));
		158	IntVal = ID;
		159	}
		160
		161	StringRef getVerbatimLineText() const LLVM_READONLY {
		162	assert(is(tok::verbatim_line_text));
		163	return StringRef(TextPtr, IntVal);
		164	}
		165
		166	void setVerbatimLineText(StringRef Text) {
		167	assert(is(tok::verbatim_line_text));
		168	TextPtr = Text.data();
		169	IntVal = Text.size();
		170	}
		171
		172	StringRef getHTMLTagStartName() const LLVM_READONLY {
		173	assert(is(tok::html_start_tag));
		174	return StringRef(TextPtr, IntVal);
		175	}
		176
		177	void setHTMLTagStartName(StringRef Name) {
		178	assert(is(tok::html_start_tag));
		179	TextPtr = Name.data();
		180	IntVal = Name.size();
		181	}
		182
		183	StringRef getHTMLIdent() const LLVM_READONLY {
		184	assert(is(tok::html_ident));
		185	return StringRef(TextPtr, IntVal);
		186	}
		187
		188	void setHTMLIdent(StringRef Name) {
		189	assert(is(tok::html_ident));
		190	TextPtr = Name.data();
		191	IntVal = Name.size();
		192	}
		193
		194	StringRef getHTMLQuotedString() const LLVM_READONLY {
		195	assert(is(tok::html_quoted_string));
		196	return StringRef(TextPtr, IntVal);
		197	}
		198
		199	void setHTMLQuotedString(StringRef Str) {
		200	assert(is(tok::html_quoted_string));
		201	TextPtr = Str.data();
		202	IntVal = Str.size();
		203	}
		204
		205	StringRef getHTMLTagEndName() const LLVM_READONLY {
		206	assert(is(tok::html_end_tag));
		207	return StringRef(TextPtr, IntVal);
		208	}
		209
		210	void setHTMLTagEndName(StringRef Name) {
		211	assert(is(tok::html_end_tag));
		212	TextPtr = Name.data();
		213	IntVal = Name.size();
		214	}
		215
		216	void dump(const Lexer &L, const SourceManager &SM) const;
		217	};
		218
		219	/// Comment lexer.
		220	class Lexer {
		221	private:
		222	Lexer(const Lexer &) = delete;
		223	void operator=(const Lexer &) = delete;
		224
		225	/// Allocator for strings that are semantic values of tokens and have to be
		226	/// computed (for example, resolved decimal character references).
		227	llvm::BumpPtrAllocator &Allocator;
		228
		229	DiagnosticsEngine &Diags;
		230
		231	const CommandTraits &Traits;
		232
		233	const char *const BufferStart;
		234	const char *const BufferEnd;
		235
		236	const char *BufferPtr;
		237
		238	/// One past end pointer for the current comment. For BCPL comments points
		239	/// to newline or BufferEnd, for C comments points to star in '*/'.
		240	const char *CommentEnd;
		241
		242	SourceLocation FileLoc;
		243
		244	/// If true, the commands, html tags, etc will be parsed and reported as
		245	/// separate tokens inside the comment body. If false, the comment text will
		246	/// be parsed into text and newline tokens.
		247	bool ParseCommands;
		248
		249	enum LexerCommentState : uint8_t {
		250	LCS_BeforeComment,
		251	LCS_InsideBCPLComment,
		252	LCS_InsideCComment,
		253	LCS_BetweenComments
		254	};
		255
		256	/// Low-level lexer state, track if we are inside or outside of comment.
		257	LexerCommentState CommentState;
		258
		259	enum LexerState : uint8_t {
		260	/// Lexing normal comment text
		261	LS_Normal,
		262
		263	/// Finished lexing verbatim block beginning command, will lex first body
		264	/// line.
		265	LS_VerbatimBlockFirstLine,
		266
		267	/// Lexing verbatim block body line-by-line, skipping line-starting
		268	/// decorations.
		269	LS_VerbatimBlockBody,
		270
		271	/// Finished lexing verbatim line beginning command, will lex text (one
		272	/// line).
		273	LS_VerbatimLineText,
		274
		275	/// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
		276	LS_HTMLStartTag,
		277
		278	/// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
		279	LS_HTMLEndTag
		280	};
		281
		282	/// Current lexing mode.
		283	LexerState State;
		284
		285	/// If State is LS_VerbatimBlock, contains the name of verbatim end
		286	/// command, including command marker.
		287	SmallString<16> VerbatimBlockEndCommandName;
		288
		289	/// Given a character reference name (e.g., "lt"), return the character that
		290	/// it stands for (e.g., "<").
		291	StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
		292
		293	/// Given a Unicode codepoint as base-10 integer, return the character.
		294	StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
		295
		296	/// Given a Unicode codepoint as base-16 integer, return the character.
		297	StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
		298
		299	void formTokenWithChars(Token &Result, const char *TokEnd,
		300	tok::TokenKind Kind);
		301
		302	void formTextToken(Token &Result, const char *TokEnd) {
		303	StringRef Text(BufferPtr, TokEnd - BufferPtr);
		304	formTokenWithChars(Result, TokEnd, tok::text);
		305	Result.setText(Text);
		306	}
		307
		308	SourceLocation getSourceLocation(const char *Loc) const {
		309	assert(Loc >= BufferStart && Loc <= BufferEnd &&
		310	"Location out of range for this buffer!");
		311
		312	const unsigned CharNo = Loc - BufferStart;
		313	return FileLoc.getLocWithOffset(CharNo);
		314	}
		315
		316	DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
		317	return Diags.Report(Loc, DiagID);
		318	}
		319
		320	/// Eat string matching regexp \code \s\ \endcode.
		321	void skipLineStartingDecorations();
		322
		323	/// Skip over pure text.
		324	const char *skipTextToken();
		325
		326	/// Lex comment text, including commands if ParseCommands is set to true.
		327	void lexCommentText(Token &T);
		328
		329	void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
		330	const CommandInfo *Info);
		331
		332	void lexVerbatimBlockFirstLine(Token &T);
		333
		334	void lexVerbatimBlockBody(Token &T);
		335
		336	void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
		337	const CommandInfo *Info);
		338
		339	void lexVerbatimLineText(Token &T);
		340
		341	void lexHTMLCharacterReference(Token &T);
		342
		343	void setupAndLexHTMLStartTag(Token &T);
		344
		345	void lexHTMLStartTag(Token &T);
		346
		347	void setupAndLexHTMLEndTag(Token &T);
		348
		349	void lexHTMLEndTag(Token &T);
		350
		351	public:
		352	Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
		353	const CommandTraits &Traits, SourceLocation FileLoc,
		354	const char BufferStart, const char BufferEnd,
		355	bool ParseCommands = true);
		356
		357	void lex(Token &T);
		358
		359	StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const;
		360	};
		361
		362	} // end namespace comments
		363	} // end namespace clang
		364
		365	#endif
		366

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite//llvm-build/x86_64/include/clang/AST/CommentLexer.h – Rev 14