Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  Defines the lexer for structured comments and its supporting token class.
10
//
11
//===----------------------------------------------------------------------===//
12
 
13
#ifndef LLVM_CLANG_AST_COMMENTLEXER_H
14
#define LLVM_CLANG_AST_COMMENTLEXER_H
15
 
16
#include "clang/Basic/Diagnostic.h"
17
#include "clang/Basic/SourceManager.h"
18
#include "llvm/ADT/SmallString.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/Support/Allocator.h"
21
#include "llvm/Support/raw_ostream.h"
22
 
23
namespace clang {
24
namespace comments {
25
 
26
// Forward declarations of types that this header only refers to by
// reference, by pointer, or in friend declarations.
class Lexer;
class TextTokenRetokenizer;
struct CommandInfo;
class CommandTraits;
30
 
31
namespace tok {
/// Kinds of tokens produced by the comment lexer.
///
/// Enumerators are implicitly numbered starting from zero in declaration
/// order, so new kinds should be added with care if the numeric values are
/// relied upon anywhere.
enum TokenKind {
  eof,               // End of input.
  newline,           // Line break inside a comment.
  text,              // Plain comment text.
  unknown_command,   // Command that does not have an ID.
  backslash_command, // Command with an ID, that used backslash marker.
  at_command,        // Command with an ID, that used 'at' marker.
  verbatim_block_begin, // Command that opens a verbatim block.
  verbatim_block_line,  // One line of verbatim block text.
  verbatim_block_end,   // Command that closes a verbatim block.
  verbatim_line_name,   // Command that introduces a verbatim line.
  verbatim_line_text,   // Text of a verbatim line.
  html_start_tag,     // <tag
  html_ident,         // attr
  html_equals,        // =
  html_quoted_string, // "blah\"blah" or 'blah\'blah'
  html_greater,       // >
  html_slash_greater, // />
  html_end_tag        // </tag
};
} // end namespace tok
53
 
54
/// Comment token.
55
class Token {
56
  friend class Lexer;
57
  friend class TextTokenRetokenizer;
58
 
59
  /// The location of the token.
60
  SourceLocation Loc;
61
 
62
  /// The actual kind of the token.
63
  tok::TokenKind Kind;
64
 
65
  /// Integer value associated with a token.
66
  ///
67
  /// If the token is a known command, contains command ID and TextPtr is
68
  /// unused (command spelling can be found with CommandTraits).  Otherwise,
69
  /// contains the length of the string that starts at TextPtr.
70
  unsigned IntVal;
71
 
72
  /// Length of the token spelling in comment.  Can be 0 for synthenized
73
  /// tokens.
74
  unsigned Length;
75
 
76
  /// Contains text value associated with a token.
77
  const char *TextPtr;
78
 
79
public:
80
  SourceLocation getLocation() const LLVM_READONLY { return Loc; }
81
  void setLocation(SourceLocation SL) { Loc = SL; }
82
 
83
  SourceLocation getEndLocation() const LLVM_READONLY {
84
    if (Length == 0 || Length == 1)
85
      return Loc;
86
    return Loc.getLocWithOffset(Length - 1);
87
  }
88
 
89
  tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
90
  void setKind(tok::TokenKind K) { Kind = K; }
91
 
92
  bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
93
  bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
94
 
95
  unsigned getLength() const LLVM_READONLY { return Length; }
96
  void setLength(unsigned L) { Length = L; }
97
 
98
  StringRef getText() const LLVM_READONLY {
99
    assert(is(tok::text));
100
    return StringRef(TextPtr, IntVal);
101
  }
102
 
103
  void setText(StringRef Text) {
104
    assert(is(tok::text));
105
    TextPtr = Text.data();
106
    IntVal = Text.size();
107
  }
108
 
109
  StringRef getUnknownCommandName() const LLVM_READONLY {
110
    assert(is(tok::unknown_command));
111
    return StringRef(TextPtr, IntVal);
112
  }
113
 
114
  void setUnknownCommandName(StringRef Name) {
115
    assert(is(tok::unknown_command));
116
    TextPtr = Name.data();
117
    IntVal = Name.size();
118
  }
119
 
120
  unsigned getCommandID() const LLVM_READONLY {
121
    assert(is(tok::backslash_command) || is(tok::at_command));
122
    return IntVal;
123
  }
124
 
125
  void setCommandID(unsigned ID) {
126
    assert(is(tok::backslash_command) || is(tok::at_command));
127
    IntVal = ID;
128
  }
129
 
130
  unsigned getVerbatimBlockID() const LLVM_READONLY {
131
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
132
    return IntVal;
133
  }
134
 
135
  void setVerbatimBlockID(unsigned ID) {
136
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
137
    IntVal = ID;
138
  }
139
 
140
  StringRef getVerbatimBlockText() const LLVM_READONLY {
141
    assert(is(tok::verbatim_block_line));
142
    return StringRef(TextPtr, IntVal);
143
  }
144
 
145
  void setVerbatimBlockText(StringRef Text) {
146
    assert(is(tok::verbatim_block_line));
147
    TextPtr = Text.data();
148
    IntVal = Text.size();
149
  }
150
 
151
  unsigned getVerbatimLineID() const LLVM_READONLY {
152
    assert(is(tok::verbatim_line_name));
153
    return IntVal;
154
  }
155
 
156
  void setVerbatimLineID(unsigned ID) {
157
    assert(is(tok::verbatim_line_name));
158
    IntVal = ID;
159
  }
160
 
161
  StringRef getVerbatimLineText() const LLVM_READONLY {
162
    assert(is(tok::verbatim_line_text));
163
    return StringRef(TextPtr, IntVal);
164
  }
165
 
166
  void setVerbatimLineText(StringRef Text) {
167
    assert(is(tok::verbatim_line_text));
168
    TextPtr = Text.data();
169
    IntVal = Text.size();
170
  }
171
 
172
  StringRef getHTMLTagStartName() const LLVM_READONLY {
173
    assert(is(tok::html_start_tag));
174
    return StringRef(TextPtr, IntVal);
175
  }
176
 
177
  void setHTMLTagStartName(StringRef Name) {
178
    assert(is(tok::html_start_tag));
179
    TextPtr = Name.data();
180
    IntVal = Name.size();
181
  }
182
 
183
  StringRef getHTMLIdent() const LLVM_READONLY {
184
    assert(is(tok::html_ident));
185
    return StringRef(TextPtr, IntVal);
186
  }
187
 
188
  void setHTMLIdent(StringRef Name) {
189
    assert(is(tok::html_ident));
190
    TextPtr = Name.data();
191
    IntVal = Name.size();
192
  }
193
 
194
  StringRef getHTMLQuotedString() const LLVM_READONLY {
195
    assert(is(tok::html_quoted_string));
196
    return StringRef(TextPtr, IntVal);
197
  }
198
 
199
  void setHTMLQuotedString(StringRef Str) {
200
    assert(is(tok::html_quoted_string));
201
    TextPtr = Str.data();
202
    IntVal = Str.size();
203
  }
204
 
205
  StringRef getHTMLTagEndName() const LLVM_READONLY {
206
    assert(is(tok::html_end_tag));
207
    return StringRef(TextPtr, IntVal);
208
  }
209
 
210
  void setHTMLTagEndName(StringRef Name) {
211
    assert(is(tok::html_end_tag));
212
    TextPtr = Name.data();
213
    IntVal = Name.size();
214
  }
215
 
216
  void dump(const Lexer &L, const SourceManager &SM) const;
217
};
218
 
219
/// Comment lexer.
220
class Lexer {
221
private:
222
  Lexer(const Lexer &) = delete;
223
  void operator=(const Lexer &) = delete;
224
 
225
  /// Allocator for strings that are semantic values of tokens and have to be
226
  /// computed (for example, resolved decimal character references).
227
  llvm::BumpPtrAllocator &Allocator;
228
 
229
  DiagnosticsEngine &Diags;
230
 
231
  const CommandTraits &Traits;
232
 
233
  const char *const BufferStart;
234
  const char *const BufferEnd;
235
 
236
  const char *BufferPtr;
237
 
238
  /// One past end pointer for the current comment.  For BCPL comments points
239
  /// to newline or BufferEnd, for C comments points to star in '*/'.
240
  const char *CommentEnd;
241
 
242
  SourceLocation FileLoc;
243
 
244
  /// If true, the commands, html tags, etc will be parsed and reported as
245
  /// separate tokens inside the comment body. If false, the comment text will
246
  /// be parsed into text and newline tokens.
247
  bool ParseCommands;
248
 
249
  enum LexerCommentState : uint8_t {
250
    LCS_BeforeComment,
251
    LCS_InsideBCPLComment,
252
    LCS_InsideCComment,
253
    LCS_BetweenComments
254
  };
255
 
256
  /// Low-level lexer state, track if we are inside or outside of comment.
257
  LexerCommentState CommentState;
258
 
259
  enum LexerState : uint8_t {
260
    /// Lexing normal comment text
261
    LS_Normal,
262
 
263
    /// Finished lexing verbatim block beginning command, will lex first body
264
    /// line.
265
    LS_VerbatimBlockFirstLine,
266
 
267
    /// Lexing verbatim block body line-by-line, skipping line-starting
268
    /// decorations.
269
    LS_VerbatimBlockBody,
270
 
271
    /// Finished lexing verbatim line beginning command, will lex text (one
272
    /// line).
273
    LS_VerbatimLineText,
274
 
275
    /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
276
    LS_HTMLStartTag,
277
 
278
    /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
279
    LS_HTMLEndTag
280
  };
281
 
282
  /// Current lexing mode.
283
  LexerState State;
284
 
285
  /// If State is LS_VerbatimBlock, contains the name of verbatim end
286
  /// command, including command marker.
287
  SmallString<16> VerbatimBlockEndCommandName;
288
 
289
  /// Given a character reference name (e.g., "lt"), return the character that
290
  /// it stands for (e.g., "<").
291
  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
292
 
293
  /// Given a Unicode codepoint as base-10 integer, return the character.
294
  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
295
 
296
  /// Given a Unicode codepoint as base-16 integer, return the character.
297
  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
298
 
299
  void formTokenWithChars(Token &Result, const char *TokEnd,
300
                          tok::TokenKind Kind);
301
 
302
  void formTextToken(Token &Result, const char *TokEnd) {
303
    StringRef Text(BufferPtr, TokEnd - BufferPtr);
304
    formTokenWithChars(Result, TokEnd, tok::text);
305
    Result.setText(Text);
306
  }
307
 
308
  SourceLocation getSourceLocation(const char *Loc) const {
309
    assert(Loc >= BufferStart && Loc <= BufferEnd &&
310
           "Location out of range for this buffer!");
311
 
312
    const unsigned CharNo = Loc - BufferStart;
313
    return FileLoc.getLocWithOffset(CharNo);
314
  }
315
 
316
  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
317
    return Diags.Report(Loc, DiagID);
318
  }
319
 
320
  /// Eat string matching regexp \code \s*\* \endcode.
321
  void skipLineStartingDecorations();
322
 
323
  /// Skip over pure text.
324
  const char *skipTextToken();
325
 
326
  /// Lex comment text, including commands if ParseCommands is set to true.
327
  void lexCommentText(Token &T);
328
 
329
  void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
330
                                const CommandInfo *Info);
331
 
332
  void lexVerbatimBlockFirstLine(Token &T);
333
 
334
  void lexVerbatimBlockBody(Token &T);
335
 
336
  void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
337
                               const CommandInfo *Info);
338
 
339
  void lexVerbatimLineText(Token &T);
340
 
341
  void lexHTMLCharacterReference(Token &T);
342
 
343
  void setupAndLexHTMLStartTag(Token &T);
344
 
345
  void lexHTMLStartTag(Token &T);
346
 
347
  void setupAndLexHTMLEndTag(Token &T);
348
 
349
  void lexHTMLEndTag(Token &T);
350
 
351
public:
352
  Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
353
        const CommandTraits &Traits, SourceLocation FileLoc,
354
        const char *BufferStart, const char *BufferEnd,
355
        bool ParseCommands = true);
356
 
357
  void lex(Token &T);
358
 
359
  StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const;
360
};
361
 
362
} // end namespace comments
363
} // end namespace clang
364
 
365
#endif
366