Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===--- Token.h - Token interface ------------------------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file defines the Token interface. |
||
10 | // |
||
11 | //===----------------------------------------------------------------------===// |
||
12 | |||
13 | #ifndef LLVM_CLANG_LEX_TOKEN_H |
||
14 | #define LLVM_CLANG_LEX_TOKEN_H |
||
15 | |||
16 | #include "clang/Basic/SourceLocation.h" |
||
17 | #include "clang/Basic/TokenKinds.h" |
||
18 | #include "llvm/ADT/ArrayRef.h" |
||
19 | #include "llvm/ADT/StringRef.h" |
||
20 | #include <cassert> |
||
21 | |||
22 | namespace clang { |
||
23 | |||
24 | class IdentifierInfo; |
||
25 | |||
26 | /// Token - This structure provides full information about a lexed token. |
||
27 | /// It is not intended to be space efficient, it is intended to return as much |
||
28 | /// information as possible about each returned token. This is expected to be |
||
29 | /// compressed into a smaller form if memory footprint is important. |
||
30 | /// |
||
31 | /// The parser can create a special "annotation token" representing a stream of |
||
32 | /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" |
||
33 | /// can be represented by a single typename annotation token that carries |
||
34 | /// information about the SourceRange of the tokens and the type object. |
||
35 | class Token { |
||
36 | /// The location of the token. This is actually a SourceLocation. |
||
37 | SourceLocation::UIntTy Loc; |
||
38 | |||
39 | // Conceptually these next two fields could be in a union. However, this |
||
40 | // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical |
||
41 | // routine. Keeping as separate members with casts until a more beautiful fix |
||
42 | // presents itself. |
||
43 | |||
44 | /// UintData - This holds either the length of the token text, when |
||
45 | /// a normal token, or the end of the SourceRange when an annotation |
||
46 | /// token. |
||
47 | SourceLocation::UIntTy UintData; |
||
48 | |||
49 | /// PtrData - This is a union of four different pointer types, which depends |
||
50 | /// on what type of token this is: |
||
51 | /// Identifiers, keywords, etc: |
||
52 | /// This is an IdentifierInfo*, which contains the uniqued identifier |
||
53 | /// spelling. |
||
54 | /// Literals: isLiteral() returns true. |
||
55 | /// This is a pointer to the start of the token in a text buffer, which |
||
56 | /// may be dirty (have trigraphs / escaped newlines). |
||
57 | /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). |
||
58 | /// This is a pointer to sema-specific data for the annotation token. |
||
59 | /// Eof: |
||
60 | // This is a pointer to a Decl. |
||
61 | /// Other: |
||
62 | /// This is null. |
||
63 | void *PtrData; |
||
64 | |||
65 | /// Kind - The actual flavor of token this is. |
||
66 | tok::TokenKind Kind; |
||
67 | |||
68 | /// Flags - Bits we track about this token, members of the TokenFlags enum. |
||
69 | unsigned short Flags; |
||
70 | |||
71 | public: |
||
72 | // Various flags set per token: |
||
73 | enum TokenFlags { |
||
74 | StartOfLine = 0x01, // At start of line or only after whitespace |
||
75 | // (considering the line after macro expansion). |
||
76 | LeadingSpace = 0x02, // Whitespace exists before this token (considering |
||
77 | // whitespace after macro expansion). |
||
78 | DisableExpand = 0x04, // This identifier may never be macro expanded. |
||
79 | NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. |
||
80 | LeadingEmptyMacro = 0x10, // Empty macro exists before this token. |
||
81 | HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. |
||
82 | HasUCN = 0x40, // This identifier contains a UCN. |
||
83 | IgnoredComma = 0x80, // This comma is not a macro argument separator (MS). |
||
84 | StringifiedInMacro = 0x100, // This string or character literal is formed by |
||
85 | // macro stringizing or charizing operator. |
||
86 | CommaAfterElided = 0x200, // The comma following this token was elided (MS). |
||
87 | IsEditorPlaceholder = 0x400, // This identifier is a placeholder. |
||
88 | IsReinjected = 0x800, // A phase 4 token that was produced before and |
||
89 | // re-added, e.g. via EnterTokenStream. Annotation |
||
90 | // tokens are *not* reinjected. |
||
91 | }; |
||
92 | |||
93 | tok::TokenKind getKind() const { return Kind; } |
||
94 | void setKind(tok::TokenKind K) { Kind = K; } |
||
95 | |||
96 | /// is/isNot - Predicates to check if this token is a specific kind, as in |
||
97 | /// "if (Tok.is(tok::l_brace)) {...}". |
||
98 | bool is(tok::TokenKind K) const { return Kind == K; } |
||
99 | bool isNot(tok::TokenKind K) const { return Kind != K; } |
||
100 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { |
||
101 | return is(K1) || is(K2); |
||
102 | } |
||
103 | template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const { |
||
104 | return is(K1) || isOneOf(Ks...); |
||
105 | } |
||
106 | |||
107 | /// Return true if this is a raw identifier (when lexing |
||
108 | /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). |
||
109 | bool isAnyIdentifier() const { |
||
110 | return tok::isAnyIdentifier(getKind()); |
||
111 | } |
||
112 | |||
113 | /// Return true if this is a "literal", like a numeric |
||
114 | /// constant, string, etc. |
||
115 | bool isLiteral() const { |
||
116 | return tok::isLiteral(getKind()); |
||
117 | } |
||
118 | |||
119 | /// Return true if this is any of tok::annot_* kind tokens. |
||
120 | bool isAnnotation() const { |
||
121 | return tok::isAnnotation(getKind()); |
||
122 | } |
||
123 | |||
124 | /// Return a source location identifier for the specified |
||
125 | /// offset in the current file. |
||
126 | SourceLocation getLocation() const { |
||
127 | return SourceLocation::getFromRawEncoding(Loc); |
||
128 | } |
||
129 | unsigned getLength() const { |
||
130 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
||
131 | return UintData; |
||
132 | } |
||
133 | |||
134 | void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } |
||
135 | void setLength(unsigned Len) { |
||
136 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
||
137 | UintData = Len; |
||
138 | } |
||
139 | |||
140 | SourceLocation getAnnotationEndLoc() const { |
||
141 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
||
142 | return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc); |
||
143 | } |
||
144 | void setAnnotationEndLoc(SourceLocation L) { |
||
145 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
||
146 | UintData = L.getRawEncoding(); |
||
147 | } |
||
148 | |||
149 | SourceLocation getLastLoc() const { |
||
150 | return isAnnotation() ? getAnnotationEndLoc() : getLocation(); |
||
151 | } |
||
152 | |||
153 | SourceLocation getEndLoc() const { |
||
154 | return isAnnotation() ? getAnnotationEndLoc() |
||
155 | : getLocation().getLocWithOffset(getLength()); |
||
156 | } |
||
157 | |||
158 | /// SourceRange of the group of tokens that this annotation token |
||
159 | /// represents. |
||
160 | SourceRange getAnnotationRange() const { |
||
161 | return SourceRange(getLocation(), getAnnotationEndLoc()); |
||
162 | } |
||
163 | void setAnnotationRange(SourceRange R) { |
||
164 | setLocation(R.getBegin()); |
||
165 | setAnnotationEndLoc(R.getEnd()); |
||
166 | } |
||
167 | |||
168 | const char *getName() const { return tok::getTokenName(Kind); } |
||
169 | |||
170 | /// Reset all flags to cleared. |
||
171 | void startToken() { |
||
172 | Kind = tok::unknown; |
||
173 | Flags = 0; |
||
174 | PtrData = nullptr; |
||
175 | UintData = 0; |
||
176 | Loc = SourceLocation().getRawEncoding(); |
||
177 | } |
||
178 | |||
179 | bool hasPtrData() const { return PtrData != nullptr; } |
||
180 | |||
181 | IdentifierInfo *getIdentifierInfo() const { |
||
182 | assert(isNot(tok::raw_identifier) && |
||
183 | "getIdentifierInfo() on a tok::raw_identifier token!"); |
||
184 | assert(!isAnnotation() && |
||
185 | "getIdentifierInfo() on an annotation token!"); |
||
186 | if (isLiteral()) return nullptr; |
||
187 | if (is(tok::eof)) return nullptr; |
||
188 | return (IdentifierInfo*) PtrData; |
||
189 | } |
||
190 | void setIdentifierInfo(IdentifierInfo *II) { |
||
191 | PtrData = (void*) II; |
||
192 | } |
||
193 | |||
194 | const void *getEofData() const { |
||
195 | assert(is(tok::eof)); |
||
196 | return reinterpret_cast<const void *>(PtrData); |
||
197 | } |
||
198 | void setEofData(const void *D) { |
||
199 | assert(is(tok::eof)); |
||
200 | assert(!PtrData); |
||
201 | PtrData = const_cast<void *>(D); |
||
202 | } |
||
203 | |||
204 | /// getRawIdentifier - For a raw identifier token (i.e., an identifier |
||
205 | /// lexed in raw mode), returns a reference to the text substring in the |
||
206 | /// buffer if known. |
||
207 | StringRef getRawIdentifier() const { |
||
208 | assert(is(tok::raw_identifier)); |
||
209 | return StringRef(reinterpret_cast<const char *>(PtrData), getLength()); |
||
210 | } |
||
211 | void setRawIdentifierData(const char *Ptr) { |
||
212 | assert(is(tok::raw_identifier)); |
||
213 | PtrData = const_cast<char*>(Ptr); |
||
214 | } |
||
215 | |||
216 | /// getLiteralData - For a literal token (numeric constant, string, etc), this |
||
217 | /// returns a pointer to the start of it in the text buffer if known, null |
||
218 | /// otherwise. |
||
219 | const char *getLiteralData() const { |
||
220 | assert(isLiteral() && "Cannot get literal data of non-literal"); |
||
221 | return reinterpret_cast<const char*>(PtrData); |
||
222 | } |
||
223 | void setLiteralData(const char *Ptr) { |
||
224 | assert(isLiteral() && "Cannot set literal data of non-literal"); |
||
225 | PtrData = const_cast<char*>(Ptr); |
||
226 | } |
||
227 | |||
228 | void *getAnnotationValue() const { |
||
229 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
||
230 | return PtrData; |
||
231 | } |
||
232 | void setAnnotationValue(void *val) { |
||
233 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
||
234 | PtrData = val; |
||
235 | } |
||
236 | |||
237 | /// Set the specified flag. |
||
238 | void setFlag(TokenFlags Flag) { |
||
239 | Flags |= Flag; |
||
240 | } |
||
241 | |||
242 | /// Get the specified flag. |
||
243 | bool getFlag(TokenFlags Flag) const { |
||
244 | return (Flags & Flag) != 0; |
||
245 | } |
||
246 | |||
247 | /// Unset the specified flag. |
||
248 | void clearFlag(TokenFlags Flag) { |
||
249 | Flags &= ~Flag; |
||
250 | } |
||
251 | |||
252 | /// Return the internal represtation of the flags. |
||
253 | /// |
||
254 | /// This is only intended for low-level operations such as writing tokens to |
||
255 | /// disk. |
||
256 | unsigned getFlags() const { |
||
257 | return Flags; |
||
258 | } |
||
259 | |||
260 | /// Set a flag to either true or false. |
||
261 | void setFlagValue(TokenFlags Flag, bool Val) { |
||
262 | if (Val) |
||
263 | setFlag(Flag); |
||
264 | else |
||
265 | clearFlag(Flag); |
||
266 | } |
||
267 | |||
268 | /// isAtStartOfLine - Return true if this token is at the start of a line. |
||
269 | /// |
||
270 | bool isAtStartOfLine() const { return getFlag(StartOfLine); } |
||
271 | |||
272 | /// Return true if this token has whitespace before it. |
||
273 | /// |
||
274 | bool hasLeadingSpace() const { return getFlag(LeadingSpace); } |
||
275 | |||
276 | /// Return true if this identifier token should never |
||
277 | /// be expanded in the future, due to C99 6.10.3.4p2. |
||
278 | bool isExpandDisabled() const { return getFlag(DisableExpand); } |
||
279 | |||
280 | /// Return true if we have an ObjC keyword identifier. |
||
281 | bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; |
||
282 | |||
283 | /// Return the ObjC keyword kind. |
||
284 | tok::ObjCKeywordKind getObjCKeywordID() const; |
||
285 | |||
286 | /// Return true if this token has trigraphs or escaped newlines in it. |
||
287 | bool needsCleaning() const { return getFlag(NeedsCleaning); } |
||
288 | |||
289 | /// Return true if this token has an empty macro before it. |
||
290 | /// |
||
291 | bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); } |
||
292 | |||
293 | /// Return true if this token is a string or character literal which |
||
294 | /// has a ud-suffix. |
||
295 | bool hasUDSuffix() const { return getFlag(HasUDSuffix); } |
||
296 | |||
297 | /// Returns true if this token contains a universal character name. |
||
298 | bool hasUCN() const { return getFlag(HasUCN); } |
||
299 | |||
300 | /// Returns true if this token is formed by macro by stringizing or charizing |
||
301 | /// operator. |
||
302 | bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); } |
||
303 | |||
304 | /// Returns true if the comma after this token was elided. |
||
305 | bool commaAfterElided() const { return getFlag(CommaAfterElided); } |
||
306 | |||
307 | /// Returns true if this token is an editor placeholder. |
||
308 | /// |
||
309 | /// Editor placeholders are produced by the code-completion engine and are |
||
310 | /// represented as characters between '<#' and '#>' in the source code. The |
||
311 | /// lexer uses identifier tokens to represent placeholders. |
||
312 | bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); } |
||
313 | }; |
||
314 | |||
315 | /// Information about the conditional stack (\#if directives) |
||
316 | /// currently active. |
||
317 | struct PPConditionalInfo { |
||
318 | /// Location where the conditional started. |
||
319 | SourceLocation IfLoc; |
||
320 | |||
321 | /// True if this was contained in a skipping directive, e.g., |
||
322 | /// in a "\#if 0" block. |
||
323 | bool WasSkipping; |
||
324 | |||
325 | /// True if we have emitted tokens already, and now we're in |
||
326 | /// an \#else block or something. Only useful in Skipping blocks. |
||
327 | bool FoundNonSkip; |
||
328 | |||
329 | /// True if we've seen a \#else in this block. If so, |
||
330 | /// \#elif/\#else directives are not allowed. |
||
331 | bool FoundElse; |
||
332 | }; |
||
333 | |||
334 | // Extra information needed for annonation tokens. |
||
335 | struct PragmaLoopHintInfo { |
||
336 | Token PragmaName; |
||
337 | Token Option; |
||
338 | ArrayRef<Token> Toks; |
||
339 | }; |
||
340 | } // end namespace clang |
||
341 | |||
342 | #endif // LLVM_CLANG_LEX_TOKEN_H |