//===- Tokens.h - collect tokens from preprocessing --------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Record tokens that a preprocessor emits and define operations to map between
// the tokens written in a file and the tokens produced by the preprocessor.
//
// When running the compiler, there are two token streams we are interested in:
//   - "spelled" tokens directly correspond to a substring written in some
//     source file.
//   - "expanded" tokens represent the result of preprocessing; the parser
//     consumes this token stream to produce the AST.
//
// Expanded tokens correspond directly to locations found in the AST, making it
// possible to find the subranges of the token stream covered by various AST
// nodes. Spelled tokens correspond directly to the source code written by the
// user.
//
// To allow composing these two use cases, we also define operations that map
// between expanded tokens and the spelled tokens that produced them (macro
// calls, directives, etc).
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H
#define LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H

#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

namespace clang {
class Preprocessor;

namespace syntax {

/// A half-open character range inside a particular file; the start offset is
/// included and the end offset is excluded from the range.
struct FileRange {
  /// EXPECTS: File.isValid() && Begin <= End.
  FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset);
  /// EXPECTS: BeginLoc.isValid() && BeginLoc.isFileID().
  FileRange(const SourceManager &SM, SourceLocation BeginLoc, unsigned Length);
  /// EXPECTS: BeginLoc.isValid() && BeginLoc.isFileID(), Begin <= End and files
  ///          are the same.
  FileRange(const SourceManager &SM, SourceLocation BeginLoc,
            SourceLocation EndLoc);

  FileID file() const { return File; }
  /// Start offset (inclusive) in the corresponding file.
  unsigned beginOffset() const { return Begin; }
  /// End offset (exclusive) in the corresponding file.
  unsigned endOffset() const { return End; }

  unsigned length() const { return End - Begin; }

  /// Check if \p Offset is inside the range.
  bool contains(unsigned Offset) const {
    return Begin <= Offset && Offset < End;
  }
  /// Check if \p Offset is inside the range or equal to its endpoint.
  bool touches(unsigned Offset) const {
    return Begin <= Offset && Offset <= End;
  }

  /// Gets the substring that this FileRange refers to.
  llvm::StringRef text(const SourceManager &SM) const;

  /// Convert to the clang range. The returned range is always a char range,
  /// never a token range.
  CharSourceRange toCharRange(const SourceManager &SM) const;

  friend bool operator==(const FileRange &L, const FileRange &R) {
    return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End);
  }
  friend bool operator!=(const FileRange &L, const FileRange &R) {
    return !(L == R);
  }

private:
  FileID File;
  unsigned Begin;
  unsigned End;
};

/// For debugging purposes.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRange &R);
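// Example (illustrative sketch, not part of this header's API): building a
// FileRange and reading back the text it covers. The concrete file contents
// and offsets below are hypothetical.
//
//   void showRange(const SourceManager &SM, FileID FID) {
//     // Offsets 4..9 would cover "hello" in a file starting with
//     // "int hello = 1;".
//     FileRange R(FID, /*BeginOffset=*/4, /*EndOffset=*/9);
//     assert(R.length() == 5);
//     assert(R.contains(4) && !R.contains(9) && R.touches(9));
//     llvm::errs() << R.text(SM) << "\n"; // prints "hello"
//   }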

/// A token coming directly from a file or from a macro invocation. Has just
/// enough information to locate the token in the source code.
/// Can represent both expanded and spelled tokens.
class Token {
public:
  Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind);
  /// EXPECTS: clang::Token is not an annotation token.
  explicit Token(const clang::Token &T);

  tok::TokenKind kind() const { return Kind; }
  /// Location of the first character of a token.
  SourceLocation location() const { return Location; }
  /// Location right after the last character of a token.
  SourceLocation endLocation() const {
    return Location.getLocWithOffset(Length);
  }
  unsigned length() const { return Length; }

  /// Get the substring covered by the token. Note that the result includes all
  /// digraphs, newline continuations, etc. E.g. the tokens for 'int' and
  ///    in\
  ///    t
  /// both have the same kind tok::kw_int, but the results of text() differ.
  llvm::StringRef text(const SourceManager &SM) const;

  /// Gets a range of this token.
  /// EXPECTS: token comes from a file, not from a macro expansion.
  FileRange range(const SourceManager &SM) const;

  /// Given two tokens inside the same file, returns a file range that starts at
  /// \p First and ends at \p Last.
  /// EXPECTS: First and Last are file tokens from the same file, Last starts
  /// after First.
  static FileRange range(const SourceManager &SM, const syntax::Token &First,
                         const syntax::Token &Last);

  std::string dumpForTests(const SourceManager &SM) const;
  /// For debugging purposes.
  std::string str() const;

private:
  SourceLocation Location;
  unsigned Length;
  tok::TokenKind Kind;
};
/// For debugging purposes. Equivalent to a call to Token::str().
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
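// Example (illustrative sketch): printing the spelled tokens of a file with
// their text and offsets. `Tokens` is assumed to be a TokenBuffer built for
// the same translation unit (see TokenBuffer and TokenCollector below).
//
//   void dumpSpelled(const TokenBuffer &Tokens, FileID FID) {
//     const SourceManager &SM = Tokens.sourceManager();
//     for (const syntax::Token &T : Tokens.spelledTokens(FID))
//       llvm::errs() << tok::getTokenName(T.kind()) << " '" << T.text(SM)
//                    << "' at offset " << T.range(SM).beginOffset() << "\n";
//   }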

/// A list of tokens obtained by preprocessing a text buffer and operations to
/// map between the expanded and spelled tokens, i.e. TokenBuffer has
/// information about two token streams:
///    1. Expanded tokens: tokens produced by the preprocessor after all macro
///       replacements,
///    2. Spelled tokens: corresponding directly to the source code of a file
///       before any macro replacements occurred.
/// Here's an example to illustrate the difference between the two:
///     #define FOO 10
///     int a = FOO;
///
/// Spelled tokens are {'#','define','FOO','10','int','a','=','FOO',';'}.
/// Expanded tokens are {'int','a','=','10',';','eof'}.
///
/// Note that the expanded token stream has a tok::eof token at the end, while
/// the spelled tokens never store an 'eof' token.
///
/// The full list of expanded tokens can be obtained with expandedTokens().
/// Spelled tokens for each of the files can be obtained via
/// spelledTokens(FileID).
///
/// To map between the expanded and spelled tokens use spelledForExpanded().
///
/// To build a token buffer use the TokenCollector class. You can also compute
/// the spelled tokens of a file using the tokenize() helper.
///
/// FIXME: allow mappings into macro arguments.
class TokenBuffer {
public:
  TokenBuffer(const SourceManager &SourceMgr) : SourceMgr(&SourceMgr) {}

  TokenBuffer(TokenBuffer &&) = default;
  TokenBuffer(const TokenBuffer &) = delete;
  TokenBuffer &operator=(TokenBuffer &&) = default;
  TokenBuffer &operator=(const TokenBuffer &) = delete;

  /// All tokens produced by the preprocessor after all macro replacements,
  /// directives, etc. Source locations found in the clang AST will always
  /// point to one of these tokens.
  /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()).
  /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split
  ///        into two '>' tokens by the parser. However, TokenBuffer currently
  ///        keeps it as a single '>>' token.
  llvm::ArrayRef<syntax::Token> expandedTokens() const {
    return ExpandedTokens;
  }

  /// Builds a cache to make future calls to expandedTokens(SourceRange)
  /// faster. Creates the index only once; further calls are a no-op.
  void indexExpandedTokens();

  /// Returns the subrange of expandedTokens() corresponding to the closed
  /// token range R.
  /// Consider calling indexExpandedTokens() beforehand for faster lookups.
  llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const;
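  // Example (illustrative sketch): mapping an AST node to its expanded tokens.
  // `Tokens` is a TokenBuffer for the translation unit and `Node` stands for
  // any AST node (e.g. a Stmt or Decl) from the same TU; both names are
  // hypothetical.
  //
  //   Tokens.indexExpandedTokens(); // optional, speeds up repeated queries
  //   llvm::ArrayRef<syntax::Token> Toks =
  //       Tokens.expandedTokens(Node->getSourceRange());
  //
  // The returned tokens are exactly the ones the parser consumed for `Node`,
  // after macro replacement.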

  /// Returns the subrange of spelled tokens corresponding to the AST node
  /// spanning \p Expanded. This is the text that should be replaced if a
  /// refactoring were to rewrite the node. If \p Expanded is empty, the
  /// returned value is std::nullopt.
  ///
  /// Will fail if the expanded tokens do not correspond to a sequence of
  /// spelled tokens. E.g. for the following example:
  ///
  ///   #define FIRST f1 f2 f3
  ///   #define SECOND s1 s2 s3
  ///   #define ID2(X, Y) X Y
  ///
  ///   a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c
  ///   d ID2(e f g, h) i  // expanded tokens are: d e f g h i
  ///
  /// the results would be:
  ///   expanded   =>  spelled
  ///   ------------------------
  ///            a  =>  a
  ///     s1 s2 s3  =>  SECOND
  ///   a f1 f2 f3  =>  a FIRST
  ///         a f1  =>  can't map
  ///        s1 s2  =>  can't map
  ///          e f  =>  e f
  ///          g h  =>  can't map
  ///
  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
  /// Complexity is logarithmic.
  std::optional<llvm::ArrayRef<syntax::Token>>
  spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
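  // Example (illustrative sketch): computing the file text a refactoring
  // should replace for an AST node. `nodeText` and its parameters are
  // hypothetical names; `R` is the node's SourceRange.
  //
  //   std::optional<llvm::StringRef>
  //   nodeText(const TokenBuffer &Tokens, SourceRange R) {
  //     const SourceManager &SM = Tokens.sourceManager();
  //     auto Spelled = Tokens.spelledForExpanded(Tokens.expandedTokens(R));
  //     if (!Spelled || Spelled->empty())
  //       return std::nullopt; // e.g. the node is fully inside a macro body.
  //     return syntax::Token::range(SM, Spelled->front(), Spelled->back())
  //         .text(SM);
  //   }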

  /// Find the subranges of expanded tokens corresponding to \p Spelled.
  ///
  /// Some spelled tokens may not be present in the expanded token stream, so
  /// this function can return an empty vector, e.g. for tokens of macro
  /// directives or disabled preprocessor branches.
  ///
  /// Some spelled tokens can be duplicated in the expanded token stream
  /// multiple times; this function will return multiple results in those
  /// cases. This happens when \p Spelled is inside a macro argument.
  ///
  /// FIXME: return correct results on macro arguments. For now, we return an
  ///        empty list.
  ///
  /// (!) Returns an empty vector for tokens from a #define body.
  /// E.g. for the following example:
  ///
  ///   #define FIRST(A) f1 A = A f2
  ///   #define SECOND s
  ///
  ///   a FIRST(arg) b SECOND c // expanded tokens are: a f1 arg = arg f2 b s c
  /// The results would be
  ///   spelled          =>  expanded
  ///   ------------------------
  ///   #define FIRST    =>  {}
  ///   a FIRST(arg)     =>  {a f1 arg = arg f2}
  ///   arg              =>  {arg, arg} // arg #1 is before `=` and arg #2 is
  ///                                   // after `=` in the expanded tokens.
  llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
  expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;
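  // Example (illustrative sketch): checking whether a single spelled token
  // survives preprocessing at all (e.g. it is not inside a directive or a
  // disabled #if branch). `isExpanded` is a hypothetical helper name.
  //
  //   bool isExpanded(const TokenBuffer &Tokens, const syntax::Token &Spelled) {
  //     return !Tokens
  //                 .expandedForSpelled(
  //                     llvm::ArrayRef<syntax::Token>(&Spelled, 1))
  //                 .empty();
  //   }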

  /// An expansion produced by the preprocessor, includes macro expansions and
  /// preprocessor directives. The preprocessor always maps a non-empty range
  /// of spelled tokens to a (possibly empty) range of expanded tokens. Here
  /// are a few examples of expansions:
  ///    #pragma once      // Expands to an empty range.
  ///    #define FOO 1 2 3 // Expands to an empty range.
  ///    FOO               // Expands to "1 2 3".
  /// FIXME(ibiryukov): implement this, currently #include expansions are empty.
  ///    #include <vector> // Expands to tokens produced by the include.
  struct Expansion {
    llvm::ArrayRef<syntax::Token> Spelled;
    llvm::ArrayRef<syntax::Token> Expanded;
  };
  /// If \p Spelled starts a mapping (e.g. if it's a macro name or '#' starting
  /// a preprocessor directive) return the subrange of expanded tokens that the
  /// macro expands to.
  std::optional<Expansion>
  expansionStartingAt(const syntax::Token *Spelled) const;
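  // Example (illustrative sketch): testing whether a spelled token is a macro
  // name (or the '#' of a directive) and what it expands to. `MacroNameTok`
  // is assumed to be a token from spelledTokens() of the file.
  //
  //   if (auto E = Tokens.expansionStartingAt(&MacroNameTok)) {
  //     // E->Spelled covers the macro name and its arguments, if any.
  //     // E->Expanded is what the preprocessor replaced them with (possibly
  //     // empty, e.g. for directives).
  //   }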
  /// Returns all expansions (partially) expanded from the specified tokens.
  /// These are the expansions whose spelled range intersects \p Spelled.
  std::vector<Expansion>
  expansionsOverlapping(llvm::ArrayRef<syntax::Token> Spelled) const;

  /// Lexed tokens of a file before preprocessing. E.g. for the following input
  ///     #define DECL(name) int name = 10
  ///     DECL(a);
  /// spelledTokens() returns
  ///    {"#", "define", "DECL", "(", "name", ")", "int", "name", "=", "10",
  ///     "DECL", "(", "a", ")", ";"}
  llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;

  /// Returns the spelled token starting at \p Loc, or nullptr if there is no
  /// such token.
  const syntax::Token *spelledTokenAt(SourceLocation Loc) const;
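  // Example (illustrative sketch): looking up the spelled token a diagnostic
  // points at. `Loc` is assumed to be a spelling location of the token's first
  // character.
  //
  //   if (const syntax::Token *T = Tokens.spelledTokenAt(Loc))
  //     llvm::errs() << "token at location: "
  //                  << T->text(Tokens.sourceManager()) << "\n";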

  /// Get all tokens that expand a macro in \p FID. For the following input
  ///     #define FOO B
  ///     #define FOO2(X) int X
  ///     FOO2(XY)
  ///     int B;
  ///     FOO;
  /// macroExpansions() returns {"FOO2", "FOO"} (from lines 3 and 5
  /// respectively).
  std::vector<const syntax::Token *> macroExpansions(FileID FID) const;

  const SourceManager &sourceManager() const { return *SourceMgr; }

  std::string dumpForTests() const;

private:
  /// Describes a mapping between a contiguous subrange of spelled tokens and
  /// expanded tokens. Represents macro expansions, preprocessor directives,
  /// conditionally disabled pp regions, etc.
  ///   #define FOO 1+2
  ///   #define BAR(a) a + 1
  ///   FOO    // invocation #1, tokens = {'1','+','2'}, macroTokens = {'FOO'}.
  ///   BAR(1) // invocation #2, tokens = {'1', '+', '1'},
  ///          //                macroTokens = {'BAR', '(', '1', ')'}.
  struct Mapping {
    // Positions in the corresponding spelled token stream. The corresponding
    // range is never empty.
    unsigned BeginSpelled = 0;
    unsigned EndSpelled = 0;
    // Positions in the expanded token stream. The corresponding range can be
    // empty.
    unsigned BeginExpanded = 0;
    unsigned EndExpanded = 0;

    /// For debugging purposes.
    std::string str() const;
  };
  /// Spelled tokens of the file with information about the subranges.
  struct MarkedFile {
    /// Lexed, but not preprocessed, tokens of the file. These map directly to
    /// text in the corresponding files and include tokens of all preprocessor
    /// directives.
    /// FIXME: spelled tokens don't change across FileIDs that map to the same
    ///        FileEntry. We could consider deduplicating them to save memory.
    std::vector<syntax::Token> SpelledTokens;
    /// A sorted list to convert between the spelled and expanded token streams.
    std::vector<Mapping> Mappings;
    /// The first expanded token produced for this FileID.
    unsigned BeginExpanded = 0;
    unsigned EndExpanded = 0;
  };

  friend class TokenCollector;

  /// Maps a single expanded token to its spelled counterpart or a mapping that
  /// produced it.
  std::pair<const syntax::Token *, const Mapping *>
  spelledForExpandedToken(const syntax::Token *Expanded) const;

  /// Returns a mapping starting before \p Spelled token, or nullptr if no
  /// such mapping exists.
  static const Mapping *
  mappingStartingBeforeSpelled(const MarkedFile &F,
                               const syntax::Token *Spelled);

  /// Convert a private Mapping to a public Expansion.
  Expansion makeExpansion(const MarkedFile &, const Mapping &) const;
  /// Returns the file that the Spelled tokens are taken from.
  /// Asserts that they are non-empty, from a tracked file, and in-bounds.
  const MarkedFile &fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const;

  /// Token stream produced after preprocessing; conceptually this captures the
  /// same stream as 'clang -E' (excluding the preprocessor directives like
  /// #file, etc.).
  std::vector<syntax::Token> ExpandedTokens;
  // Index of ExpandedTokens for faster lookups by SourceLocation.
  llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex;
  llvm::DenseMap<FileID, MarkedFile> Files;
  // The value is never null; we use a pointer instead of a reference to avoid
  // disabling the implicit assignment operator.
  const SourceManager *SourceMgr;
};

/// The spelled tokens that overlap or touch a spelling location Loc.
/// This always returns 0-2 tokens.
llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens);
llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc, llvm::ArrayRef<syntax::Token> Tokens);

/// The identifier token that overlaps or touches a spelling location Loc.
/// If there is none, returns nullptr.
const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,
                          llvm::ArrayRef<syntax::Token> Tokens);
const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,
                          const syntax::TokenBuffer &Tokens);
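// Example (illustrative sketch): finding the identifier under a cursor
// position, e.g. for go-to-definition in an editor. `CursorLoc` is a spelling
// location inside a file tracked by `Tokens`; both names are hypothetical.
//
//   if (const syntax::Token *Id = spelledIdentifierTouching(CursorLoc, Tokens))
//     llvm::errs() << "identifier under cursor: "
//                  << Id->text(Tokens.sourceManager()) << "\n";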

/// Lex the text buffer corresponding to \p FID in raw mode and record the
/// resulting spelled tokens. Does minimal post-processing on raw identifiers,
/// setting the appropriate token kind (instead of the raw_identifier reported
/// by the lexer in raw mode). This is a very low-level function; most users
/// should prefer to use TokenCollector. Lexing in raw mode produces wildly
/// different results from what one might expect when running a C++ frontend,
/// e.g. the preprocessor does not run at all.
/// The result will *not* have an 'eof' token at the end.
std::vector<syntax::Token> tokenize(FileID FID, const SourceManager &SM,
                                    const LangOptions &LO);
/// Similar to the above, but tokenizes only a part of the file instead of the
/// whole file. Note that the first token might be incomplete if
/// FR.beginOffset() is not at the beginning of a token, and the last token
/// returned will start before FR.endOffset() but might end after it.
std::vector<syntax::Token>
tokenize(const FileRange &FR, const SourceManager &SM, const LangOptions &LO);
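// Example (illustrative sketch): raw-lexing the main file without running the
// preprocessor. `SM` and `LangOpts` are assumed to come from an existing
// CompilerInstance or SourceManager setup.
//
//   std::vector<syntax::Token> Toks =
//       tokenize(SM.getMainFileID(), SM, LangOpts);
//   // Toks contains only spelled tokens: macros are not expanded and there
//   // is no trailing 'eof' token.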

/// Collects tokens for the main file while running the frontend action. An
/// instance of this object should be created on
/// FrontendAction::BeginSourceFile() and the results should be consumed after
/// FrontendAction::Execute() finishes.
class TokenCollector {
public:
  /// Adds the hooks to collect the tokens. Should be called before the
  /// preprocessing starts, i.e. as a part of BeginSourceFile() or
  /// CreateASTConsumer().
  TokenCollector(Preprocessor &P);

  /// Finalizes token collection. Should be called after preprocessing is
  /// finished, i.e. after running Execute().
  [[nodiscard]] TokenBuffer consume() &&;
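  // Example (illustrative sketch): typical lifecycle inside a frontend action.
  // `CI` is a CompilerInstance; the surrounding action is left out and the
  // variable names are hypothetical.
  //
  //   // In BeginSourceFile() / CreateASTConsumer():
  //   auto Collector =
  //       std::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
  //   // ... Execute() runs and drives the preprocessor ...
  //   // After Execute() finishes:
  //   syntax::TokenBuffer Tokens = std::move(*Collector).consume();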

private:
  /// Maps from a start to an end spelling location of transformations
  /// performed by the preprocessor. These include:
  ///   1. range from '#' to the last token in the line for PP directives,
  ///   2. macro name and arguments for macro expansions.
  /// Note that we record only top-level macro expansions; intermediate
  /// expansions (e.g. inside macro arguments) are ignored.
  ///
  /// Used to find correct boundaries of macro calls and directives when
  /// building mappings from spelled to expanded tokens.
  ///
  /// Logically, at each point of the preprocessor execution there is a stack
  /// of macro expansions being processed and we could use it to recover the
  /// location information we need. However, the public preprocessor API only
  /// exposes the points when macro expansions start (when we push a macro onto
  /// the stack) and not when they end (when we pop a macro from the stack).
  /// To work around this limitation, we rely on source location information
  /// stored in this map.
  using PPExpansions = llvm::DenseMap<SourceLocation, SourceLocation>;
  class Builder;
  class CollectPPExpansions;

  std::vector<syntax::Token> Expanded;
  // FIXME: we only store macro expansions, also add directives (#pragma, etc.)
  PPExpansions Expansions;
  Preprocessor &PP;
  CollectPPExpansions *Collector;
};

} // namespace syntax
} // namespace clang

#endif