Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file defines the NumericLiteralParser, CharLiteralParser, and |
||
10 | // StringLiteralParser interfaces. |
||
11 | // |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H |
||
15 | #define LLVM_CLANG_LEX_LITERALSUPPORT_H |
||
16 | |||
17 | #include "clang/Basic/CharInfo.h" |
||
18 | #include "clang/Basic/LLVM.h" |
||
19 | #include "clang/Basic/TokenKinds.h" |
||
20 | #include "llvm/ADT/APFloat.h" |
||
21 | #include "llvm/ADT/ArrayRef.h" |
||
22 | #include "llvm/ADT/SmallString.h" |
||
23 | #include "llvm/ADT/StringRef.h" |
||
24 | #include "llvm/Support/DataTypes.h" |
||
25 | |||
26 | namespace clang { |
||
27 | |||
28 | class DiagnosticsEngine; |
||
29 | class Preprocessor; |
||
30 | class Token; |
||
31 | class SourceLocation; |
||
32 | class TargetInfo; |
||
33 | class SourceManager; |
||
34 | class LangOptions; |
||
35 | |||
36 | /// Copy characters from Input to Buf, expanding any UCNs. |
||
37 | void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); |
||
38 | |||
39 | /// NumericLiteralParser - This performs strict semantic analysis of the content |
||
40 | /// of a ppnumber, classifying it as either integer, floating, or erroneous, |
||
41 | /// determines the radix of the value and can convert it to a useful value. |
||
42 | class NumericLiteralParser { |
||
43 | const SourceManager &SM; |
||
44 | const LangOptions &LangOpts; |
||
45 | DiagnosticsEngine &Diags; |
||
46 | |||
47 | const char *const ThisTokBegin; |
||
48 | const char *const ThisTokEnd; |
||
49 | const char *DigitsBegin, *SuffixBegin; // markers |
||
50 | const char *s; // cursor |
||
51 | |||
52 | unsigned radix; |
||
53 | |||
54 | bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; |
||
55 | |||
56 | SmallString<32> UDSuffixBuf; |
||
57 | |||
58 | public: |
||
59 | NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, |
||
60 | const SourceManager &SM, const LangOptions &LangOpts, |
||
61 | const TargetInfo &Target, DiagnosticsEngine &Diags); |
||
62 | bool hadError : 1; |
||
63 | bool isUnsigned : 1; |
||
64 | bool isLong : 1; // This is *not* set for long long. |
||
65 | bool isLongLong : 1; |
||
66 | bool isSizeT : 1; // 1z, 1uz (C++2b) |
||
67 | bool isHalf : 1; // 1.0h |
||
68 | bool isFloat : 1; // 1.0f |
||
69 | bool isImaginary : 1; // 1.0i |
||
70 | bool isFloat16 : 1; // 1.0f16 |
||
71 | bool isFloat128 : 1; // 1.0q |
||
72 | bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr |
||
73 | bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk |
||
74 | bool isBitInt : 1; // 1wb, 1uwb (C2x) |
||
75 | uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. |
||
76 | |||
77 | |||
78 | bool isFixedPointLiteral() const { |
||
79 | return (saw_period || saw_exponent) && saw_fixed_point_suffix; |
||
80 | } |
||
81 | |||
82 | bool isIntegerLiteral() const { |
||
83 | return !saw_period && !saw_exponent && !isFixedPointLiteral(); |
||
84 | } |
||
85 | bool isFloatingLiteral() const { |
||
86 | return (saw_period || saw_exponent) && !isFixedPointLiteral(); |
||
87 | } |
||
88 | |||
89 | bool hasUDSuffix() const { |
||
90 | return saw_ud_suffix; |
||
91 | } |
||
92 | StringRef getUDSuffix() const { |
||
93 | assert(saw_ud_suffix); |
||
94 | return UDSuffixBuf; |
||
95 | } |
||
96 | unsigned getUDSuffixOffset() const { |
||
97 | assert(saw_ud_suffix); |
||
98 | return SuffixBegin - ThisTokBegin; |
||
99 | } |
||
100 | |||
101 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
||
102 | |||
103 | unsigned getRadix() const { return radix; } |
||
104 | |||
105 | /// GetIntegerValue - Convert this numeric literal value to an APInt that |
||
106 | /// matches Val's input width. If there is an overflow (i.e., if the unsigned |
||
107 | /// value read is larger than the APInt's bits will hold), set Val to the low |
||
108 | /// bits of the result and return true. Otherwise, return false. |
||
109 | bool GetIntegerValue(llvm::APInt &Val); |
||
110 | |||
111 | /// GetFloatValue - Convert this numeric literal to a floating value, using |
||
112 | /// the specified APFloat fltSemantics (specifying float, double, etc). |
||
113 | /// The optional bool isExact (passed-by-reference) has its value |
||
114 | /// set to true if the returned APFloat can represent the number in the |
||
115 | /// literal exactly, and false otherwise. |
||
116 | llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); |
||
117 | |||
118 | /// GetFixedPointValue - Convert this numeric literal value into a |
||
119 | /// scaled integer that represents this value. Returns true if an overflow |
||
120 | /// occurred when calculating the integral part of the scaled integer or |
||
121 | /// calculating the digit sequence of the exponent. |
||
122 | bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); |
||
123 | |||
124 | /// Get the digits that comprise the literal. This excludes any prefix or |
||
125 | /// suffix associated with the literal. |
||
126 | StringRef getLiteralDigits() const { |
||
127 | assert(!hadError && "cannot reliably get the literal digits with an error"); |
||
128 | return StringRef(DigitsBegin, SuffixBegin - DigitsBegin); |
||
129 | } |
||
130 | |||
131 | private: |
||
132 | |||
133 | void ParseNumberStartingWithZero(SourceLocation TokLoc); |
||
134 | void ParseDecimalOrOctalCommon(SourceLocation TokLoc); |
||
135 | |||
136 | static bool isDigitSeparator(char C) { return C == '\''; } |
||
137 | |||
138 | /// Determine whether the sequence of characters [Start, End) contains |
||
139 | /// any real digits (not digit separators). |
||
140 | bool containsDigits(const char *Start, const char *End) { |
||
141 | return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); |
||
142 | } |
||
143 | |||
144 | enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; |
||
145 | |||
146 | /// Ensure that we don't have a digit separator here. |
||
147 | void checkSeparator(SourceLocation TokLoc, const char *Pos, |
||
148 | CheckSeparatorKind IsAfterDigits); |
||
149 | |||
150 | /// SkipHexDigits - Read and skip over any hex digits, up to End. |
||
151 | /// Return a pointer to the first non-hex digit or End. |
||
152 | const char *SkipHexDigits(const char *ptr) { |
||
153 | while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) |
||
154 | ptr++; |
||
155 | return ptr; |
||
156 | } |
||
157 | |||
158 | /// SkipOctalDigits - Read and skip over any octal digits, up to End. |
||
159 | /// Return a pointer to the first non-hex digit or End. |
||
160 | const char *SkipOctalDigits(const char *ptr) { |
||
161 | while (ptr != ThisTokEnd && |
||
162 | ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) |
||
163 | ptr++; |
||
164 | return ptr; |
||
165 | } |
||
166 | |||
167 | /// SkipDigits - Read and skip over any digits, up to End. |
||
168 | /// Return a pointer to the first non-hex digit or End. |
||
169 | const char *SkipDigits(const char *ptr) { |
||
170 | while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) |
||
171 | ptr++; |
||
172 | return ptr; |
||
173 | } |
||
174 | |||
175 | /// SkipBinaryDigits - Read and skip over any binary digits, up to End. |
||
176 | /// Return a pointer to the first non-binary digit or End. |
||
177 | const char *SkipBinaryDigits(const char *ptr) { |
||
178 | while (ptr != ThisTokEnd && |
||
179 | (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) |
||
180 | ptr++; |
||
181 | return ptr; |
||
182 | } |
||
183 | |||
184 | }; |
||
185 | |||
186 | /// CharLiteralParser - Perform interpretation and semantic analysis of a |
||
187 | /// character literal. |
||
188 | class CharLiteralParser { |
||
189 | uint64_t Value; |
||
190 | tok::TokenKind Kind; |
||
191 | bool IsMultiChar; |
||
192 | bool HadError; |
||
193 | SmallString<32> UDSuffixBuf; |
||
194 | unsigned UDSuffixOffset; |
||
195 | public: |
||
196 | CharLiteralParser(const char *begin, const char *end, |
||
197 | SourceLocation Loc, Preprocessor &PP, |
||
198 | tok::TokenKind kind); |
||
199 | |||
200 | bool hadError() const { return HadError; } |
||
201 | bool isOrdinary() const { return Kind == tok::char_constant; } |
||
202 | bool isWide() const { return Kind == tok::wide_char_constant; } |
||
203 | bool isUTF8() const { return Kind == tok::utf8_char_constant; } |
||
204 | bool isUTF16() const { return Kind == tok::utf16_char_constant; } |
||
205 | bool isUTF32() const { return Kind == tok::utf32_char_constant; } |
||
206 | bool isMultiChar() const { return IsMultiChar; } |
||
207 | uint64_t getValue() const { return Value; } |
||
208 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
||
209 | unsigned getUDSuffixOffset() const { |
||
210 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
211 | return UDSuffixOffset; |
||
212 | } |
||
213 | }; |
||
214 | |||
215 | /// StringLiteralParser - This decodes string escape characters and performs |
||
216 | /// wide string analysis and Translation Phase #6 (concatenation of string |
||
217 | /// literals) (C99 5.1.1.2p1). |
||
218 | class StringLiteralParser { |
||
219 | const SourceManager &SM; |
||
220 | const LangOptions &Features; |
||
221 | const TargetInfo &Target; |
||
222 | DiagnosticsEngine *Diags; |
||
223 | |||
224 | unsigned MaxTokenLength; |
||
225 | unsigned SizeBound; |
||
226 | unsigned CharByteWidth; |
||
227 | tok::TokenKind Kind; |
||
228 | SmallString<512> ResultBuf; |
||
229 | char *ResultPtr; // cursor |
||
230 | SmallString<32> UDSuffixBuf; |
||
231 | unsigned UDSuffixToken; |
||
232 | unsigned UDSuffixOffset; |
||
233 | public: |
||
234 | StringLiteralParser(ArrayRef<Token> StringToks, |
||
235 | Preprocessor &PP); |
||
236 | StringLiteralParser(ArrayRef<Token> StringToks, |
||
237 | const SourceManager &sm, const LangOptions &features, |
||
238 | const TargetInfo &target, |
||
239 | DiagnosticsEngine *diags = nullptr) |
||
240 | : SM(sm), Features(features), Target(target), Diags(diags), |
||
241 | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), |
||
242 | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { |
||
243 | init(StringToks); |
||
244 | } |
||
245 | |||
246 | |||
247 | bool hadError; |
||
248 | bool Pascal; |
||
249 | |||
250 | StringRef GetString() const { |
||
251 | return StringRef(ResultBuf.data(), GetStringLength()); |
||
252 | } |
||
253 | unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } |
||
254 | |||
255 | unsigned GetNumStringChars() const { |
||
256 | return GetStringLength() / CharByteWidth; |
||
257 | } |
||
258 | /// getOffsetOfStringByte - This function returns the offset of the |
||
259 | /// specified byte of the string data represented by Token. This handles |
||
260 | /// advancing over escape sequences in the string. |
||
261 | /// |
||
262 | /// If the Diagnostics pointer is non-null, then this will do semantic |
||
263 | /// checking of the string literal and emit errors and warnings. |
||
264 | unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; |
||
265 | |||
266 | bool isOrdinary() const { return Kind == tok::string_literal; } |
||
267 | bool isWide() const { return Kind == tok::wide_string_literal; } |
||
268 | bool isUTF8() const { return Kind == tok::utf8_string_literal; } |
||
269 | bool isUTF16() const { return Kind == tok::utf16_string_literal; } |
||
270 | bool isUTF32() const { return Kind == tok::utf32_string_literal; } |
||
271 | bool isPascal() const { return Pascal; } |
||
272 | |||
273 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
||
274 | |||
275 | /// Get the index of a token containing a ud-suffix. |
||
276 | unsigned getUDSuffixToken() const { |
||
277 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
278 | return UDSuffixToken; |
||
279 | } |
||
280 | /// Get the spelling offset of the first byte of the ud-suffix. |
||
281 | unsigned getUDSuffixOffset() const { |
||
282 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
283 | return UDSuffixOffset; |
||
284 | } |
||
285 | |||
286 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
||
287 | |||
288 | private: |
||
289 | void init(ArrayRef<Token> StringToks); |
||
290 | bool CopyStringFragment(const Token &Tok, const char *TokBegin, |
||
291 | StringRef Fragment); |
||
292 | void DiagnoseLexingError(SourceLocation Loc); |
||
293 | }; |
||
294 | |||
295 | } // end namespace clang |
||
296 | |||
297 | #endif |