Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This file defines the NumericLiteralParser, CharLiteralParser, and |
||
| 10 | // StringLiteralParser interfaces. |
||
| 11 | // |
||
| 12 | //===----------------------------------------------------------------------===// |
||
| 13 | |||
| 14 | #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H |
||
| 15 | #define LLVM_CLANG_LEX_LITERALSUPPORT_H |
||
| 16 | |||
| 17 | #include "clang/Basic/CharInfo.h" |
||
| 18 | #include "clang/Basic/LLVM.h" |
||
| 19 | #include "clang/Basic/TokenKinds.h" |
||
| 20 | #include "llvm/ADT/APFloat.h" |
||
| 21 | #include "llvm/ADT/ArrayRef.h" |
||
| 22 | #include "llvm/ADT/SmallString.h" |
||
| 23 | #include "llvm/ADT/StringRef.h" |
||
| 24 | #include "llvm/Support/DataTypes.h" |
||
| 25 | |||
| 26 | namespace clang { |
||
| 27 | |||
| 28 | class DiagnosticsEngine; |
||
| 29 | class Preprocessor; |
||
| 30 | class Token; |
||
| 31 | class SourceLocation; |
||
| 32 | class TargetInfo; |
||
| 33 | class SourceManager; |
||
| 34 | class LangOptions; |
||
| 35 | |||
| 36 | /// Copy characters from Input to Buf, expanding any UCNs (universal character names). |
||
| 37 | void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); |
||
| 38 | |||
| 39 | /// NumericLiteralParser - This performs strict semantic analysis of the content |
||
| 40 | /// of a ppnumber, classifying it as either integer, floating, or erroneous, |
||
| 41 | /// determines the radix of the value and can convert it to a useful value. |
||
| 42 | class NumericLiteralParser { |
||
| 43 | const SourceManager &SM; |
||
| 44 | const LangOptions &LangOpts; |
||
| 45 | DiagnosticsEngine &Diags; |
||
| 46 | |||
| 47 | const char *const ThisTokBegin; |
||
| 48 | const char *const ThisTokEnd; |
||
| 49 | const char *DigitsBegin, *SuffixBegin; // markers |
||
| 50 | const char *s; // cursor |
||
| 51 | |||
| 52 | unsigned radix; |
||
| 53 | |||
| 54 | bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; |
||
| 55 | |||
| 56 | SmallString<32> UDSuffixBuf; |
||
| 57 | |||
| 58 | public: |
||
| 59 | NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, |
||
| 60 | const SourceManager &SM, const LangOptions &LangOpts, |
||
| 61 | const TargetInfo &Target, DiagnosticsEngine &Diags); |
||
| 62 | bool hadError : 1; |
||
| 63 | bool isUnsigned : 1; |
||
| 64 | bool isLong : 1; // This is *not* set for long long. |
||
| 65 | bool isLongLong : 1; |
||
| 66 | bool isSizeT : 1; // 1z, 1uz (C++2b) |
||
| 67 | bool isHalf : 1; // 1.0h |
||
| 68 | bool isFloat : 1; // 1.0f |
||
| 69 | bool isImaginary : 1; // 1.0i |
||
| 70 | bool isFloat16 : 1; // 1.0f16 |
||
| 71 | bool isFloat128 : 1; // 1.0q |
||
| 72 | bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr |
||
| 73 | bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk |
||
| 74 | bool isBitInt : 1; // 1wb, 1uwb (C2x) |
||
| 75 | uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. |
||
| 76 | |||
| 77 | |||
| 78 | bool isFixedPointLiteral() const { |
||
| 79 | return (saw_period || saw_exponent) && saw_fixed_point_suffix; |
||
| 80 | } |
||
| 81 | |||
| 82 | bool isIntegerLiteral() const { |
||
| 83 | return !saw_period && !saw_exponent && !isFixedPointLiteral(); |
||
| 84 | } |
||
| 85 | bool isFloatingLiteral() const { |
||
| 86 | return (saw_period || saw_exponent) && !isFixedPointLiteral(); |
||
| 87 | } |
||
| 88 | |||
| 89 | bool hasUDSuffix() const { |
||
| 90 | return saw_ud_suffix; |
||
| 91 | } |
||
| 92 | StringRef getUDSuffix() const { |
||
| 93 | assert(saw_ud_suffix); |
||
| 94 | return UDSuffixBuf; |
||
| 95 | } |
||
| 96 | unsigned getUDSuffixOffset() const { |
||
| 97 | assert(saw_ud_suffix); |
||
| 98 | return SuffixBegin - ThisTokBegin; |
||
| 99 | } |
||
| 100 | |||
| 101 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
||
| 102 | |||
| 103 | unsigned getRadix() const { return radix; } |
||
| 104 | |||
| 105 | /// GetIntegerValue - Convert this numeric literal value to an APInt that |
||
| 106 | /// matches Val's input width. If there is an overflow (i.e., if the unsigned |
||
| 107 | /// value read is larger than the APInt's bits will hold), set Val to the low |
||
| 108 | /// bits of the result and return true. Otherwise, return false. |
||
| 109 | bool GetIntegerValue(llvm::APInt &Val); |
||
| 110 | |||
| 111 | /// GetFloatValue - Convert this numeric literal to a floating value, using |
||
| 112 | /// the specified APFloat fltSemantics (specifying float, double, etc). |
||
| 113 | /// The optional bool isExact (passed-by-reference) has its value |
||
| 114 | /// set to true if the returned APFloat can represent the number in the |
||
| 115 | /// literal exactly, and false otherwise. |
||
| 116 | llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); |
||
| 117 | |||
| 118 | /// GetFixedPointValue - Convert this numeric literal value into a |
||
| 119 | /// scaled integer that represents this value. Returns true if an overflow |
||
| 120 | /// occurred when calculating the integral part of the scaled integer or |
||
| 121 | /// calculating the digit sequence of the exponent. |
||
| 122 | bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); |
||
| 123 | |||
| 124 | /// Get the digits that comprise the literal. This excludes any prefix or |
||
| 125 | /// suffix associated with the literal. |
||
| 126 | StringRef getLiteralDigits() const { |
||
| 127 | assert(!hadError && "cannot reliably get the literal digits with an error"); |
||
| 128 | return StringRef(DigitsBegin, SuffixBegin - DigitsBegin); |
||
| 129 | } |
||
| 130 | |||
| 131 | private: |
||
| 132 | |||
| 133 | void ParseNumberStartingWithZero(SourceLocation TokLoc); |
||
| 134 | void ParseDecimalOrOctalCommon(SourceLocation TokLoc); |
||
| 135 | |||
| 136 | static bool isDigitSeparator(char C) { return C == '\''; } |
||
| 137 | |||
| 138 | /// Determine whether the sequence of characters [Start, End) contains |
||
| 139 | /// any real digits (not digit separators). |
||
| 140 | bool containsDigits(const char *Start, const char *End) { |
||
| 141 | return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); |
||
| 142 | } |
||
| 143 | |||
| 144 | enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; |
||
| 145 | |||
| 146 | /// Ensure that we don't have a digit separator here. |
||
| 147 | void checkSeparator(SourceLocation TokLoc, const char *Pos, |
||
| 148 | CheckSeparatorKind IsAfterDigits); |
||
| 149 | |||
| 150 | /// SkipHexDigits - Read and skip over any hex digits, up to End. |
||
| 151 | /// Return a pointer to the first non-hex digit or End. |
||
| 152 | const char *SkipHexDigits(const char *ptr) { |
||
| 153 | while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) |
||
| 154 | ptr++; |
||
| 155 | return ptr; |
||
| 156 | } |
||
| 157 | |||
| 158 | /// SkipOctalDigits - Read and skip over any octal digits, up to End. |
||
| 159 | /// Return a pointer to the first non-hex digit or End. |
||
| 160 | const char *SkipOctalDigits(const char *ptr) { |
||
| 161 | while (ptr != ThisTokEnd && |
||
| 162 | ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) |
||
| 163 | ptr++; |
||
| 164 | return ptr; |
||
| 165 | } |
||
| 166 | |||
| 167 | /// SkipDigits - Read and skip over any digits, up to End. |
||
| 168 | /// Return a pointer to the first non-hex digit or End. |
||
| 169 | const char *SkipDigits(const char *ptr) { |
||
| 170 | while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) |
||
| 171 | ptr++; |
||
| 172 | return ptr; |
||
| 173 | } |
||
| 174 | |||
| 175 | /// SkipBinaryDigits - Read and skip over any binary digits, up to End. |
||
| 176 | /// Return a pointer to the first non-binary digit or End. |
||
| 177 | const char *SkipBinaryDigits(const char *ptr) { |
||
| 178 | while (ptr != ThisTokEnd && |
||
| 179 | (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) |
||
| 180 | ptr++; |
||
| 181 | return ptr; |
||
| 182 | } |
||
| 183 | |||
| 184 | }; |
||
| 185 | |||
| 186 | /// CharLiteralParser - Perform interpretation and semantic analysis of a |
||
| 187 | /// character literal. |
||
| 188 | class CharLiteralParser { |
||
| 189 | uint64_t Value; |
||
| 190 | tok::TokenKind Kind; |
||
| 191 | bool IsMultiChar; |
||
| 192 | bool HadError; |
||
| 193 | SmallString<32> UDSuffixBuf; |
||
| 194 | unsigned UDSuffixOffset; |
||
| 195 | public: |
||
| 196 | CharLiteralParser(const char *begin, const char *end, |
||
| 197 | SourceLocation Loc, Preprocessor &PP, |
||
| 198 | tok::TokenKind kind); |
||
| 199 | |||
| 200 | bool hadError() const { return HadError; } |
||
| 201 | bool isOrdinary() const { return Kind == tok::char_constant; } |
||
| 202 | bool isWide() const { return Kind == tok::wide_char_constant; } |
||
| 203 | bool isUTF8() const { return Kind == tok::utf8_char_constant; } |
||
| 204 | bool isUTF16() const { return Kind == tok::utf16_char_constant; } |
||
| 205 | bool isUTF32() const { return Kind == tok::utf32_char_constant; } |
||
| 206 | bool isMultiChar() const { return IsMultiChar; } |
||
| 207 | uint64_t getValue() const { return Value; } |
||
| 208 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
||
| 209 | unsigned getUDSuffixOffset() const { |
||
| 210 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
| 211 | return UDSuffixOffset; |
||
| 212 | } |
||
| 213 | }; |
||
| 214 | |||
| 215 | /// StringLiteralParser - This decodes string escape characters and performs |
||
| 216 | /// wide string analysis and Translation Phase #6 (concatenation of string |
||
| 217 | /// literals) (C99 5.1.1.2p1). |
||
| 218 | class StringLiteralParser { |
||
| 219 | const SourceManager &SM; |
||
| 220 | const LangOptions &Features; |
||
| 221 | const TargetInfo &Target; |
||
| 222 | DiagnosticsEngine *Diags; |
||
| 223 | |||
| 224 | unsigned MaxTokenLength; |
||
| 225 | unsigned SizeBound; |
||
| 226 | unsigned CharByteWidth; |
||
| 227 | tok::TokenKind Kind; |
||
| 228 | SmallString<512> ResultBuf; |
||
| 229 | char *ResultPtr; // cursor |
||
| 230 | SmallString<32> UDSuffixBuf; |
||
| 231 | unsigned UDSuffixToken; |
||
| 232 | unsigned UDSuffixOffset; |
||
| 233 | public: |
||
| 234 | StringLiteralParser(ArrayRef<Token> StringToks, |
||
| 235 | Preprocessor &PP); |
||
| 236 | StringLiteralParser(ArrayRef<Token> StringToks, |
||
| 237 | const SourceManager &sm, const LangOptions &features, |
||
| 238 | const TargetInfo &target, |
||
| 239 | DiagnosticsEngine *diags = nullptr) |
||
| 240 | : SM(sm), Features(features), Target(target), Diags(diags), |
||
| 241 | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), |
||
| 242 | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { |
||
| 243 | init(StringToks); |
||
| 244 | } |
||
| 245 | |||
| 246 | |||
| 247 | bool hadError; |
||
| 248 | bool Pascal; |
||
| 249 | |||
| 250 | StringRef GetString() const { |
||
| 251 | return StringRef(ResultBuf.data(), GetStringLength()); |
||
| 252 | } |
||
| 253 | unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } |
||
| 254 | |||
| 255 | unsigned GetNumStringChars() const { |
||
| 256 | return GetStringLength() / CharByteWidth; |
||
| 257 | } |
||
| 258 | /// getOffsetOfStringByte - This function returns the offset of the |
||
| 259 | /// specified byte of the string data represented by Token. This handles |
||
| 260 | /// advancing over escape sequences in the string. |
||
| 261 | /// |
||
| 262 | /// If the Diagnostics pointer is non-null, then this will do semantic |
||
| 263 | /// checking of the string literal and emit errors and warnings. |
||
| 264 | unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; |
||
| 265 | |||
| 266 | bool isOrdinary() const { return Kind == tok::string_literal; } |
||
| 267 | bool isWide() const { return Kind == tok::wide_string_literal; } |
||
| 268 | bool isUTF8() const { return Kind == tok::utf8_string_literal; } |
||
| 269 | bool isUTF16() const { return Kind == tok::utf16_string_literal; } |
||
| 270 | bool isUTF32() const { return Kind == tok::utf32_string_literal; } |
||
| 271 | bool isPascal() const { return Pascal; } |
||
| 272 | |||
| 273 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
||
| 274 | |||
| 275 | /// Get the index of a token containing a ud-suffix. |
||
| 276 | unsigned getUDSuffixToken() const { |
||
| 277 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
| 278 | return UDSuffixToken; |
||
| 279 | } |
||
| 280 | /// Get the spelling offset of the first byte of the ud-suffix. |
||
| 281 | unsigned getUDSuffixOffset() const { |
||
| 282 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
||
| 283 | return UDSuffixOffset; |
||
| 284 | } |
||
| 285 | |||
| 286 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
||
| 287 | |||
| 288 | private: |
||
| 289 | void init(ArrayRef<Token> StringToks); |
||
| 290 | bool CopyStringFragment(const Token &Tok, const char *TokBegin, |
||
| 291 | StringRef Fragment); |
||
| 292 | void DiagnoseLexingError(SourceLocation Loc); |
||
| 293 | }; |
||
| 294 | |||
| 295 | } // end namespace clang |
||
| 296 | |||
| 297 | #endif |