Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters --*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | |||
| 9 | #ifndef LLVM_CLANG_BASIC_CHARINFO_H |
||
| 10 | #define LLVM_CLANG_BASIC_CHARINFO_H |
||
| 11 | |||
| 12 | #include "clang/Basic/LLVM.h" |
||
| 13 | #include "llvm/ADT/StringRef.h" |
||
| 14 | #include "llvm/Support/Compiler.h" |
||
| 15 | #include "llvm/Support/DataTypes.h" |
||
| 16 | |||
| 17 | namespace clang { |
||
namespace charinfo {
  /// Classification table indexed by unsigned character value. Callers test
  /// an entry against the CHAR_* flag bits declared below. The table itself
  /// is only declared here; its definition lives elsewhere.
  extern const uint16_t InfoTable[256];

  /// Primary character-class flags. Every flag occupies its own bit so that
  /// several classes can be tested with a single mask.
  enum {
    CHAR_HORZ_WS = 1 << 0,  // '\t', '\f', '\v'.  Note, no '\0'
    CHAR_VERT_WS = 1 << 1,  // '\r', '\n'
    CHAR_SPACE   = 1 << 2,  // ' '
    CHAR_DIGIT   = 1 << 3,  // 0-9
    CHAR_XLETTER = 1 << 4,  // a-f,A-F
    CHAR_UPPER   = 1 << 5,  // A-Z
    CHAR_LOWER   = 1 << 6,  // a-z
    CHAR_UNDER   = 1 << 7,  // _
    CHAR_PERIOD  = 1 << 8,  // .
    CHAR_RAWDEL  = 1 << 9,  // {}[]#<>%:;?*+-/^&|~!=,"'
    CHAR_PUNCT   = 1 << 10  // `$@()
  };

  /// Combined flags for letters that are also hex digits, split by case.
  enum {
    CHAR_XUPPER = CHAR_UPPER | CHAR_XLETTER,
    CHAR_XLOWER = CHAR_LOWER | CHAR_XLETTER
  };
} // end namespace charinfo
||
| 40 | |||
| 41 | /// Returns true if a byte is an ASCII character. |
||
| 42 | LLVM_READNONE inline bool isASCII(char c) { |
||
| 43 | return static_cast<unsigned char>(c) <= 127; |
||
| 44 | } |
||
| 45 | |||
| 46 | LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; } |
||
| 47 | |||
| 48 | /// Returns true if a codepoint is an ASCII character. |
||
| 49 | LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; } |
||
| 50 | LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; } |
||
| 51 | |||
| 52 | /// Returns true if this is a valid first character of a C identifier, |
||
| 53 | /// which is [a-zA-Z_]. |
||
| 54 | LLVM_READONLY inline bool isAsciiIdentifierStart(unsigned char c, |
||
| 55 | bool AllowDollar = false) { |
||
| 56 | using namespace charinfo; |
||
| 57 | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER)) |
||
| 58 | return true; |
||
| 59 | return AllowDollar && c == '$'; |
||
| 60 | } |
||
| 61 | |||
| 62 | /// Returns true if this is a body character of a C identifier, |
||
| 63 | /// which is [a-zA-Z0-9_]. |
||
| 64 | LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c, |
||
| 65 | bool AllowDollar = false) { |
||
| 66 | using namespace charinfo; |
||
| 67 | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER)) |
||
| 68 | return true; |
||
| 69 | return AllowDollar && c == '$'; |
||
| 70 | } |
||
| 71 | |||
| 72 | /// Returns true if this character is horizontal ASCII whitespace: |
||
| 73 | /// ' ', '\\t', '\\f', '\\v'. |
||
| 74 | /// |
||
| 75 | /// Note that this returns false for '\\0'. |
||
| 76 | LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) { |
||
| 77 | using namespace charinfo; |
||
| 78 | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; |
||
| 79 | } |
||
| 80 | |||
| 81 | /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. |
||
| 82 | /// |
||
| 83 | /// Note that this returns false for '\\0'. |
||
| 84 | LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) { |
||
| 85 | using namespace charinfo; |
||
| 86 | return (InfoTable[c] & CHAR_VERT_WS) != 0; |
||
| 87 | } |
||
| 88 | |||
| 89 | /// Return true if this character is horizontal or vertical ASCII whitespace: |
||
| 90 | /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. |
||
| 91 | /// |
||
| 92 | /// Note that this returns false for '\\0'. |
||
| 93 | LLVM_READONLY inline bool isWhitespace(unsigned char c) { |
||
| 94 | using namespace charinfo; |
||
| 95 | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0; |
||
| 96 | } |
||
| 97 | |||
| 98 | /// Return true if this character is an ASCII digit: [0-9] |
||
| 99 | LLVM_READONLY inline bool isDigit(unsigned char c) { |
||
| 100 | using namespace charinfo; |
||
| 101 | return (InfoTable[c] & CHAR_DIGIT) != 0; |
||
| 102 | } |
||
| 103 | |||
| 104 | /// Return true if this character is a lowercase ASCII letter: [a-z] |
||
| 105 | LLVM_READONLY inline bool isLowercase(unsigned char c) { |
||
| 106 | using namespace charinfo; |
||
| 107 | return (InfoTable[c] & CHAR_LOWER) != 0; |
||
| 108 | } |
||
| 109 | |||
| 110 | /// Return true if this character is an uppercase ASCII letter: [A-Z] |
||
| 111 | LLVM_READONLY inline bool isUppercase(unsigned char c) { |
||
| 112 | using namespace charinfo; |
||
| 113 | return (InfoTable[c] & CHAR_UPPER) != 0; |
||
| 114 | } |
||
| 115 | |||
| 116 | /// Return true if this character is an ASCII letter: [a-zA-Z] |
||
| 117 | LLVM_READONLY inline bool isLetter(unsigned char c) { |
||
| 118 | using namespace charinfo; |
||
| 119 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0; |
||
| 120 | } |
||
| 121 | |||
| 122 | /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9] |
||
| 123 | LLVM_READONLY inline bool isAlphanumeric(unsigned char c) { |
||
| 124 | using namespace charinfo; |
||
| 125 | return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0; |
||
| 126 | } |
||
| 127 | |||
| 128 | /// Return true if this character is an ASCII hex digit: [0-9a-fA-F] |
||
| 129 | LLVM_READONLY inline bool isHexDigit(unsigned char c) { |
||
| 130 | using namespace charinfo; |
||
| 131 | return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0; |
||
| 132 | } |
||
| 133 | |||
| 134 | /// Return true if this character is an ASCII punctuation character. |
||
| 135 | /// |
||
| 136 | /// Note that '_' is both a punctuation character and an identifier character! |
||
| 137 | LLVM_READONLY inline bool isPunctuation(unsigned char c) { |
||
| 138 | using namespace charinfo; |
||
| 139 | return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; |
||
| 140 | } |
||
| 141 | |||
| 142 | /// Return true if this character is an ASCII printable character; that is, a |
||
| 143 | /// character that should take exactly one column to print in a fixed-width |
||
| 144 | /// terminal. |
||
| 145 | LLVM_READONLY inline bool isPrintable(unsigned char c) { |
||
| 146 | using namespace charinfo; |
||
| 147 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| |
||
| 148 | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; |
||
| 149 | } |
||
| 150 | |||
| 151 | /// Return true if this is the body character of a C preprocessing number, |
||
| 152 | /// which is [a-zA-Z0-9_.]. |
||
| 153 | LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { |
||
| 154 | using namespace charinfo; |
||
| 155 | return (InfoTable[c] & |
||
| 156 | (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0; |
||
| 157 | } |
||
| 158 | |||
| 159 | /// Return true if this is the body character of a C++ raw string delimiter. |
||
| 160 | LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { |
||
| 161 | using namespace charinfo; |
||
| 162 | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| |
||
| 163 | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; |
||
| 164 | } |
||
| 165 | |||
/// Bit flags selecting which quote characters escapeCStyle() escapes.
enum class EscapeChar {
  Single = 0x1, // escape the single quote
  Double = 0x2, // escape the double quote
  // Inside the enumerator list the earlier enumerators still have the
  // underlying integer type, so they can be OR-ed without a cast.
  SingleAndDouble = Single | Double,
};
||
| 171 | |||
| 172 | /// Return C-style escaped string for special characters, or an empty string if |
||
| 173 | /// there is no such mapping. |
||
| 174 | template <EscapeChar Opt, class CharT> |
||
| 175 | LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { |
||
| 176 | switch (Ch) { |
||
| 177 | case '\\': |
||
| 178 | return "\\\\"; |
||
| 179 | case '\'': |
||
| 180 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0) |
||
| 181 | break; |
||
| 182 | return "\\'"; |
||
| 183 | case '"': |
||
| 184 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0) |
||
| 185 | break; |
||
| 186 | return "\\\""; |
||
| 187 | case '\a': |
||
| 188 | return "\\a"; |
||
| 189 | case '\b': |
||
| 190 | return "\\b"; |
||
| 191 | case '\f': |
||
| 192 | return "\\f"; |
||
| 193 | case '\n': |
||
| 194 | return "\\n"; |
||
| 195 | case '\r': |
||
| 196 | return "\\r"; |
||
| 197 | case '\t': |
||
| 198 | return "\\t"; |
||
| 199 | case '\v': |
||
| 200 | return "\\v"; |
||
| 201 | } |
||
| 202 | return {}; |
||
| 203 | } |
||
| 204 | |||
| 205 | /// Converts the given ASCII character to its lowercase equivalent. |
||
| 206 | /// |
||
| 207 | /// If the character is not an uppercase character, it is returned as is. |
||
| 208 | LLVM_READONLY inline char toLowercase(char c) { |
||
| 209 | if (isUppercase(c)) |
||
| 210 | return c + 'a' - 'A'; |
||
| 211 | return c; |
||
| 212 | } |
||
| 213 | |||
| 214 | /// Converts the given ASCII character to its uppercase equivalent. |
||
| 215 | /// |
||
| 216 | /// If the character is not a lowercase character, it is returned as is. |
||
| 217 | LLVM_READONLY inline char toUppercase(char c) { |
||
| 218 | if (isLowercase(c)) |
||
| 219 | return c + 'A' - 'a'; |
||
| 220 | return c; |
||
| 221 | } |
||
| 222 | |||
| 223 | |||
| 224 | /// Return true if this is a valid ASCII identifier. |
||
| 225 | /// |
||
| 226 | /// Note that this is a very simple check; it does not accept UCNs as valid |
||
| 227 | /// identifier characters. |
||
| 228 | LLVM_READONLY inline bool isValidAsciiIdentifier(StringRef S, |
||
| 229 | bool AllowDollar = false) { |
||
| 230 | if (S.empty() || !isAsciiIdentifierStart(S[0], AllowDollar)) |
||
| 231 | return false; |
||
| 232 | |||
| 233 | for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I) |
||
| 234 | if (!isAsciiIdentifierContinue(*I, AllowDollar)) |
||
| 235 | return false; |
||
| 236 | |||
| 237 | return true; |
||
| 238 | } |
||
| 239 | |||
| 240 | } // end namespace clang |
||
| 241 | |||
| 242 | #endif |