Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
  9. #define LLVM_SUPPORT_UNICODECHARRANGES_H
  10.  
  11. #include "llvm/ADT/ArrayRef.h"
  12. #include "llvm/Support/Compiler.h"
  13. #include "llvm/Support/Debug.h"
  14. #include "llvm/Support/raw_ostream.h"
  15. #include <algorithm>
  16.  
  17. #define DEBUG_TYPE "unicode"
  18.  
  19. namespace llvm {
  20. namespace sys {
  21.  
  /// An inclusive interval of Unicode code points: every value V with
  /// Lower <= V <= Upper belongs to the range.
  struct UnicodeCharRange {
    uint32_t Lower; ///< First code point in the range (inclusive).
    uint32_t Upper; ///< Last code point in the range (inclusive).
  };

  /// Heterogeneous ordering between a code point and a range, in both argument
  /// orders, so that binary-search style algorithms can look up a bare code
  /// point in a sorted sequence of ranges.

  /// A code point sorts before a range when it lies below the range's first
  /// element.
  inline bool operator<(uint32_t Value, UnicodeCharRange Range) {
    return Range.Lower > Value;
  }

  /// A range sorts before a code point when the code point lies above the
  /// range's last element.
  inline bool operator<(UnicodeCharRange Range, uint32_t Value) {
    return Value > Range.Upper;
  }
  34.  
  35. /// Holds a reference to an ordered array of UnicodeCharRange and allows
  36. /// to quickly check if a code point is contained in the set represented by this
  37. /// array.
  38. class UnicodeCharSet {
  39. public:
  40.   typedef ArrayRef<UnicodeCharRange> CharRanges;
  41.  
  42.   /// Constructs a UnicodeCharSet instance from an array of
  43.   /// UnicodeCharRanges.
  44.   ///
  45.   /// Array pointed by \p Ranges should have the lifetime at least as long as
  46.   /// the UnicodeCharSet instance, and should not change. Array is validated by
  47.   /// the constructor, so it makes sense to create as few UnicodeCharSet
  48.   /// instances per each array of ranges, as possible.
  49. #ifdef NDEBUG
  50.  
  51.   // FIXME: This could use constexpr + static_assert. This way we
  52.   // may get rid of NDEBUG in this header. Unfortunately there are some
  53.   // problems to get this working with MSVC 2013. Change this when
  54.   // the support for MSVC 2013 is dropped.
  55.   constexpr UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
  56. #else
  57.   UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
  58.     assert(rangesAreValid());
  59.   }
  60. #endif
  61.  
  62.   /// Returns true if the character set contains the Unicode code point
  63.   /// \p C.
  64.   bool contains(uint32_t C) const {
  65.     return std::binary_search(Ranges.begin(), Ranges.end(), C);
  66.   }
  67.  
  68. private:
  69.   /// Returns true if each of the ranges is a proper closed range
  70.   /// [min, max], and if the ranges themselves are ordered and non-overlapping.
  71.   bool rangesAreValid() const {
  72.     uint32_t Prev = 0;
  73.     for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
  74.          I != E; ++I) {
  75.       if (I != Ranges.begin() && Prev >= I->Lower) {
  76.         LLVM_DEBUG(dbgs() << "Upper bound 0x");
  77.         LLVM_DEBUG(dbgs().write_hex(Prev));
  78.         LLVM_DEBUG(dbgs() << " should be less than succeeding lower bound 0x");
  79.         LLVM_DEBUG(dbgs().write_hex(I->Lower) << "\n");
  80.         return false;
  81.       }
  82.       if (I->Upper < I->Lower) {
  83.         LLVM_DEBUG(dbgs() << "Upper bound 0x");
  84.         LLVM_DEBUG(dbgs().write_hex(I->Lower));
  85.         LLVM_DEBUG(dbgs() << " should not be less than lower bound 0x");
  86.         LLVM_DEBUG(dbgs().write_hex(I->Upper) << "\n");
  87.         return false;
  88.       }
  89.       Prev = I->Upper;
  90.     }
  91.  
  92.     return true;
  93.   }
  94.  
  95.   const CharRanges Ranges;
  96. };
  97.  
  98. } // namespace sys
  99. } // namespace llvm
  100.  
  101. #undef DEBUG_TYPE // "unicode"
  102.  
  103. #endif // LLVM_SUPPORT_UNICODECHARRANGES_H
  104.