Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Source locations are stored pervasively in the AST, making up a third of
  10. // the size of typical serialized files. Storing them efficiently is important.
  11. //
  12. // We use integers optimized by VBR-encoding, because:
  13. //  - when abbreviations cannot be used, VBR6 encoding is our only choice
  14. //  - in the worst case a SourceLocation can be ~any 32-bit number, but in
  15. //    practice they are highly predictable
  16. //
  17. // We encode the integer so that likely values encode as small numbers that
  18. // turn into few VBR chunks:
  19. //  - the invalid sentinel location is a very common value: it encodes as 0
  20. //  - the "macro or not" bit is stored at the bottom of the integer
  21. //    (rather than at the top, as in memory), so macro locations can have
  22. //    small representations.
  23. //  - related locations (e.g. of a left and right paren pair) are usually
  24. //    similar, so when encoding a sequence of locations we store only
  25. //    differences between successive elements.
  26. //
  27. //===----------------------------------------------------------------------===//
  28.  
  29. #include <climits>
  30. #include "clang/Basic/SourceLocation.h"
  31.  
  32. #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
  33. #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
  34.  
  35. namespace clang {
  36. class SourceLocationSequence;
  37.  
  38. /// Serialized encoding of SourceLocations without context.
  39. /// Optimized to have small unsigned values (=> small after VBR encoding).
  40. ///
  41. // Macro locations have the top bit set, we rotate by one so it is the low bit.
  42. class SourceLocationEncoding {
  43.   using UIntTy = SourceLocation::UIntTy;
  44.   constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
  45.  
  46.   static UIntTy encodeRaw(UIntTy Raw) {
  47.     return (Raw << 1) | (Raw >> (UIntBits - 1));
  48.   }
  49.   static UIntTy decodeRaw(UIntTy Raw) {
  50.     return (Raw >> 1) | (Raw << (UIntBits - 1));
  51.   }
  52.   friend SourceLocationSequence;
  53.  
  54. public:
  55.   static uint64_t encode(SourceLocation Loc,
  56.                          SourceLocationSequence * = nullptr);
  57.   static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
  58. };
  59.  
  60. /// Serialized encoding of a sequence of SourceLocations.
  61. ///
  62. /// Optimized to produce small values when locations with the sequence are
  63. /// similar. Each element can be delta-encoded against the last nonzero element.
  64. ///
  65. /// Sequences should be started by creating a SourceLocationSequence::State,
  66. /// and then passed around as SourceLocationSequence*. Example:
  67. ///
  68. ///   // establishes a sequence
  69. ///   void EmitTopLevelThing() {
  70. ///     SourceLocationSequence::State Seq;
  71. ///     EmitContainedThing(Seq);
  72. ///     EmitRecursiveThing(Seq);
  73. ///   }
  74. ///
  75. ///   // optionally part of a sequence
  76. ///   void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
  77. ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
  78. ///   }
  79. ///
  80. ///   // establishes a sequence if there isn't one already
  81. ///   void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
  82. ///     SourceLocationSequence::State Seq(ParentSeq);
  83. ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
  84. ///     EmitRecursiveThing(Seq);
  85. ///   }
  86. ///
  87. class SourceLocationSequence {
  88.   using UIntTy = SourceLocation::UIntTy;
  89.   using EncodedTy = uint64_t;
  90.   constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
  91.   static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
  92.  
  93.   // Prev stores the rotated last nonzero location.
  94.   UIntTy &Prev;
  95.  
  96.   // Zig-zag encoding turns small signed integers into small unsigned integers.
  97.   // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
  98.   static UIntTy zigZag(UIntTy V) {
  99.     UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
  100.     return Sign ^ (V << 1);
  101.   }
  102.   static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
  103.  
  104.   SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
  105.  
  106.   EncodedTy encodeRaw(UIntTy Raw) {
  107.     if (Raw == 0)
  108.       return 0;
  109.     UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
  110.     if (Prev == 0)
  111.       return Prev = Rotated;
  112.     UIntTy Delta = Rotated - Prev;
  113.     Prev = Rotated;
  114.     // Exactly one 33 bit value is possible! (1 << 32).
  115.     // This is because we have two representations of zero: trivial & relative.
  116.     return 1 + EncodedTy{zigZag(Delta)};
  117.   }
  118.   UIntTy decodeRaw(EncodedTy Encoded) {
  119.     if (Encoded == 0)
  120.       return 0;
  121.     if (Prev == 0)
  122.       return SourceLocationEncoding::decodeRaw(Prev = Encoded);
  123.     return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
  124.   }
  125.  
  126. public:
  127.   SourceLocation decode(EncodedTy Encoded) {
  128.     return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
  129.   }
  130.   EncodedTy encode(SourceLocation Loc) {
  131.     return encodeRaw(Loc.getRawEncoding());
  132.   }
  133.  
  134.   class State;
  135. };
  136.  
  137. /// This object establishes a SourceLocationSequence.
  138. class SourceLocationSequence::State {
  139.   UIntTy Prev = 0;
  140.   SourceLocationSequence Seq;
  141.  
  142. public:
  143.   // If Parent is provided and non-null, then this root becomes part of that
  144.   // enclosing sequence instead of establishing a new one.
  145.   State(SourceLocationSequence *Parent = nullptr)
  146.       : Seq(Parent ? Parent->Prev : Prev) {}
  147.  
  148.   // Implicit conversion for uniform use of roots vs propagated sequences.
  149.   operator SourceLocationSequence *() { return &Seq; }
  150. };
  151.  
  152. inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
  153.                                                SourceLocationSequence *Seq) {
  154.   return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
  155. }
  156. inline SourceLocation
  157. SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
  158.   return Seq ? Seq->decode(Encoded)
  159.              : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
  160. }
  161.  
  162. } // namespace clang
  163. #endif
  164.