Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Source locations are stored pervasively in the AST, making up a third of
10
// the size of typical serialized files. Storing them efficiently is important.
11
//
12
// We use integers optimized by VBR-encoding, because:
13
//  - when abbreviations cannot be used, VBR6 encoding is our only choice
14
//  - in the worst case a SourceLocation can be ~any 32-bit number, but in
15
//    practice they are highly predictable
16
//
17
// We encode the integer so that likely values encode as small numbers that
18
// turn into few VBR chunks:
19
//  - the invalid sentinel location is a very common value: it encodes as 0
20
//  - the "macro or not" bit is stored at the bottom of the integer
21
//    (rather than at the top, as in memory), so macro locations can have
22
//    small representations.
23
//  - related locations (e.g. of a left and right paren pair) are usually
24
//    similar, so when encoding a sequence of locations we store only
25
//    differences between successive elements.
26
//
27
//===----------------------------------------------------------------------===//
28
 
29
#include <climits>
30
#include "clang/Basic/SourceLocation.h"
31
 
32
#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
33
#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
34
 
35
namespace clang {
36
class SourceLocationSequence;
37
 
38
/// Serialized encoding of SourceLocations without context.
39
/// Optimized to have small unsigned values (=> small after VBR encoding).
40
///
41
// Macro locations have the top bit set, we rotate by one so it is the low bit.
42
class SourceLocationEncoding {
43
  using UIntTy = SourceLocation::UIntTy;
44
  constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
45
 
46
  static UIntTy encodeRaw(UIntTy Raw) {
47
    return (Raw << 1) | (Raw >> (UIntBits - 1));
48
  }
49
  static UIntTy decodeRaw(UIntTy Raw) {
50
    return (Raw >> 1) | (Raw << (UIntBits - 1));
51
  }
52
  friend SourceLocationSequence;
53
 
54
public:
55
  static uint64_t encode(SourceLocation Loc,
56
                         SourceLocationSequence * = nullptr);
57
  static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
58
};
59
 
60
/// Serialized encoding of a sequence of SourceLocations.
61
///
62
/// Optimized to produce small values when locations with the sequence are
63
/// similar. Each element can be delta-encoded against the last nonzero element.
64
///
65
/// Sequences should be started by creating a SourceLocationSequence::State,
66
/// and then passed around as SourceLocationSequence*. Example:
67
///
68
///   // establishes a sequence
69
///   void EmitTopLevelThing() {
70
///     SourceLocationSequence::State Seq;
71
///     EmitContainedThing(Seq);
72
///     EmitRecursiveThing(Seq);
73
///   }
74
///
75
///   // optionally part of a sequence
76
///   void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
77
///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
78
///   }
79
///
80
///   // establishes a sequence if there isn't one already
81
///   void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
82
///     SourceLocationSequence::State Seq(ParentSeq);
83
///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
84
///     EmitRecursiveThing(Seq);
85
///   }
86
///
87
class SourceLocationSequence {
88
  using UIntTy = SourceLocation::UIntTy;
89
  using EncodedTy = uint64_t;
90
  constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
91
  static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
92
 
93
  // Prev stores the rotated last nonzero location.
94
  UIntTy &Prev;
95
 
96
  // Zig-zag encoding turns small signed integers into small unsigned integers.
97
  // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
98
  static UIntTy zigZag(UIntTy V) {
99
    UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
100
    return Sign ^ (V << 1);
101
  }
102
  static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
103
 
104
  SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
105
 
106
  EncodedTy encodeRaw(UIntTy Raw) {
107
    if (Raw == 0)
108
      return 0;
109
    UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
110
    if (Prev == 0)
111
      return Prev = Rotated;
112
    UIntTy Delta = Rotated - Prev;
113
    Prev = Rotated;
114
    // Exactly one 33 bit value is possible! (1 << 32).
115
    // This is because we have two representations of zero: trivial & relative.
116
    return 1 + EncodedTy{zigZag(Delta)};
117
  }
118
  UIntTy decodeRaw(EncodedTy Encoded) {
119
    if (Encoded == 0)
120
      return 0;
121
    if (Prev == 0)
122
      return SourceLocationEncoding::decodeRaw(Prev = Encoded);
123
    return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
124
  }
125
 
126
public:
127
  SourceLocation decode(EncodedTy Encoded) {
128
    return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
129
  }
130
  EncodedTy encode(SourceLocation Loc) {
131
    return encodeRaw(Loc.getRawEncoding());
132
  }
133
 
134
  class State;
135
};
136
 
137
/// This object establishes a SourceLocationSequence.
138
class SourceLocationSequence::State {
139
  UIntTy Prev = 0;
140
  SourceLocationSequence Seq;
141
 
142
public:
143
  // If Parent is provided and non-null, then this root becomes part of that
144
  // enclosing sequence instead of establishing a new one.
145
  State(SourceLocationSequence *Parent = nullptr)
146
      : Seq(Parent ? Parent->Prev : Prev) {}
147
 
148
  // Implicit conversion for uniform use of roots vs propagated sequences.
149
  operator SourceLocationSequence *() { return &Seq; }
150
};
151
 
152
inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
153
                                               SourceLocationSequence *Seq) {
154
  return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
155
}
156
inline SourceLocation
157
SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
158
  return Seq ? Seq->decode(Encoded)
159
             : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
160
}
161
 
162
} // namespace clang
163
#endif