Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // Source locations are stored pervasively in the AST, making up a third of |
||
10 | // the size of typical serialized files. Storing them efficiently is important. |
||
11 | // |
||
12 | // We use integers optimized by VBR-encoding, because: |
||
13 | // - when abbreviations cannot be used, VBR6 encoding is our only choice |
||
14 | // - in the worst case a SourceLocation can be ~any 32-bit number, but in |
||
15 | // practice they are highly predictable |
||
16 | // |
||
17 | // We encode the integer so that likely values encode as small numbers that |
||
18 | // turn into few VBR chunks: |
||
19 | // - the invalid sentinel location is a very common value: it encodes as 0 |
||
20 | // - the "macro or not" bit is stored at the bottom of the integer |
||
21 | // (rather than at the top, as in memory), so macro locations can have |
||
22 | // small representations. |
||
23 | // - related locations (e.g. of a left and right paren pair) are usually |
||
24 | // similar, so when encoding a sequence of locations we store only |
||
25 | // differences between successive elements. |
||
26 | // |
||
27 | //===----------------------------------------------------------------------===// |
||
28 | |||
29 | #include <climits> |
||
30 | #include "clang/Basic/SourceLocation.h" |
||
31 | |||
32 | #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
||
33 | #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
||
34 | |||
35 | namespace clang { |
||
36 | class SourceLocationSequence; |
||
37 | |||
38 | /// Serialized encoding of SourceLocations without context. |
||
39 | /// Optimized to have small unsigned values (=> small after VBR encoding). |
||
40 | /// |
||
41 | // Macro locations have the top bit set, we rotate by one so it is the low bit. |
||
42 | class SourceLocationEncoding { |
||
43 | using UIntTy = SourceLocation::UIntTy; |
||
44 | constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); |
||
45 | |||
46 | static UIntTy encodeRaw(UIntTy Raw) { |
||
47 | return (Raw << 1) | (Raw >> (UIntBits - 1)); |
||
48 | } |
||
49 | static UIntTy decodeRaw(UIntTy Raw) { |
||
50 | return (Raw >> 1) | (Raw << (UIntBits - 1)); |
||
51 | } |
||
52 | friend SourceLocationSequence; |
||
53 | |||
54 | public: |
||
55 | static uint64_t encode(SourceLocation Loc, |
||
56 | SourceLocationSequence * = nullptr); |
||
57 | static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); |
||
58 | }; |
||
59 | |||
60 | /// Serialized encoding of a sequence of SourceLocations. |
||
61 | /// |
||
62 | /// Optimized to produce small values when locations with the sequence are |
||
63 | /// similar. Each element can be delta-encoded against the last nonzero element. |
||
64 | /// |
||
65 | /// Sequences should be started by creating a SourceLocationSequence::State, |
||
66 | /// and then passed around as SourceLocationSequence*. Example: |
||
67 | /// |
||
68 | /// // establishes a sequence |
||
69 | /// void EmitTopLevelThing() { |
||
70 | /// SourceLocationSequence::State Seq; |
||
71 | /// EmitContainedThing(Seq); |
||
72 | /// EmitRecursiveThing(Seq); |
||
73 | /// } |
||
74 | /// |
||
75 | /// // optionally part of a sequence |
||
76 | /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { |
||
77 | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
||
78 | /// } |
||
79 | /// |
||
80 | /// // establishes a sequence if there isn't one already |
||
81 | /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { |
||
82 | /// SourceLocationSequence::State Seq(ParentSeq); |
||
83 | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
||
84 | /// EmitRecursiveThing(Seq); |
||
85 | /// } |
||
86 | /// |
||
87 | class SourceLocationSequence { |
||
88 | using UIntTy = SourceLocation::UIntTy; |
||
89 | using EncodedTy = uint64_t; |
||
90 | constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; |
||
91 | static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); |
||
92 | |||
93 | // Prev stores the rotated last nonzero location. |
||
94 | UIntTy &Prev; |
||
95 | |||
96 | // Zig-zag encoding turns small signed integers into small unsigned integers. |
||
97 | // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... |
||
98 | static UIntTy zigZag(UIntTy V) { |
||
99 | UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); |
||
100 | return Sign ^ (V << 1); |
||
101 | } |
||
102 | static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } |
||
103 | |||
104 | SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} |
||
105 | |||
106 | EncodedTy encodeRaw(UIntTy Raw) { |
||
107 | if (Raw == 0) |
||
108 | return 0; |
||
109 | UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); |
||
110 | if (Prev == 0) |
||
111 | return Prev = Rotated; |
||
112 | UIntTy Delta = Rotated - Prev; |
||
113 | Prev = Rotated; |
||
114 | // Exactly one 33 bit value is possible! (1 << 32). |
||
115 | // This is because we have two representations of zero: trivial & relative. |
||
116 | return 1 + EncodedTy{zigZag(Delta)}; |
||
117 | } |
||
118 | UIntTy decodeRaw(EncodedTy Encoded) { |
||
119 | if (Encoded == 0) |
||
120 | return 0; |
||
121 | if (Prev == 0) |
||
122 | return SourceLocationEncoding::decodeRaw(Prev = Encoded); |
||
123 | return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); |
||
124 | } |
||
125 | |||
126 | public: |
||
127 | SourceLocation decode(EncodedTy Encoded) { |
||
128 | return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); |
||
129 | } |
||
130 | EncodedTy encode(SourceLocation Loc) { |
||
131 | return encodeRaw(Loc.getRawEncoding()); |
||
132 | } |
||
133 | |||
134 | class State; |
||
135 | }; |
||
136 | |||
137 | /// This object establishes a SourceLocationSequence. |
||
138 | class SourceLocationSequence::State { |
||
139 | UIntTy Prev = 0; |
||
140 | SourceLocationSequence Seq; |
||
141 | |||
142 | public: |
||
143 | // If Parent is provided and non-null, then this root becomes part of that |
||
144 | // enclosing sequence instead of establishing a new one. |
||
145 | State(SourceLocationSequence *Parent = nullptr) |
||
146 | : Seq(Parent ? Parent->Prev : Prev) {} |
||
147 | |||
148 | // Implicit conversion for uniform use of roots vs propagated sequences. |
||
149 | operator SourceLocationSequence *() { return &Seq; } |
||
150 | }; |
||
151 | |||
152 | inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, |
||
153 | SourceLocationSequence *Seq) { |
||
154 | return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); |
||
155 | } |
||
156 | inline SourceLocation |
||
157 | SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { |
||
158 | return Seq ? Seq->decode(Encoded) |
||
159 | : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); |
||
160 | } |
||
161 | |||
162 | } // namespace clang |
||
163 | #endif |