Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | /// |
||
9 | /// \file |
||
10 | /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file |
||
11 | /// provides two different array implementations. |
||
12 | /// |
||
13 | /// VarStreamArray - Arrays of variable length records. The user specifies |
||
14 | /// an Extractor type that can extract a record from a given offset and |
||
15 | /// return the number of bytes consumed by the record. |
||
16 | /// |
||
17 | /// FixedStreamArray - Arrays of fixed length records. This is similar in |
||
18 | /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the |
||
19 | /// elements of the array need not be laid out in contiguous memory. |
||
20 | /// |
||
21 | |||
22 | #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H |
||
23 | #define LLVM_SUPPORT_BINARYSTREAMARRAY_H |
||
24 | |||
25 | #include "llvm/ADT/ArrayRef.h" |
||
26 | #include "llvm/ADT/iterator.h" |
||
27 | #include "llvm/Support/Alignment.h" |
||
28 | #include "llvm/Support/BinaryStreamRef.h" |
||
29 | #include "llvm/Support/Error.h" |
||
30 | #include <cassert> |
||
31 | #include <cstdint> |
||
32 | |||
33 | namespace llvm { |
||
34 | |||
35 | /// VarStreamArrayExtractor is intended to be specialized to provide customized |
||
36 | /// extraction logic. On input it receives a BinaryStreamRef pointing to the |
||
37 | /// beginning of the next record, but where the length of the record is not yet |
||
38 | /// known. Upon completion, it should return an appropriate Error instance if |
||
39 | /// a record could not be extracted, or if one could be extracted it should |
||
40 | /// return success and set Len to the number of bytes this record occupied in |
||
41 | /// the underlying stream, and it should fill out the fields of the value type |
||
42 | /// Item appropriately to represent the current record. |
||
43 | /// |
||
44 | /// You can specialize this template for your own custom value types to avoid |
||
45 | /// having to specify a second template argument to VarStreamArray (documented |
||
46 | /// below). |
||
47 | template <typename T> struct VarStreamArrayExtractor { |
||
48 | // Method intentionally deleted. You must provide an explicit specialization |
||
49 | // with the following method implemented. |
||
50 | Error operator()(BinaryStreamRef Stream, uint32_t &Len, |
||
51 | T &Item) const = delete; |
||
52 | }; |
||
53 | |||
54 | /// VarStreamArray represents an array of variable length records backed by a |
||
55 | /// stream. This could be a contiguous sequence of bytes in memory, it could |
||
56 | /// be a file on disk, or it could be a PDB stream where bytes are stored as |
||
57 | /// discontiguous blocks in a file. Usually it is desirable to treat arrays |
||
58 | /// as contiguous blocks of memory, but doing so with large PDB files, for |
||
59 | /// example, could mean allocating huge amounts of memory just to allow |
||
60 | /// re-ordering of stream data to be contiguous before iterating over it. By |
||
61 | /// abstracting this out, we need not duplicate this memory, and we can |
||
62 | /// iterate over arrays in arbitrarily formatted streams. Elements are parsed |
||
63 | /// lazily on iteration, so there is no upfront cost associated with building |
||
64 | /// or copying a VarStreamArray, no matter how large it may be. |
||
65 | /// |
||
66 | /// You create a VarStreamArray by specifying a ValueType and an Extractor type. |
||
67 | /// If you do not specify an Extractor type, you are expected to specialize |
||
68 | /// VarStreamArrayExtractor<T> for your ValueType. |
||
69 | /// |
||
70 | /// By default an Extractor is default constructed in the class, but in some |
||
71 | /// cases you might find it useful for an Extractor to maintain state across |
||
72 | /// extractions. In this case you can provide your own Extractor through a |
||
73 | /// secondary constructor. The following examples show various ways of |
||
74 | /// creating a VarStreamArray. |
||
75 | /// |
||
76 | /// // Will use VarStreamArrayExtractor<MyType> as the extractor. |
||
77 | /// VarStreamArray<MyType> MyTypeArray; |
||
78 | /// |
||
79 | /// // Will use a default-constructed MyExtractor as the extractor. |
||
80 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; |
||
81 | /// |
||
82 | /// // Will use the specific instance of MyExtractor provided. |
||
83 | /// // MyExtractor need not be default-constructible in this case. |
||
84 | /// MyExtractor E(SomeContext); |
||
85 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); |
||
86 | /// |
||
87 | |||
88 | template <typename ValueType, typename Extractor> class VarStreamArrayIterator; |
||
89 | |||
90 | template <typename ValueType, |
||
91 | typename Extractor = VarStreamArrayExtractor<ValueType>> |
||
92 | class VarStreamArray { |
||
93 | friend class VarStreamArrayIterator<ValueType, Extractor>; |
||
94 | |||
95 | public: |
||
96 | typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; |
||
97 | |||
98 | VarStreamArray() = default; |
||
99 | |||
100 | explicit VarStreamArray(const Extractor &E) : E(E) {} |
||
101 | |||
102 | explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) |
||
103 | : Stream(Stream), Skew(Skew) {} |
||
104 | |||
105 | VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) |
||
106 | : Stream(Stream), E(E), Skew(Skew) {} |
||
107 | |||
108 | Iterator begin(bool *HadError = nullptr) const { |
||
109 | return Iterator(*this, E, Skew, nullptr); |
||
110 | } |
||
111 | |||
112 | bool valid() const { return Stream.valid(); } |
||
113 | |||
114 | bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); } |
||
115 | |||
116 | uint32_t skew() const { return Skew; } |
||
117 | Iterator end() const { return Iterator(E); } |
||
118 | |||
119 | bool empty() const { return Stream.getLength() == 0; } |
||
120 | |||
121 | VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, |
||
122 | uint32_t End) const { |
||
123 | assert(Begin >= Skew); |
||
124 | // We should never cut off the beginning of the stream since it might be |
||
125 | // skewed, meaning the initial bytes are important. |
||
126 | BinaryStreamRef NewStream = Stream.slice(0, End); |
||
127 | return {NewStream, E, Begin}; |
||
128 | } |
||
129 | |||
130 | /// given an offset into the array's underlying stream, return an |
||
131 | /// iterator to the record at that offset. This is considered unsafe |
||
132 | /// since the behavior is undefined if \p Offset does not refer to the |
||
133 | /// beginning of a valid record. |
||
134 | Iterator at(uint32_t Offset) const { |
||
135 | return Iterator(*this, E, Offset, nullptr); |
||
136 | } |
||
137 | |||
138 | const Extractor &getExtractor() const { return E; } |
||
139 | Extractor &getExtractor() { return E; } |
||
140 | |||
141 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
||
142 | void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { |
||
143 | Stream = NewStream; |
||
144 | Skew = NewSkew; |
||
145 | } |
||
146 | |||
147 | void drop_front() { Skew += begin()->length(); } |
||
148 | |||
149 | private: |
||
150 | BinaryStreamRef Stream; |
||
151 | Extractor E; |
||
152 | uint32_t Skew = 0; |
||
153 | }; |
||
154 | |||
155 | template <typename ValueType, typename Extractor> |
||
156 | class VarStreamArrayIterator |
||
157 | : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, |
||
158 | std::forward_iterator_tag, const ValueType> { |
||
159 | typedef VarStreamArrayIterator<ValueType, Extractor> IterType; |
||
160 | typedef VarStreamArray<ValueType, Extractor> ArrayType; |
||
161 | |||
162 | public: |
||
163 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
||
164 | uint32_t Offset, bool *HadError) |
||
165 | : IterRef(Array.Stream.drop_front(Offset)), Extract(E), |
||
166 | Array(&Array), AbsOffset(Offset), HadError(HadError) { |
||
167 | if (IterRef.getLength() == 0) |
||
168 | moveToEnd(); |
||
169 | else { |
||
170 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
||
171 | if (EC) { |
||
172 | consumeError(std::move(EC)); |
||
173 | markError(); |
||
174 | } |
||
175 | } |
||
176 | } |
||
177 | |||
178 | VarStreamArrayIterator() = default; |
||
179 | explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} |
||
180 | ~VarStreamArrayIterator() = default; |
||
181 | |||
182 | bool operator==(const IterType &R) const { |
||
183 | if (Array && R.Array) { |
||
184 | // Both have a valid array, make sure they're same. |
||
185 | assert(Array == R.Array); |
||
186 | return IterRef == R.IterRef; |
||
187 | } |
||
188 | |||
189 | // Both iterators are at the end. |
||
190 | if (!Array && !R.Array) |
||
191 | return true; |
||
192 | |||
193 | // One is not at the end and one is. |
||
194 | return false; |
||
195 | } |
||
196 | |||
197 | const ValueType &operator*() const { |
||
198 | assert(Array && !HasError); |
||
199 | return ThisValue; |
||
200 | } |
||
201 | |||
202 | IterType &operator+=(unsigned N) { |
||
203 | for (unsigned I = 0; I < N; ++I) { |
||
204 | // We are done with the current record, discard it so that we are |
||
205 | // positioned at the next record. |
||
206 | AbsOffset += ThisLen; |
||
207 | IterRef = IterRef.drop_front(ThisLen); |
||
208 | if (IterRef.getLength() == 0) { |
||
209 | // There is nothing after the current record, we must make this an end |
||
210 | // iterator. |
||
211 | moveToEnd(); |
||
212 | } else { |
||
213 | // There is some data after the current record. |
||
214 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
||
215 | if (EC) { |
||
216 | consumeError(std::move(EC)); |
||
217 | markError(); |
||
218 | } else if (ThisLen == 0) { |
||
219 | // An empty record? Make this an end iterator. |
||
220 | moveToEnd(); |
||
221 | } |
||
222 | } |
||
223 | } |
||
224 | return *this; |
||
225 | } |
||
226 | |||
227 | uint32_t offset() const { return AbsOffset; } |
||
228 | uint32_t getRecordLength() const { return ThisLen; } |
||
229 | |||
230 | private: |
||
231 | void moveToEnd() { |
||
232 | Array = nullptr; |
||
233 | ThisLen = 0; |
||
234 | } |
||
235 | void markError() { |
||
236 | moveToEnd(); |
||
237 | HasError = true; |
||
238 | if (HadError != nullptr) |
||
239 | *HadError = true; |
||
240 | } |
||
241 | |||
242 | ValueType ThisValue; |
||
243 | BinaryStreamRef IterRef; |
||
244 | Extractor Extract; |
||
245 | const ArrayType *Array{nullptr}; |
||
246 | uint32_t ThisLen{0}; |
||
247 | uint32_t AbsOffset{0}; |
||
248 | bool HasError{false}; |
||
249 | bool *HadError{nullptr}; |
||
250 | }; |
||
251 | |||
252 | template <typename T> class FixedStreamArrayIterator; |
||
253 | |||
254 | /// FixedStreamArray is similar to VarStreamArray, except with each record |
||
255 | /// having a fixed-length. As with VarStreamArray, there is no upfront |
||
256 | /// cost associated with building or copying a FixedStreamArray, as the |
||
257 | /// memory for each element is not read from the backing stream until that |
||
258 | /// element is iterated. |
||
259 | template <typename T> class FixedStreamArray { |
||
260 | friend class FixedStreamArrayIterator<T>; |
||
261 | |||
262 | public: |
||
263 | typedef FixedStreamArrayIterator<T> Iterator; |
||
264 | |||
265 | FixedStreamArray() = default; |
||
266 | explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { |
||
267 | assert(Stream.getLength() % sizeof(T) == 0); |
||
268 | } |
||
269 | |||
270 | bool operator==(const FixedStreamArray<T> &Other) const { |
||
271 | return Stream == Other.Stream; |
||
272 | } |
||
273 | |||
274 | bool operator!=(const FixedStreamArray<T> &Other) const { |
||
275 | return !(*this == Other); |
||
276 | } |
||
277 | |||
278 | FixedStreamArray(const FixedStreamArray &) = default; |
||
279 | FixedStreamArray &operator=(const FixedStreamArray &) = default; |
||
280 | |||
281 | const T &operator[](uint32_t Index) const { |
||
282 | assert(Index < size()); |
||
283 | uint32_t Off = Index * sizeof(T); |
||
284 | ArrayRef<uint8_t> Data; |
||
285 | if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { |
||
286 | assert(false && "Unexpected failure reading from stream"); |
||
287 | // This should never happen since we asserted that the stream length was |
||
288 | // an exact multiple of the element size. |
||
289 | consumeError(std::move(EC)); |
||
290 | } |
||
291 | assert(isAddrAligned(Align::Of<T>(), Data.data())); |
||
292 | return *reinterpret_cast<const T *>(Data.data()); |
||
293 | } |
||
294 | |||
295 | uint32_t size() const { return Stream.getLength() / sizeof(T); } |
||
296 | |||
297 | bool empty() const { return size() == 0; } |
||
298 | |||
299 | FixedStreamArrayIterator<T> begin() const { |
||
300 | return FixedStreamArrayIterator<T>(*this, 0); |
||
301 | } |
||
302 | |||
303 | FixedStreamArrayIterator<T> end() const { |
||
304 | return FixedStreamArrayIterator<T>(*this, size()); |
||
305 | } |
||
306 | |||
307 | const T &front() const { return *begin(); } |
||
308 | const T &back() const { |
||
309 | FixedStreamArrayIterator<T> I = end(); |
||
310 | return *(--I); |
||
311 | } |
||
312 | |||
313 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
||
314 | |||
315 | private: |
||
316 | BinaryStreamRef Stream; |
||
317 | }; |
||
318 | |||
319 | template <typename T> |
||
320 | class FixedStreamArrayIterator |
||
321 | : public iterator_facade_base<FixedStreamArrayIterator<T>, |
||
322 | std::random_access_iterator_tag, const T> { |
||
323 | |||
324 | public: |
||
325 | FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) |
||
326 | : Array(Array), Index(Index) {} |
||
327 | |||
328 | FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other) |
||
329 | : Array(Other.Array), Index(Other.Index) {} |
||
330 | FixedStreamArrayIterator<T> & |
||
331 | operator=(const FixedStreamArrayIterator<T> &Other) { |
||
332 | Array = Other.Array; |
||
333 | Index = Other.Index; |
||
334 | return *this; |
||
335 | } |
||
336 | |||
337 | const T &operator*() const { return Array[Index]; } |
||
338 | const T &operator*() { return Array[Index]; } |
||
339 | |||
340 | bool operator==(const FixedStreamArrayIterator<T> &R) const { |
||
341 | assert(Array == R.Array); |
||
342 | return (Index == R.Index) && (Array == R.Array); |
||
343 | } |
||
344 | |||
345 | FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { |
||
346 | Index += N; |
||
347 | return *this; |
||
348 | } |
||
349 | |||
350 | FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { |
||
351 | assert(std::ptrdiff_t(Index) >= N); |
||
352 | Index -= N; |
||
353 | return *this; |
||
354 | } |
||
355 | |||
356 | std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { |
||
357 | assert(Array == R.Array); |
||
358 | assert(Index >= R.Index); |
||
359 | return Index - R.Index; |
||
360 | } |
||
361 | |||
362 | bool operator<(const FixedStreamArrayIterator<T> &RHS) const { |
||
363 | assert(Array == RHS.Array); |
||
364 | return Index < RHS.Index; |
||
365 | } |
||
366 | |||
367 | private: |
||
368 | FixedStreamArray<T> Array; |
||
369 | uint32_t Index; |
||
370 | }; |
||
371 | |||
372 | } // namespace llvm |
||
373 | |||
374 | #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H |