Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | /// |
||
| 9 | /// \file |
||
| 10 | /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file |
||
| 11 | /// provides two different array implementations. |
||
| 12 | /// |
||
| 13 | /// VarStreamArray - Arrays of variable length records. The user specifies |
||
| 14 | /// an Extractor type that can extract a record from a given offset and |
||
| 15 | /// return the number of bytes consumed by the record. |
||
| 16 | /// |
||
| 17 | /// FixedStreamArray - Arrays of fixed length records. This is similar in |
||
| 18 | /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the |
||
| 19 | /// elements of the array need not be laid out in contiguous memory. |
||
| 20 | /// |
||
| 21 | |||
| 22 | #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H |
||
| 23 | #define LLVM_SUPPORT_BINARYSTREAMARRAY_H |
||
| 24 | |||
| 25 | #include "llvm/ADT/ArrayRef.h" |
||
| 26 | #include "llvm/ADT/iterator.h" |
||
| 27 | #include "llvm/Support/Alignment.h" |
||
| 28 | #include "llvm/Support/BinaryStreamRef.h" |
||
| 29 | #include "llvm/Support/Error.h" |
||
| 30 | #include <cassert> |
||
| 31 | #include <cstdint> |
||
| 32 | |||
| 33 | namespace llvm { |
||
| 34 | |||
| 35 | /// VarStreamArrayExtractor is intended to be specialized to provide customized |
||
| 36 | /// extraction logic. On input it receives a BinaryStreamRef pointing to the |
||
| 37 | /// beginning of the next record, but where the length of the record is not yet |
||
| 38 | /// known. Upon completion, it should return an appropriate Error instance if |
||
| 39 | /// a record could not be extracted, or if one could be extracted it should |
||
| 40 | /// return success and set Len to the number of bytes this record occupied in |
||
| 41 | /// the underlying stream, and it should fill out the fields of the value type |
||
| 42 | /// Item appropriately to represent the current record. |
||
| 43 | /// |
||
| 44 | /// You can specialize this template for your own custom value types to avoid |
||
| 45 | /// having to specify a second template argument to VarStreamArray (documented |
||
| 46 | /// below). |
||
| 47 | template <typename T> struct VarStreamArrayExtractor { |
||
| 48 | // Method intentionally deleted. You must provide an explicit specialization |
||
| 49 | // with the following method implemented. |
||
| 50 | Error operator()(BinaryStreamRef Stream, uint32_t &Len, |
||
| 51 | T &Item) const = delete; |
||
| 52 | }; |
||
| 53 | |||
| 54 | /// VarStreamArray represents an array of variable length records backed by a |
||
| 55 | /// stream. This could be a contiguous sequence of bytes in memory, it could |
||
| 56 | /// be a file on disk, or it could be a PDB stream where bytes are stored as |
||
| 57 | /// discontiguous blocks in a file. Usually it is desirable to treat arrays |
||
| 58 | /// as contiguous blocks of memory, but doing so with large PDB files, for |
||
| 59 | /// example, could mean allocating huge amounts of memory just to allow |
||
| 60 | /// re-ordering of stream data to be contiguous before iterating over it. By |
||
| 61 | /// abstracting this out, we need not duplicate this memory, and we can |
||
| 62 | /// iterate over arrays in arbitrarily formatted streams. Elements are parsed |
||
| 63 | /// lazily on iteration, so there is no upfront cost associated with building |
||
| 64 | /// or copying a VarStreamArray, no matter how large it may be. |
||
| 65 | /// |
||
| 66 | /// You create a VarStreamArray by specifying a ValueType and an Extractor type. |
||
| 67 | /// If you do not specify an Extractor type, you are expected to specialize |
||
| 68 | /// VarStreamArrayExtractor<T> for your ValueType. |
||
| 69 | /// |
||
| 70 | /// By default an Extractor is default constructed in the class, but in some |
||
| 71 | /// cases you might find it useful for an Extractor to maintain state across |
||
| 72 | /// extractions. In this case you can provide your own Extractor through a |
||
| 73 | /// secondary constructor. The following examples show various ways of |
||
| 74 | /// creating a VarStreamArray. |
||
| 75 | /// |
||
| 76 | /// // Will use VarStreamArrayExtractor<MyType> as the extractor. |
||
| 77 | /// VarStreamArray<MyType> MyTypeArray; |
||
| 78 | /// |
||
| 79 | /// // Will use a default-constructed MyExtractor as the extractor. |
||
| 80 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; |
||
| 81 | /// |
||
| 82 | /// // Will use the specific instance of MyExtractor provided. |
||
| 83 | /// // MyExtractor need not be default-constructible in this case. |
||
| 84 | /// MyExtractor E(SomeContext); |
||
| 85 | /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); |
||
| 86 | /// |
||
| 87 | |||
| 88 | template <typename ValueType, typename Extractor> class VarStreamArrayIterator; |
||
| 89 | |||
| 90 | template <typename ValueType, |
||
| 91 | typename Extractor = VarStreamArrayExtractor<ValueType>> |
||
| 92 | class VarStreamArray { |
||
| 93 | friend class VarStreamArrayIterator<ValueType, Extractor>; |
||
| 94 | |||
| 95 | public: |
||
| 96 | typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; |
||
| 97 | |||
| 98 | VarStreamArray() = default; |
||
| 99 | |||
| 100 | explicit VarStreamArray(const Extractor &E) : E(E) {} |
||
| 101 | |||
| 102 | explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) |
||
| 103 | : Stream(Stream), Skew(Skew) {} |
||
| 104 | |||
| 105 | VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) |
||
| 106 | : Stream(Stream), E(E), Skew(Skew) {} |
||
| 107 | |||
| 108 | Iterator begin(bool *HadError = nullptr) const { |
||
| 109 | return Iterator(*this, E, Skew, nullptr); |
||
| 110 | } |
||
| 111 | |||
| 112 | bool valid() const { return Stream.valid(); } |
||
| 113 | |||
| 114 | bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); } |
||
| 115 | |||
| 116 | uint32_t skew() const { return Skew; } |
||
| 117 | Iterator end() const { return Iterator(E); } |
||
| 118 | |||
| 119 | bool empty() const { return Stream.getLength() == 0; } |
||
| 120 | |||
| 121 | VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, |
||
| 122 | uint32_t End) const { |
||
| 123 | assert(Begin >= Skew); |
||
| 124 | // We should never cut off the beginning of the stream since it might be |
||
| 125 | // skewed, meaning the initial bytes are important. |
||
| 126 | BinaryStreamRef NewStream = Stream.slice(0, End); |
||
| 127 | return {NewStream, E, Begin}; |
||
| 128 | } |
||
| 129 | |||
| 130 | /// given an offset into the array's underlying stream, return an |
||
| 131 | /// iterator to the record at that offset. This is considered unsafe |
||
| 132 | /// since the behavior is undefined if \p Offset does not refer to the |
||
| 133 | /// beginning of a valid record. |
||
| 134 | Iterator at(uint32_t Offset) const { |
||
| 135 | return Iterator(*this, E, Offset, nullptr); |
||
| 136 | } |
||
| 137 | |||
| 138 | const Extractor &getExtractor() const { return E; } |
||
| 139 | Extractor &getExtractor() { return E; } |
||
| 140 | |||
| 141 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
||
| 142 | void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { |
||
| 143 | Stream = NewStream; |
||
| 144 | Skew = NewSkew; |
||
| 145 | } |
||
| 146 | |||
| 147 | void drop_front() { Skew += begin()->length(); } |
||
| 148 | |||
| 149 | private: |
||
| 150 | BinaryStreamRef Stream; |
||
| 151 | Extractor E; |
||
| 152 | uint32_t Skew = 0; |
||
| 153 | }; |
||
| 154 | |||
| 155 | template <typename ValueType, typename Extractor> |
||
| 156 | class VarStreamArrayIterator |
||
| 157 | : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, |
||
| 158 | std::forward_iterator_tag, const ValueType> { |
||
| 159 | typedef VarStreamArrayIterator<ValueType, Extractor> IterType; |
||
| 160 | typedef VarStreamArray<ValueType, Extractor> ArrayType; |
||
| 161 | |||
| 162 | public: |
||
| 163 | VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, |
||
| 164 | uint32_t Offset, bool *HadError) |
||
| 165 | : IterRef(Array.Stream.drop_front(Offset)), Extract(E), |
||
| 166 | Array(&Array), AbsOffset(Offset), HadError(HadError) { |
||
| 167 | if (IterRef.getLength() == 0) |
||
| 168 | moveToEnd(); |
||
| 169 | else { |
||
| 170 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
||
| 171 | if (EC) { |
||
| 172 | consumeError(std::move(EC)); |
||
| 173 | markError(); |
||
| 174 | } |
||
| 175 | } |
||
| 176 | } |
||
| 177 | |||
| 178 | VarStreamArrayIterator() = default; |
||
| 179 | explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} |
||
| 180 | ~VarStreamArrayIterator() = default; |
||
| 181 | |||
| 182 | bool operator==(const IterType &R) const { |
||
| 183 | if (Array && R.Array) { |
||
| 184 | // Both have a valid array, make sure they're same. |
||
| 185 | assert(Array == R.Array); |
||
| 186 | return IterRef == R.IterRef; |
||
| 187 | } |
||
| 188 | |||
| 189 | // Both iterators are at the end. |
||
| 190 | if (!Array && !R.Array) |
||
| 191 | return true; |
||
| 192 | |||
| 193 | // One is not at the end and one is. |
||
| 194 | return false; |
||
| 195 | } |
||
| 196 | |||
| 197 | const ValueType &operator*() const { |
||
| 198 | assert(Array && !HasError); |
||
| 199 | return ThisValue; |
||
| 200 | } |
||
| 201 | |||
| 202 | IterType &operator+=(unsigned N) { |
||
| 203 | for (unsigned I = 0; I < N; ++I) { |
||
| 204 | // We are done with the current record, discard it so that we are |
||
| 205 | // positioned at the next record. |
||
| 206 | AbsOffset += ThisLen; |
||
| 207 | IterRef = IterRef.drop_front(ThisLen); |
||
| 208 | if (IterRef.getLength() == 0) { |
||
| 209 | // There is nothing after the current record, we must make this an end |
||
| 210 | // iterator. |
||
| 211 | moveToEnd(); |
||
| 212 | } else { |
||
| 213 | // There is some data after the current record. |
||
| 214 | auto EC = Extract(IterRef, ThisLen, ThisValue); |
||
| 215 | if (EC) { |
||
| 216 | consumeError(std::move(EC)); |
||
| 217 | markError(); |
||
| 218 | } else if (ThisLen == 0) { |
||
| 219 | // An empty record? Make this an end iterator. |
||
| 220 | moveToEnd(); |
||
| 221 | } |
||
| 222 | } |
||
| 223 | } |
||
| 224 | return *this; |
||
| 225 | } |
||
| 226 | |||
| 227 | uint32_t offset() const { return AbsOffset; } |
||
| 228 | uint32_t getRecordLength() const { return ThisLen; } |
||
| 229 | |||
| 230 | private: |
||
| 231 | void moveToEnd() { |
||
| 232 | Array = nullptr; |
||
| 233 | ThisLen = 0; |
||
| 234 | } |
||
| 235 | void markError() { |
||
| 236 | moveToEnd(); |
||
| 237 | HasError = true; |
||
| 238 | if (HadError != nullptr) |
||
| 239 | *HadError = true; |
||
| 240 | } |
||
| 241 | |||
| 242 | ValueType ThisValue; |
||
| 243 | BinaryStreamRef IterRef; |
||
| 244 | Extractor Extract; |
||
| 245 | const ArrayType *Array{nullptr}; |
||
| 246 | uint32_t ThisLen{0}; |
||
| 247 | uint32_t AbsOffset{0}; |
||
| 248 | bool HasError{false}; |
||
| 249 | bool *HadError{nullptr}; |
||
| 250 | }; |
||
| 251 | |||
| 252 | template <typename T> class FixedStreamArrayIterator; |
||
| 253 | |||
| 254 | /// FixedStreamArray is similar to VarStreamArray, except with each record |
||
| 255 | /// having a fixed-length. As with VarStreamArray, there is no upfront |
||
| 256 | /// cost associated with building or copying a FixedStreamArray, as the |
||
| 257 | /// memory for each element is not read from the backing stream until that |
||
| 258 | /// element is iterated. |
||
| 259 | template <typename T> class FixedStreamArray { |
||
| 260 | friend class FixedStreamArrayIterator<T>; |
||
| 261 | |||
| 262 | public: |
||
| 263 | typedef FixedStreamArrayIterator<T> Iterator; |
||
| 264 | |||
| 265 | FixedStreamArray() = default; |
||
| 266 | explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { |
||
| 267 | assert(Stream.getLength() % sizeof(T) == 0); |
||
| 268 | } |
||
| 269 | |||
| 270 | bool operator==(const FixedStreamArray<T> &Other) const { |
||
| 271 | return Stream == Other.Stream; |
||
| 272 | } |
||
| 273 | |||
| 274 | bool operator!=(const FixedStreamArray<T> &Other) const { |
||
| 275 | return !(*this == Other); |
||
| 276 | } |
||
| 277 | |||
| 278 | FixedStreamArray(const FixedStreamArray &) = default; |
||
| 279 | FixedStreamArray &operator=(const FixedStreamArray &) = default; |
||
| 280 | |||
| 281 | const T &operator[](uint32_t Index) const { |
||
| 282 | assert(Index < size()); |
||
| 283 | uint32_t Off = Index * sizeof(T); |
||
| 284 | ArrayRef<uint8_t> Data; |
||
| 285 | if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { |
||
| 286 | assert(false && "Unexpected failure reading from stream"); |
||
| 287 | // This should never happen since we asserted that the stream length was |
||
| 288 | // an exact multiple of the element size. |
||
| 289 | consumeError(std::move(EC)); |
||
| 290 | } |
||
| 291 | assert(isAddrAligned(Align::Of<T>(), Data.data())); |
||
| 292 | return *reinterpret_cast<const T *>(Data.data()); |
||
| 293 | } |
||
| 294 | |||
| 295 | uint32_t size() const { return Stream.getLength() / sizeof(T); } |
||
| 296 | |||
| 297 | bool empty() const { return size() == 0; } |
||
| 298 | |||
| 299 | FixedStreamArrayIterator<T> begin() const { |
||
| 300 | return FixedStreamArrayIterator<T>(*this, 0); |
||
| 301 | } |
||
| 302 | |||
| 303 | FixedStreamArrayIterator<T> end() const { |
||
| 304 | return FixedStreamArrayIterator<T>(*this, size()); |
||
| 305 | } |
||
| 306 | |||
| 307 | const T &front() const { return *begin(); } |
||
| 308 | const T &back() const { |
||
| 309 | FixedStreamArrayIterator<T> I = end(); |
||
| 310 | return *(--I); |
||
| 311 | } |
||
| 312 | |||
| 313 | BinaryStreamRef getUnderlyingStream() const { return Stream; } |
||
| 314 | |||
| 315 | private: |
||
| 316 | BinaryStreamRef Stream; |
||
| 317 | }; |
||
| 318 | |||
| 319 | template <typename T> |
||
| 320 | class FixedStreamArrayIterator |
||
| 321 | : public iterator_facade_base<FixedStreamArrayIterator<T>, |
||
| 322 | std::random_access_iterator_tag, const T> { |
||
| 323 | |||
| 324 | public: |
||
| 325 | FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) |
||
| 326 | : Array(Array), Index(Index) {} |
||
| 327 | |||
| 328 | FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other) |
||
| 329 | : Array(Other.Array), Index(Other.Index) {} |
||
| 330 | FixedStreamArrayIterator<T> & |
||
| 331 | operator=(const FixedStreamArrayIterator<T> &Other) { |
||
| 332 | Array = Other.Array; |
||
| 333 | Index = Other.Index; |
||
| 334 | return *this; |
||
| 335 | } |
||
| 336 | |||
| 337 | const T &operator*() const { return Array[Index]; } |
||
| 338 | const T &operator*() { return Array[Index]; } |
||
| 339 | |||
| 340 | bool operator==(const FixedStreamArrayIterator<T> &R) const { |
||
| 341 | assert(Array == R.Array); |
||
| 342 | return (Index == R.Index) && (Array == R.Array); |
||
| 343 | } |
||
| 344 | |||
| 345 | FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { |
||
| 346 | Index += N; |
||
| 347 | return *this; |
||
| 348 | } |
||
| 349 | |||
| 350 | FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { |
||
| 351 | assert(std::ptrdiff_t(Index) >= N); |
||
| 352 | Index -= N; |
||
| 353 | return *this; |
||
| 354 | } |
||
| 355 | |||
| 356 | std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { |
||
| 357 | assert(Array == R.Array); |
||
| 358 | assert(Index >= R.Index); |
||
| 359 | return Index - R.Index; |
||
| 360 | } |
||
| 361 | |||
| 362 | bool operator<(const FixedStreamArrayIterator<T> &RHS) const { |
||
| 363 | assert(Array == RHS.Array); |
||
| 364 | return Index < RHS.Index; |
||
| 365 | } |
||
| 366 | |||
| 367 | private: |
||
| 368 | FixedStreamArray<T> Array; |
||
| 369 | uint32_t Index; |
||
| 370 | }; |
||
| 371 | |||
| 372 | } // namespace llvm |
||
| 373 | |||
| 374 | #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H |