Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This header defines the BitstreamReader class.  This class can be used to
10
// read an arbitrary bitstream, regardless of its contents.
11
//
12
//===----------------------------------------------------------------------===//
13
 
14
#ifndef LLVM_BITSTREAM_BITSTREAMREADER_H
15
#define LLVM_BITSTREAM_BITSTREAMREADER_H
16
 
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/SmallVector.h"
19
#include "llvm/Bitstream/BitCodes.h"
20
#include "llvm/Support/Endian.h"
21
#include "llvm/Support/Error.h"
22
#include "llvm/Support/MemoryBufferRef.h"
23
#include <algorithm>
24
#include <cassert>
25
#include <climits>
26
#include <cstddef>
27
#include <cstdint>
28
#include <memory>
29
#include <optional>
30
#include <string>
31
#include <utility>
32
#include <vector>
33
 
34
namespace llvm {
35
 
36
/// This class maintains the abbreviations read from a block info block.
37
class BitstreamBlockInfo {
38
public:
39
  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
40
  /// describe abbreviations that all blocks of the specified ID inherit.
41
  struct BlockInfo {
42
    unsigned BlockID = 0;
43
    std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
44
    std::string Name;
45
    std::vector<std::pair<unsigned, std::string>> RecordNames;
46
  };
47
 
48
private:
49
  std::vector<BlockInfo> BlockInfoRecords;
50
 
51
public:
52
  /// If there is block info for the specified ID, return it, otherwise return
53
  /// null.
54
  const BlockInfo *getBlockInfo(unsigned BlockID) const {
55
    // Common case, the most recent entry matches BlockID.
56
    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
57
      return &BlockInfoRecords.back();
58
 
59
    for (const BlockInfo &BI : BlockInfoRecords)
60
      if (BI.BlockID == BlockID)
61
        return &BI;
62
    return nullptr;
63
  }
64
 
65
  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
66
    if (const BlockInfo *BI = getBlockInfo(BlockID))
67
      return *const_cast<BlockInfo*>(BI);
68
 
69
    // Otherwise, add a new record.
70
    BlockInfoRecords.emplace_back();
71
    BlockInfoRecords.back().BlockID = BlockID;
72
    return BlockInfoRecords.back();
73
  }
74
};
75
 
76
/// This represents a position within a bitstream. There may be multiple
77
/// independent cursors reading within one bitstream, each maintaining their
78
/// own local state.
79
class SimpleBitstreamCursor {
80
  ArrayRef<uint8_t> BitcodeBytes;
81
  size_t NextChar = 0;
82
 
83
public:
84
  /// This is the current data we have pulled from the stream but have not
85
  /// returned to the client. This is specifically and intentionally defined to
86
  /// follow the word size of the host machine for efficiency. We use word_t in
87
  /// places that are aware of this to make it perfectly explicit what is going
88
  /// on.
89
  using word_t = size_t;
90
 
91
private:
92
  word_t CurWord = 0;
93
 
94
  /// This is the number of bits in CurWord that are valid. This is always from
95
  /// [0...bits_of(size_t)-1] inclusive.
96
  unsigned BitsInCurWord = 0;
97
 
98
public:
99
  SimpleBitstreamCursor() = default;
100
  explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
101
      : BitcodeBytes(BitcodeBytes) {}
102
  explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
103
      : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
104
  explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
105
      : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
106
 
107
  bool canSkipToPos(size_t pos) const {
108
    // pos can be skipped to if it is a valid address or one byte past the end.
109
    return pos <= BitcodeBytes.size();
110
  }
111
 
112
  bool AtEndOfStream() {
113
    return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
114
  }
115
 
116
  /// Return the bit # of the bit we are reading.
117
  uint64_t GetCurrentBitNo() const {
118
    return NextChar*CHAR_BIT - BitsInCurWord;
119
  }
120
 
121
  // Return the byte # of the current bit.
122
  uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
123
 
124
  ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
125
 
126
  /// Reset the stream to the specified bit number.
127
  Error JumpToBit(uint64_t BitNo) {
128
    size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
129
    unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
130
    assert(canSkipToPos(ByteNo) && "Invalid location");
131
 
132
    // Move the cursor to the right word.
133
    NextChar = ByteNo;
134
    BitsInCurWord = 0;
135
 
136
    // Skip over any bits that are already consumed.
137
    if (WordBitNo) {
138
      if (Expected<word_t> Res = Read(WordBitNo))
139
        return Error::success();
140
      else
141
        return Res.takeError();
142
    }
143
 
144
    return Error::success();
145
  }
146
 
147
  /// Get a pointer into the bitstream at the specified byte offset.
148
  const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
149
    return BitcodeBytes.data() + ByteNo;
150
  }
151
 
152
  /// Get a pointer into the bitstream at the specified bit offset.
153
  ///
154
  /// The bit offset must be on a byte boundary.
155
  const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
156
    assert(!(BitNo % 8) && "Expected bit on byte boundary");
157
    return getPointerToByte(BitNo / 8, NumBytes);
158
  }
159
 
160
  Error fillCurWord() {
161
    if (NextChar >= BitcodeBytes.size())
162
      return createStringError(std::errc::io_error,
163
                               "Unexpected end of file reading %u of %u bytes",
164
                               NextChar, BitcodeBytes.size());
165
 
166
    // Read the next word from the stream.
167
    const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
168
    unsigned BytesRead;
169
    if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
170
      BytesRead = sizeof(word_t);
171
      CurWord =
172
          support::endian::read<word_t, support::little, support::unaligned>(
173
              NextCharPtr);
174
    } else {
175
      // Short read.
176
      BytesRead = BitcodeBytes.size() - NextChar;
177
      CurWord = 0;
178
      for (unsigned B = 0; B != BytesRead; ++B)
179
        CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
180
    }
181
    NextChar += BytesRead;
182
    BitsInCurWord = BytesRead * 8;
183
    return Error::success();
184
  }
185
 
186
  Expected<word_t> Read(unsigned NumBits) {
187
    static const unsigned BitsInWord = sizeof(word_t) * 8;
188
 
189
    assert(NumBits && NumBits <= BitsInWord &&
190
           "Cannot return zero or more than BitsInWord bits!");
191
 
192
    static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
193
 
194
    // If the field is fully contained by CurWord, return it quickly.
195
    if (BitsInCurWord >= NumBits) {
196
      word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
197
 
198
      // Use a mask to avoid undefined behavior.
199
      CurWord >>= (NumBits & Mask);
200
 
201
      BitsInCurWord -= NumBits;
202
      return R;
203
    }
204
 
205
    word_t R = BitsInCurWord ? CurWord : 0;
206
    unsigned BitsLeft = NumBits - BitsInCurWord;
207
 
208
    if (Error fillResult = fillCurWord())
209
      return std::move(fillResult);
210
 
211
    // If we run out of data, abort.
212
    if (BitsLeft > BitsInCurWord)
213
      return createStringError(std::errc::io_error,
214
                               "Unexpected end of file reading %u of %u bits",
215
                               BitsInCurWord, BitsLeft);
216
 
217
    word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
218
 
219
    // Use a mask to avoid undefined behavior.
220
    CurWord >>= (BitsLeft & Mask);
221
 
222
    BitsInCurWord -= BitsLeft;
223
 
224
    R |= R2 << (NumBits - BitsLeft);
225
 
226
    return R;
227
  }
228
 
229
  Expected<uint32_t> ReadVBR(const unsigned NumBits) {
230
    Expected<unsigned> MaybeRead = Read(NumBits);
231
    if (!MaybeRead)
232
      return MaybeRead;
233
    uint32_t Piece = MaybeRead.get();
234
 
235
    assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
236
    const uint32_t MaskBitOrder = (NumBits - 1);
237
    const uint32_t Mask = 1UL << MaskBitOrder;
238
 
239
    if ((Piece & Mask) == 0)
240
      return Piece;
241
 
242
    uint32_t Result = 0;
243
    unsigned NextBit = 0;
244
    while (true) {
245
      Result |= (Piece & (Mask - 1)) << NextBit;
246
 
247
      if ((Piece & Mask) == 0)
248
        return Result;
249
 
250
      NextBit += NumBits-1;
251
      if (NextBit >= 32)
252
        return createStringError(std::errc::illegal_byte_sequence,
253
                                 "Unterminated VBR");
254
 
255
      MaybeRead = Read(NumBits);
256
      if (!MaybeRead)
257
        return MaybeRead;
258
      Piece = MaybeRead.get();
259
    }
260
  }
261
 
262
  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
263
  // the VBR must still be <= 32 bits though.
264
  Expected<uint64_t> ReadVBR64(const unsigned NumBits) {
265
    Expected<uint64_t> MaybeRead = Read(NumBits);
266
    if (!MaybeRead)
267
      return MaybeRead;
268
    uint32_t Piece = MaybeRead.get();
269
    assert(NumBits <= 32 && NumBits >= 1 && "Invalid NumBits value");
270
    const uint32_t MaskBitOrder = (NumBits - 1);
271
    const uint32_t Mask = 1UL << MaskBitOrder;
272
 
273
    if ((Piece & Mask) == 0)
274
      return uint64_t(Piece);
275
 
276
    uint64_t Result = 0;
277
    unsigned NextBit = 0;
278
    while (true) {
279
      Result |= uint64_t(Piece & (Mask - 1)) << NextBit;
280
 
281
      if ((Piece & Mask) == 0)
282
        return Result;
283
 
284
      NextBit += NumBits-1;
285
      if (NextBit >= 64)
286
        return createStringError(std::errc::illegal_byte_sequence,
287
                                 "Unterminated VBR");
288
 
289
      MaybeRead = Read(NumBits);
290
      if (!MaybeRead)
291
        return MaybeRead;
292
      Piece = MaybeRead.get();
293
    }
294
  }
295
 
296
  void SkipToFourByteBoundary() {
297
    // If word_t is 64-bits and if we've read less than 32 bits, just dump
298
    // the bits we have up to the next 32-bit boundary.
299
    if (sizeof(word_t) > 4 &&
300
        BitsInCurWord >= 32) {
301
      CurWord >>= BitsInCurWord-32;
302
      BitsInCurWord = 32;
303
      return;
304
    }
305
 
306
    BitsInCurWord = 0;
307
  }
308
 
309
  /// Return the size of the stream in bytes.
310
  size_t SizeInBytes() const { return BitcodeBytes.size(); }
311
 
312
  /// Skip to the end of the file.
313
  void skipToEnd() { NextChar = BitcodeBytes.size(); }
314
 
315
  /// Check whether a reservation of Size elements is plausible.
316
  bool isSizePlausible(size_t Size) const {
317
    // Don't allow reserving more elements than the number of bits, assuming
318
    // at least one bit is needed to encode an element.
319
    return Size < BitcodeBytes.size() * 8;
320
  }
321
};
322
 
323
/// When advancing through a bitstream cursor, each advance can discover a few
324
/// different kinds of entries:
325
struct BitstreamEntry {
326
  enum {
327
    Error,    // Malformed bitcode was found.
328
    EndBlock, // We've reached the end of the current block, (or the end of the
329
              // file, which is treated like a series of EndBlock records.
330
    SubBlock, // This is the start of a new subblock of a specific ID.
331
    Record    // This is a record with a specific AbbrevID.
332
  } Kind;
333
 
334
  unsigned ID;
335
 
336
  static BitstreamEntry getError() {
337
    BitstreamEntry E; E.Kind = Error; return E;
338
  }
339
 
340
  static BitstreamEntry getEndBlock() {
341
    BitstreamEntry E; E.Kind = EndBlock; return E;
342
  }
343
 
344
  static BitstreamEntry getSubBlock(unsigned ID) {
345
    BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
346
  }
347
 
348
  static BitstreamEntry getRecord(unsigned AbbrevID) {
349
    BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
350
  }
351
};
352
 
353
/// This represents a position within a bitcode file, implemented on top of a
354
/// SimpleBitstreamCursor.
355
///
356
/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
357
/// be passed by value.
358
class BitstreamCursor : SimpleBitstreamCursor {
359
  // This is the declared size of code values used for the current block, in
360
  // bits.
361
  unsigned CurCodeSize = 2;
362
 
363
  /// Abbrevs installed at in this block.
364
  std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
365
 
366
  struct Block {
367
    unsigned PrevCodeSize;
368
    std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
369
 
370
    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
371
  };
372
 
373
  /// This tracks the codesize of parent blocks.
374
  SmallVector<Block, 8> BlockScope;
375
 
376
  BitstreamBlockInfo *BlockInfo = nullptr;
377
 
378
public:
379
  static const size_t MaxChunkSize = 32;
380
 
381
  BitstreamCursor() = default;
382
  explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
383
      : SimpleBitstreamCursor(BitcodeBytes) {}
384
  explicit BitstreamCursor(StringRef BitcodeBytes)
385
      : SimpleBitstreamCursor(BitcodeBytes) {}
386
  explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
387
      : SimpleBitstreamCursor(BitcodeBytes) {}
388
 
389
  using SimpleBitstreamCursor::AtEndOfStream;
390
  using SimpleBitstreamCursor::canSkipToPos;
391
  using SimpleBitstreamCursor::fillCurWord;
392
  using SimpleBitstreamCursor::getBitcodeBytes;
393
  using SimpleBitstreamCursor::GetCurrentBitNo;
394
  using SimpleBitstreamCursor::getCurrentByteNo;
395
  using SimpleBitstreamCursor::getPointerToByte;
396
  using SimpleBitstreamCursor::JumpToBit;
397
  using SimpleBitstreamCursor::Read;
398
  using SimpleBitstreamCursor::ReadVBR;
399
  using SimpleBitstreamCursor::ReadVBR64;
400
  using SimpleBitstreamCursor::SizeInBytes;
401
  using SimpleBitstreamCursor::skipToEnd;
402
 
403
  /// Return the number of bits used to encode an abbrev #.
404
  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
405
 
406
  /// Flags that modify the behavior of advance().
407
  enum {
408
    /// If this flag is used, the advance() method does not automatically pop
409
    /// the block scope when the end of a block is reached.
410
    AF_DontPopBlockAtEnd = 1,
411
 
412
    /// If this flag is used, abbrev entries are returned just like normal
413
    /// records.
414
    AF_DontAutoprocessAbbrevs = 2
415
  };
416
 
417
  /// Advance the current bitstream, returning the next entry in the stream.
418
  Expected<BitstreamEntry> advance(unsigned Flags = 0) {
419
    while (true) {
420
      if (AtEndOfStream())
421
        return BitstreamEntry::getError();
422
 
423
      Expected<unsigned> MaybeCode = ReadCode();
424
      if (!MaybeCode)
425
        return MaybeCode.takeError();
426
      unsigned Code = MaybeCode.get();
427
 
428
      if (Code == bitc::END_BLOCK) {
429
        // Pop the end of the block unless Flags tells us not to.
430
        if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
431
          return BitstreamEntry::getError();
432
        return BitstreamEntry::getEndBlock();
433
      }
434
 
435
      if (Code == bitc::ENTER_SUBBLOCK) {
436
        if (Expected<unsigned> MaybeSubBlock = ReadSubBlockID())
437
          return BitstreamEntry::getSubBlock(MaybeSubBlock.get());
438
        else
439
          return MaybeSubBlock.takeError();
440
      }
441
 
442
      if (Code == bitc::DEFINE_ABBREV &&
443
          !(Flags & AF_DontAutoprocessAbbrevs)) {
444
        // We read and accumulate abbrev's, the client can't do anything with
445
        // them anyway.
446
        if (Error Err = ReadAbbrevRecord())
447
          return std::move(Err);
448
        continue;
449
      }
450
 
451
      return BitstreamEntry::getRecord(Code);
452
    }
453
  }
454
 
455
  /// This is a convenience function for clients that don't expect any
456
  /// subblocks. This just skips over them automatically.
457
  Expected<BitstreamEntry> advanceSkippingSubblocks(unsigned Flags = 0) {
458
    while (true) {
459
      // If we found a normal entry, return it.
460
      Expected<BitstreamEntry> MaybeEntry = advance(Flags);
461
      if (!MaybeEntry)
462
        return MaybeEntry;
463
      BitstreamEntry Entry = MaybeEntry.get();
464
 
465
      if (Entry.Kind != BitstreamEntry::SubBlock)
466
        return Entry;
467
 
468
      // If we found a sub-block, just skip over it and check the next entry.
469
      if (Error Err = SkipBlock())
470
        return std::move(Err);
471
    }
472
  }
473
 
474
  Expected<unsigned> ReadCode() { return Read(CurCodeSize); }
475
 
476
  // Block header:
477
  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
478
 
479
  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
480
  Expected<unsigned> ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); }
481
 
482
  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
483
  /// of this block.
484
  Error SkipBlock() {
485
    // Read and ignore the codelen value.
486
    if (Expected<uint32_t> Res = ReadVBR(bitc::CodeLenWidth))
487
      ; // Since we are skipping this block, we don't care what code widths are
488
        // used inside of it.
489
    else
490
      return Res.takeError();
491
 
492
    SkipToFourByteBoundary();
493
    Expected<unsigned> MaybeNum = Read(bitc::BlockSizeWidth);
494
    if (!MaybeNum)
495
      return MaybeNum.takeError();
496
    size_t NumFourBytes = MaybeNum.get();
497
 
498
    // Check that the block wasn't partially defined, and that the offset isn't
499
    // bogus.
500
    size_t SkipTo = GetCurrentBitNo() + NumFourBytes * 4 * 8;
501
    if (AtEndOfStream())
502
      return createStringError(std::errc::illegal_byte_sequence,
503
                               "can't skip block: already at end of stream");
504
    if (!canSkipToPos(SkipTo / 8))
505
      return createStringError(std::errc::illegal_byte_sequence,
506
                               "can't skip to bit %zu from %" PRIu64, SkipTo,
507
                               GetCurrentBitNo());
508
 
509
    if (Error Res = JumpToBit(SkipTo))
510
      return Res;
511
 
512
    return Error::success();
513
  }
514
 
515
  /// Having read the ENTER_SUBBLOCK abbrevid, and enter the block.
516
  Error EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
517
 
518
  bool ReadBlockEnd() {
519
    if (BlockScope.empty()) return true;
520
 
521
    // Block tail:
522
    //    [END_BLOCK, <align4bytes>]
523
    SkipToFourByteBoundary();
524
 
525
    popBlockScope();
526
    return false;
527
  }
528
 
529
private:
530
  void popBlockScope() {
531
    CurCodeSize = BlockScope.back().PrevCodeSize;
532
 
533
    CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
534
    BlockScope.pop_back();
535
  }
536
 
537
  //===--------------------------------------------------------------------===//
538
  // Record Processing
539
  //===--------------------------------------------------------------------===//
540
 
541
public:
542
  /// Return the abbreviation for the specified AbbrevId.
543
  Expected<const BitCodeAbbrev *> getAbbrev(unsigned AbbrevID) {
544
    unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
545
    if (AbbrevNo >= CurAbbrevs.size())
546
      return createStringError(
547
          std::errc::illegal_byte_sequence, "Invalid abbrev number");
548
    return CurAbbrevs[AbbrevNo].get();
549
  }
550
 
551
  /// Read the current record and discard it, returning the code for the record.
552
  Expected<unsigned> skipRecord(unsigned AbbrevID);
553
 
554
  Expected<unsigned> readRecord(unsigned AbbrevID,
555
                                SmallVectorImpl<uint64_t> &Vals,
556
                                StringRef *Blob = nullptr);
557
 
558
  //===--------------------------------------------------------------------===//
559
  // Abbrev Processing
560
  //===--------------------------------------------------------------------===//
561
  Error ReadAbbrevRecord();
562
 
563
  /// Read and return a block info block from the bitstream. If an error was
564
  /// encountered, return std::nullopt.
565
  ///
566
  /// \param ReadBlockInfoNames Whether to read block/record name information in
567
  /// the BlockInfo block. Only llvm-bcanalyzer uses this.
568
  Expected<std::optional<BitstreamBlockInfo>>
569
  ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
570
 
571
  /// Set the block info to be used by this BitstreamCursor to interpret
572
  /// abbreviated records.
573
  void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
574
};
575
 
576
} // end llvm namespace
577
 
578
#endif // LLVM_BITSTREAM_BITSTREAMREADER_H