Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This header defines interfaces to read LLVM bitcode files/streams. |
||
10 | // |
||
11 | //===----------------------------------------------------------------------===// |
||
12 | |||
13 | #ifndef LLVM_BITCODE_BITCODEREADER_H |
||
14 | #define LLVM_BITCODE_BITCODEREADER_H |
||
15 | |||
16 | #include "llvm/ADT/ArrayRef.h" |
||
17 | #include "llvm/ADT/StringRef.h" |
||
18 | #include "llvm/Bitstream/BitCodeEnums.h" |
||
19 | #include "llvm/IR/GlobalValue.h" |
||
20 | #include "llvm/Support/Endian.h" |
||
21 | #include "llvm/Support/Error.h" |
||
22 | #include "llvm/Support/ErrorOr.h" |
||
23 | #include "llvm/Support/MemoryBufferRef.h" |
||
24 | #include <cstdint> |
||
25 | #include <memory> |
||
26 | #include <optional> |
||
27 | #include <string> |
||
28 | #include <system_error> |
||
29 | #include <vector> |
||
30 | namespace llvm { |
||
31 | |||
32 | class LLVMContext; |
||
33 | class Module; |
||
34 | class MemoryBuffer; |
||
35 | class Metadata; |
||
36 | class ModuleSummaryIndex; |
||
37 | class Type; |
||
38 | class Value; |
||
39 | |||
40 | // Callback to override the data layout string of an imported bitcode module. |
||
41 | // The first argument is the target triple, the second argument the data layout |
||
42 | // string from the input, or a default string. It will be used if the callback |
||
43 | // returns std::nullopt. |
||
44 | typedef std::function<std::optional<std::string>(StringRef, StringRef)> |
||
45 | DataLayoutCallbackFuncTy; |
||
46 | |||
47 | typedef std::function<Type *(unsigned)> GetTypeByIDTy; |
||
48 | |||
49 | typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy; |
||
50 | |||
51 | typedef std::function<void(Value *, unsigned, GetTypeByIDTy, |
||
52 | GetContainedTypeIDTy)> |
||
53 | ValueTypeCallbackTy; |
||
54 | |||
55 | typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy, |
||
56 | GetContainedTypeIDTy)> |
||
57 | MDTypeCallbackTy; |
||
58 | |||
59 | // These functions are for converting Expected/Error values to |
||
60 | // ErrorOr/std::error_code for compatibility with legacy clients. FIXME: |
||
61 | // Remove these functions once no longer needed by the C and libLTO APIs. |
||
62 | |||
63 | std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err); |
||
64 | |||
65 | template <typename T> |
||
66 | ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) { |
||
67 | if (!Val) |
||
68 | return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError()); |
||
69 | return std::move(*Val); |
||
70 | } |
||
71 | |||
72 | struct ParserCallbacks { |
||
73 | std::optional<DataLayoutCallbackFuncTy> DataLayout; |
||
74 | /// The ValueType callback is called for every function definition or |
||
75 | /// declaration and allows accessing the type information, also behind |
||
76 | /// pointers. This can be useful, when the opaque pointer upgrade cleans all |
||
77 | /// type information behind pointers. |
||
78 | /// The second argument to ValueTypeCallback is the type ID of the |
||
79 | /// function, the two passed functions can be used to extract type |
||
80 | /// information. |
||
81 | std::optional<ValueTypeCallbackTy> ValueType; |
||
82 | /// The MDType callback is called for every value in metadata. |
||
83 | std::optional<MDTypeCallbackTy> MDType; |
||
84 | |||
85 | ParserCallbacks() = default; |
||
86 | explicit ParserCallbacks(DataLayoutCallbackFuncTy DataLayout) |
||
87 | : DataLayout(DataLayout) {} |
||
88 | }; |
||
89 | |||
90 | struct BitcodeFileContents; |
||
91 | |||
/// Basic information extracted from a bitcode module to be used for LTO.
///
/// All flags default to false so that a default-constructed instance never
/// carries indeterminate values. The type remains an aggregate in C++14 and
/// later, so brace-initialisation with three booleans keeps working.
struct BitcodeLTOInfo {
  /// Whether to compile with ThinLTO.
  bool IsThinLTO = false;
  /// Whether the module has a summary.
  bool HasSummary = false;
  /// NOTE(review): presumably mirrors the module's "split LTO unit" setting --
  /// confirm against the reader implementation.
  bool EnableSplitLTOUnit = false;
};
||
98 | |||
99 | /// Represents a module in a bitcode file. |
||
100 | class BitcodeModule { |
||
101 | // This covers the identification (if present) and module blocks. |
||
102 | ArrayRef<uint8_t> Buffer; |
||
103 | StringRef ModuleIdentifier; |
||
104 | |||
105 | // The string table used to interpret this module. |
||
106 | StringRef Strtab; |
||
107 | |||
108 | // The bitstream location of the IDENTIFICATION_BLOCK. |
||
109 | uint64_t IdentificationBit; |
||
110 | |||
111 | // The bitstream location of this module's MODULE_BLOCK. |
||
112 | uint64_t ModuleBit; |
||
113 | |||
114 | BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier, |
||
115 | uint64_t IdentificationBit, uint64_t ModuleBit) |
||
116 | : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier), |
||
117 | IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} |
||
118 | |||
119 | // Calls the ctor. |
||
120 | friend Expected<BitcodeFileContents> |
||
121 | getBitcodeFileContents(MemoryBufferRef Buffer); |
||
122 | |||
123 | Expected<std::unique_ptr<Module>> |
||
124 | getModuleImpl(LLVMContext &Context, bool MaterializeAll, |
||
125 | bool ShouldLazyLoadMetadata, bool IsImporting, |
||
126 | ParserCallbacks Callbacks = {}); |
||
127 | |||
128 | public: |
||
129 | StringRef getBuffer() const { |
||
130 | return StringRef((const char *)Buffer.begin(), Buffer.size()); |
||
131 | } |
||
132 | |||
133 | StringRef getStrtab() const { return Strtab; } |
||
134 | |||
135 | StringRef getModuleIdentifier() const { return ModuleIdentifier; } |
||
136 | |||
137 | /// Read the bitcode module and prepare for lazy deserialization of function |
||
138 | /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well. |
||
139 | /// If IsImporting is true, this module is being parsed for ThinLTO |
||
140 | /// importing into another module. |
||
141 | Expected<std::unique_ptr<Module>> |
||
142 | getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, |
||
143 | bool IsImporting, ParserCallbacks Callbacks = {}); |
||
144 | |||
145 | /// Read the entire bitcode module and return it. |
||
146 | Expected<std::unique_ptr<Module>> |
||
147 | parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {}); |
||
148 | |||
149 | /// Returns information about the module to be used for LTO: whether to |
||
150 | /// compile with ThinLTO, and whether it has a summary. |
||
151 | Expected<BitcodeLTOInfo> getLTOInfo(); |
||
152 | |||
153 | /// Parse the specified bitcode buffer, returning the module summary index. |
||
154 | Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary(); |
||
155 | |||
156 | /// Parse the specified bitcode buffer and merge its module summary index |
||
157 | /// into CombinedIndex. |
||
158 | Error |
||
159 | readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, |
||
160 | uint64_t ModuleId, |
||
161 | std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr); |
||
162 | }; |
||
163 | |||
164 | struct BitcodeFileContents { |
||
165 | std::vector<BitcodeModule> Mods; |
||
166 | StringRef Symtab, StrtabForSymtab; |
||
167 | }; |
||
168 | |||
169 | /// Returns the contents of a bitcode file. This includes the raw contents of |
||
170 | /// the symbol table embedded in the bitcode file. Clients which require a |
||
171 | /// symbol table should prefer to use irsymtab::read instead of this function |
||
172 | /// because it creates a reader for the irsymtab and handles upgrading bitcode |
||
173 | /// files without a symbol table or with an old symbol table. |
||
174 | Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); |
||
175 | |||
176 | /// Returns a list of modules in the specified bitcode buffer. |
||
177 | Expected<std::vector<BitcodeModule>> |
||
178 | getBitcodeModuleList(MemoryBufferRef Buffer); |
||
179 | |||
180 | /// Read the header of the specified bitcode buffer and prepare for lazy |
||
181 | /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, |
||
182 | /// lazily load metadata as well. If IsImporting is true, this module is |
||
183 | /// being parsed for ThinLTO importing into another module. |
||
184 | Expected<std::unique_ptr<Module>> |
||
185 | getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, |
||
186 | bool ShouldLazyLoadMetadata = false, |
||
187 | bool IsImporting = false, |
||
188 | ParserCallbacks Callbacks = {}); |
||
189 | |||
190 | /// Like getLazyBitcodeModule, except that the module takes ownership of |
||
191 | /// the memory buffer if successful. If successful, this moves Buffer. On |
||
192 | /// error, this *does not* move Buffer. If IsImporting is true, this module is |
||
193 | /// being parsed for ThinLTO importing into another module. |
||
194 | Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule( |
||
195 | std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context, |
||
196 | bool ShouldLazyLoadMetadata = false, bool IsImporting = false, |
||
197 | ParserCallbacks Callbacks = {}); |
||
198 | |||
199 | /// Read the header of the specified bitcode buffer and extract just the |
||
200 | /// triple information. If successful, this returns the triple string; |
||
201 | /// failures are reported through the returned Expected. |
||
202 | Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer); |
||
203 | |||
204 | /// Return true if \p Buffer contains a bitcode file with ObjC code (category |
||
205 | /// or class) in it. |
||
206 | Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer); |
||
207 | |||
208 | /// Read the header of the specified bitcode buffer and extract just the |
||
209 | /// producer string information. If successful, this returns the producer |
||
210 | /// string; failures are reported through the returned Expected. |
||
211 | Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer); |
||
212 | |||
213 | /// Read the specified bitcode file, returning the module. |
||
214 | Expected<std::unique_ptr<Module>> |
||
215 | parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, |
||
216 | ParserCallbacks Callbacks = {}); |
||
217 | |||
218 | /// Returns LTO information for the specified bitcode file. |
||
219 | Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer); |
||
220 | |||
221 | /// Parse the specified bitcode buffer, returning the module summary index. |
||
222 | Expected<std::unique_ptr<ModuleSummaryIndex>> |
||
223 | getModuleSummaryIndex(MemoryBufferRef Buffer); |
||
224 | |||
225 | /// Parse the specified bitcode buffer and merge the index into CombinedIndex. |
||
226 | Error readModuleSummaryIndex(MemoryBufferRef Buffer, |
||
227 | ModuleSummaryIndex &CombinedIndex, |
||
228 | uint64_t ModuleId); |
||
229 | |||
230 | /// Parse the module summary index out of an IR file and return the module |
||
231 | /// summary index object if found, or an empty summary if not. If Path refers |
||
232 | /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then |
||
233 | /// this function will return nullptr. |
||
234 | Expected<std::unique_ptr<ModuleSummaryIndex>> |
||
235 | getModuleSummaryIndexForFile(StringRef Path, |
||
236 | bool IgnoreEmptyThinLTOIndexFile = false); |
||
237 | |||
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte of the buffer.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and they are
/// DE C0 17 0B; any shorter buffer yields false without being read.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes must lie inside [BufPtr, BufEnd) before any of them
  // is inspected; checking only for a non-empty buffer would read past the
  // end of a 1-3 byte input.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
||
250 | |||
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte of the buffer.
/// \param BufEnd one past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and they are
/// 'B' 'C' C0 DE; any shorter buffer yields false without being read.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes must lie inside [BufPtr, BufEnd) before any of them
  // is inspected; checking only for a non-empty buffer would read past the
  // end of a 1-3 byte input.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
||
263 | |||
264 | /// isBitcode - Return true if the given bytes are the magic bytes for |
||
265 | /// LLVM IR bitcode, either with or without a wrapper. |
||
266 | inline bool isBitcode(const unsigned char *BufPtr, |
||
267 | const unsigned char *BufEnd) { |
||
268 | return isBitcodeWrapper(BufPtr, BufEnd) || |
||
269 | isRawBitcode(BufPtr, BufEnd); |
||
270 | } |
||
271 | |||
272 | /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special |
||
273 | /// header for padding or other reasons. The format of this header is: |
||
274 | /// |
||
275 | /// struct bc_header { |
||
276 | /// uint32_t Magic; // 0x0B17C0DE |
||
277 | /// uint32_t Version; // Version, currently always 0. |
||
278 | /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. |
||
279 | /// uint32_t BitcodeSize; // Size of traditional bitcode file. |
||
280 | /// ... potentially other gunk ... |
||
281 | /// }; |
||
282 | /// |
||
283 | /// This function is called when we find a file with a matching magic number. |
||
284 | /// In this case, skip down to the subsection of the file that is actually a |
||
285 | /// BC file. |
||
286 | /// If 'VerifyBufferSize' is true, check that the buffer is large enough to |
||
287 | /// contain the whole bitcode file. |
||
288 | inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, |
||
289 | const unsigned char *&BufEnd, |
||
290 | bool VerifyBufferSize) { |
||
291 | // Must contain the offset and size field! |
||
292 | if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) |
||
293 | return true; |
||
294 | |||
295 | unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); |
||
296 | unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); |
||
297 | uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size; |
||
298 | |||
299 | // Verify that Offset+Size fits in the file. |
||
300 | if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr)) |
||
301 | return true; |
||
302 | BufPtr += Offset; |
||
303 | BufEnd = BufPtr+Size; |
||
304 | return false; |
||
305 | } |
||
306 | |||
307 | APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits); |
||
308 | |||
/// Returns the error category used for BitcodeError values (defined out of
/// line).
const std::error_category &BitcodeErrorCategory();

/// Error codes in the bitcode error category. Values start at 1.
enum class BitcodeError {
  CorruptedBitcode = 1
};

/// Builds a std::error_code from \p E: the enumerator's integer value paired
/// with BitcodeErrorCategory().
inline std::error_code make_error_code(BitcodeError E) {
  return {static_cast<int>(E), BitcodeErrorCategory()};
}
||
314 | |||
315 | } // end namespace llvm |
||
316 | |||
317 | namespace std { |
||
318 | |||
319 | template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {}; |
||
320 | |||
321 | } // end namespace std |
||
322 | |||
323 | #endif // LLVM_BITCODE_BITCODEREADER_H |