Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file contains data definitions and a reader and builder for a symbol |
||
10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of |
||
11 | // bitcode files to efficiently read the symbol table for symbol resolution |
||
12 | // purposes without needing to construct a module in memory. |
||
13 | // |
||
14 | // As with most object files the symbol table has two parts: the symbol table |
||
15 | // itself and a string table which is referenced by the symbol table. |
||
16 | // |
||
17 | // A symbol table corresponds to a single bitcode file, which may consist of |
||
18 | // multiple modules, so symbol tables may likewise contain symbols for multiple |
||
19 | // modules. |
||
20 | // |
||
21 | //===----------------------------------------------------------------------===// |
||
22 | |||
23 | #ifndef LLVM_OBJECT_IRSYMTAB_H |
||
24 | #define LLVM_OBJECT_IRSYMTAB_H |
||
25 | |||
26 | #include "llvm/ADT/ArrayRef.h" |
||
27 | #include "llvm/ADT/StringRef.h" |
||
28 | #include "llvm/ADT/iterator_range.h" |
||
29 | #include "llvm/IR/Comdat.h" |
||
30 | #include "llvm/IR/GlobalValue.h" |
||
31 | #include "llvm/Object/SymbolicFile.h" |
||
32 | #include "llvm/Support/Allocator.h" |
||
33 | #include "llvm/Support/Endian.h" |
||
34 | #include "llvm/Support/Error.h" |
||
35 | #include <cassert> |
||
36 | #include <cstdint> |
||
37 | #include <vector> |
||
38 | |||
39 | namespace llvm { |
||
40 | |||
41 | struct BitcodeFileContents; |
||
42 | class StringTableBuilder; |
||
43 | |||
44 | namespace irsymtab { |
||
45 | |||
46 | namespace storage { |
||
47 | |||
48 | // The data structures in this namespace define the low-level serialization |
||
49 | // format. Clients that just want to read a symbol table should use the |
||
50 | // irsymtab::Reader class. |
||
51 | |||
52 | using Word = support::ulittle32_t; |
||
53 | |||
54 | /// A reference to a string in the string table. |
||
55 | struct Str { |
||
56 | Word Offset, Size; |
||
57 | |||
58 | StringRef get(StringRef Strtab) const { |
||
59 | return {Strtab.data() + Offset, Size}; |
||
60 | } |
||
61 | }; |
||
62 | |||
63 | /// A reference to a range of objects in the symbol table. |
||
64 | template <typename T> struct Range { |
||
65 | Word Offset, Size; |
||
66 | |||
67 | ArrayRef<T> get(StringRef Symtab) const { |
||
68 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; |
||
69 | } |
||
70 | }; |
||
71 | |||
72 | /// Describes the range of a particular module's symbols within the symbol |
||
73 | /// table. |
||
74 | struct Module { |
||
75 | Word Begin, End; |
||
76 | |||
77 | /// The index of the first Uncommon for this Module. |
||
78 | Word UncBegin; |
||
79 | }; |
||
80 | |||
81 | /// This is equivalent to an IR comdat. |
||
82 | struct Comdat { |
||
83 | Str Name; |
||
84 | |||
85 | // llvm::Comdat::SelectionKind |
||
86 | Word SelectionKind; |
||
87 | }; |
||
88 | |||
89 | /// Contains the information needed by linkers for symbol resolution, as well as |
||
90 | /// by the LTO implementation itself. |
||
91 | struct Symbol { |
||
92 | /// The mangled symbol name. |
||
93 | Str Name; |
||
94 | |||
95 | /// The unmangled symbol name, or the empty string if this is not an IR |
||
96 | /// symbol. |
||
97 | Str IRName; |
||
98 | |||
99 | /// The index into Header::Comdats, or -1 if not a comdat member. |
||
100 | Word ComdatIndex; |
||
101 | |||
102 | Word Flags; |
||
103 | enum FlagBits { |
||
104 | FB_visibility, // 2 bits |
||
105 | FB_has_uncommon = FB_visibility + 2, |
||
106 | FB_undefined, |
||
107 | FB_weak, |
||
108 | FB_common, |
||
109 | FB_indirect, |
||
110 | FB_used, |
||
111 | FB_tls, |
||
112 | FB_may_omit, |
||
113 | FB_global, |
||
114 | FB_format_specific, |
||
115 | FB_unnamed_addr, |
||
116 | FB_executable, |
||
117 | }; |
||
118 | }; |
||
119 | |||
120 | /// This data structure contains rarely used symbol fields and is optionally |
||
121 | /// referenced by a Symbol. |
||
122 | struct Uncommon { |
||
123 | Word CommonSize, CommonAlign; |
||
124 | |||
125 | /// COFF-specific: the name of the symbol that a weak external resolves to |
||
126 | /// if not defined. |
||
127 | Str COFFWeakExternFallbackName; |
||
128 | |||
129 | /// Specified section name, if any. |
||
130 | Str SectionName; |
||
131 | }; |
||
132 | |||
133 | |||
134 | struct Header { |
||
135 | /// Version number of the symtab format. This number should be incremented |
||
136 | /// when the format changes, but it does not need to be incremented if a |
||
137 | /// change to LLVM would cause it to create a different symbol table. |
||
138 | Word Version; |
||
139 | enum { kCurrentVersion = 3 }; |
||
140 | |||
141 | /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). |
||
142 | /// Consumers should rebuild the symbol table from IR if the producer's |
||
143 | /// version does not match the consumer's version due to potential differences |
||
144 | /// in symbol table format, symbol enumeration order and so on. |
||
145 | Str Producer; |
||
146 | |||
147 | Range<Module> Modules; |
||
148 | Range<Comdat> Comdats; |
||
149 | Range<Symbol> Symbols; |
||
150 | Range<Uncommon> Uncommons; |
||
151 | |||
152 | Str TargetTriple, SourceFileName; |
||
153 | |||
154 | /// COFF-specific: linker directives. |
||
155 | Str COFFLinkerOpts; |
||
156 | |||
157 | /// Dependent Library Specifiers |
||
158 | Range<Str> DependentLibraries; |
||
159 | }; |
||
160 | |||
161 | } // end namespace storage |
||
162 | |||
163 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for |
||
164 | /// Mods. |
||
165 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
||
166 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); |
||
167 | |||
168 | /// This represents a symbol that has been read from a storage::Symbol and |
||
169 | /// possibly a storage::Uncommon. |
||
170 | struct Symbol { |
||
171 | // Copied from storage::Symbol. |
||
172 | StringRef Name, IRName; |
||
173 | int ComdatIndex; |
||
174 | uint32_t Flags; |
||
175 | |||
176 | // Copied from storage::Uncommon. |
||
177 | uint32_t CommonSize, CommonAlign; |
||
178 | StringRef COFFWeakExternFallbackName; |
||
179 | StringRef SectionName; |
||
180 | |||
181 | /// Returns the mangled symbol name. |
||
182 | StringRef getName() const { return Name; } |
||
183 | |||
184 | /// Returns the unmangled symbol name, or the empty string if this is not an |
||
185 | /// IR symbol. |
||
186 | StringRef getIRName() const { return IRName; } |
||
187 | |||
188 | /// Returns the index into the comdat table (see Reader::getComdatTable()), or |
||
189 | /// -1 if not a comdat member. |
||
190 | int getComdatIndex() const { return ComdatIndex; } |
||
191 | |||
192 | using S = storage::Symbol; |
||
193 | |||
194 | GlobalValue::VisibilityTypes getVisibility() const { |
||
195 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); |
||
196 | } |
||
197 | |||
198 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } |
||
199 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } |
||
200 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } |
||
201 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } |
||
202 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } |
||
203 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } |
||
204 | |||
205 | bool canBeOmittedFromSymbolTable() const { |
||
206 | return (Flags >> S::FB_may_omit) & 1; |
||
207 | } |
||
208 | |||
209 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } |
||
210 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } |
||
211 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } |
||
212 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } |
||
213 | |||
214 | uint64_t getCommonSize() const { |
||
215 | assert(isCommon()); |
||
216 | return CommonSize; |
||
217 | } |
||
218 | |||
219 | uint32_t getCommonAlignment() const { |
||
220 | assert(isCommon()); |
||
221 | return CommonAlign; |
||
222 | } |
||
223 | |||
224 | /// COFF-specific: for weak externals, returns the name of the symbol that is |
||
225 | /// used as a fallback if the weak external remains undefined. |
||
226 | StringRef getCOFFWeakExternalFallback() const { |
||
227 | assert(isWeak() && isIndirect()); |
||
228 | return COFFWeakExternFallbackName; |
||
229 | } |
||
230 | |||
231 | StringRef getSectionName() const { return SectionName; } |
||
232 | }; |
||
233 | |||
234 | /// This class can be used to read a Symtab and Strtab produced by |
||
235 | /// irsymtab::build. |
||
236 | class Reader { |
||
237 | StringRef Symtab, Strtab; |
||
238 | |||
239 | ArrayRef<storage::Module> Modules; |
||
240 | ArrayRef<storage::Comdat> Comdats; |
||
241 | ArrayRef<storage::Symbol> Symbols; |
||
242 | ArrayRef<storage::Uncommon> Uncommons; |
||
243 | ArrayRef<storage::Str> DependentLibraries; |
||
244 | |||
245 | StringRef str(storage::Str S) const { return S.get(Strtab); } |
||
246 | |||
247 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { |
||
248 | return R.get(Symtab); |
||
249 | } |
||
250 | |||
251 | const storage::Header &header() const { |
||
252 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); |
||
253 | } |
||
254 | |||
255 | public: |
||
256 | class SymbolRef; |
||
257 | |||
258 | Reader() = default; |
||
259 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { |
||
260 | Modules = range(header().Modules); |
||
261 | Comdats = range(header().Comdats); |
||
262 | Symbols = range(header().Symbols); |
||
263 | Uncommons = range(header().Uncommons); |
||
264 | DependentLibraries = range(header().DependentLibraries); |
||
265 | } |
||
266 | |||
267 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; |
||
268 | |||
269 | /// Returns the symbol table for the entire bitcode file. |
||
270 | /// The symbols enumerated by this method are ephemeral, but they can be |
||
271 | /// copied into an irsymtab::Symbol object. |
||
272 | symbol_range symbols() const; |
||
273 | |||
274 | size_t getNumModules() const { return Modules.size(); } |
||
275 | |||
276 | /// Returns a slice of the symbol table for the I'th module in the file. |
||
277 | /// The symbols enumerated by this method are ephemeral, but they can be |
||
278 | /// copied into an irsymtab::Symbol object. |
||
279 | symbol_range module_symbols(unsigned I) const; |
||
280 | |||
281 | StringRef getTargetTriple() const { return str(header().TargetTriple); } |
||
282 | |||
283 | /// Returns the source file path specified at compile time. |
||
284 | StringRef getSourceFileName() const { return str(header().SourceFileName); } |
||
285 | |||
286 | /// Returns a table with all the comdats used by this file. |
||
287 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> |
||
288 | getComdatTable() const { |
||
289 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable; |
||
290 | ComdatTable.reserve(Comdats.size()); |
||
291 | for (auto C : Comdats) |
||
292 | ComdatTable.push_back({str(C.Name), llvm::Comdat::SelectionKind( |
||
293 | uint32_t(C.SelectionKind))}); |
||
294 | return ComdatTable; |
||
295 | } |
||
296 | |||
297 | /// COFF-specific: returns linker options specified in the input file. |
||
298 | StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } |
||
299 | |||
300 | /// Returns dependent library specifiers |
||
301 | std::vector<StringRef> getDependentLibraries() const { |
||
302 | std::vector<StringRef> Specifiers; |
||
303 | Specifiers.reserve(DependentLibraries.size()); |
||
304 | for (auto S : DependentLibraries) { |
||
305 | Specifiers.push_back(str(S)); |
||
306 | } |
||
307 | return Specifiers; |
||
308 | } |
||
309 | }; |
||
310 | |||
311 | /// Ephemeral symbols produced by Reader::symbols() and |
||
312 | /// Reader::module_symbols(). |
||
313 | class Reader::SymbolRef : public Symbol { |
||
314 | const storage::Symbol *SymI, *SymE; |
||
315 | const storage::Uncommon *UncI; |
||
316 | const Reader *R; |
||
317 | |||
318 | void read() { |
||
319 | if (SymI == SymE) |
||
320 | return; |
||
321 | |||
322 | Name = R->str(SymI->Name); |
||
323 | IRName = R->str(SymI->IRName); |
||
324 | ComdatIndex = SymI->ComdatIndex; |
||
325 | Flags = SymI->Flags; |
||
326 | |||
327 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { |
||
328 | CommonSize = UncI->CommonSize; |
||
329 | CommonAlign = UncI->CommonAlign; |
||
330 | COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); |
||
331 | SectionName = R->str(UncI->SectionName); |
||
332 | } else |
||
333 | // Reset this field so it can be queried unconditionally for all symbols. |
||
334 | SectionName = ""; |
||
335 | } |
||
336 | |||
337 | public: |
||
338 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, |
||
339 | const storage::Uncommon *UncI, const Reader *R) |
||
340 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { |
||
341 | read(); |
||
342 | } |
||
343 | |||
344 | void moveNext() { |
||
345 | ++SymI; |
||
346 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) |
||
347 | ++UncI; |
||
348 | read(); |
||
349 | } |
||
350 | |||
351 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } |
||
352 | }; |
||
353 | |||
354 | inline Reader::symbol_range Reader::symbols() const { |
||
355 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), |
||
356 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; |
||
357 | } |
||
358 | |||
359 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { |
||
360 | const storage::Module &M = Modules[I]; |
||
361 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, |
||
362 | *MEnd = Symbols.begin() + M.End; |
||
363 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), |
||
364 | SymbolRef(MEnd, MEnd, nullptr, this)}; |
||
365 | } |
||
366 | |||
367 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the |
||
368 | /// irsymtab are owned by Symtab and Strtab. |
||
369 | struct FileContents { |
||
370 | SmallVector<char, 0> Symtab, Strtab; |
||
371 | Reader TheReader; |
||
372 | }; |
||
373 | |||
374 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. |
||
375 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); |
||
376 | |||
377 | } // end namespace irsymtab |
||
378 | } // end namespace llvm |
||
379 | |||
380 | #endif // LLVM_OBJECT_IRSYMTAB_H |