Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // |
||
| 9 | // This file contains data definitions and a reader and builder for a symbol |
||
| 10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of |
||
| 11 | // bitcode files to efficiently read the symbol table for symbol resolution |
||
| 12 | // purposes without needing to construct a module in memory. |
||
| 13 | // |
||
| 14 | // As with most object files the symbol table has two parts: the symbol table |
||
| 15 | // itself and a string table which is referenced by the symbol table. |
||
| 16 | // |
||
| 17 | // A symbol table corresponds to a single bitcode file, which may consist of |
||
| 18 | // multiple modules, so symbol tables may likewise contain symbols for multiple |
||
| 19 | // modules. |
||
| 20 | // |
||
| 21 | //===----------------------------------------------------------------------===// |
||
| 22 | |||
| 23 | #ifndef LLVM_OBJECT_IRSYMTAB_H |
||
| 24 | #define LLVM_OBJECT_IRSYMTAB_H |
||
| 25 | |||
| 26 | #include "llvm/ADT/ArrayRef.h" |
||
| 27 | #include "llvm/ADT/StringRef.h" |
||
| 28 | #include "llvm/ADT/iterator_range.h" |
||
| 29 | #include "llvm/IR/Comdat.h" |
||
| 30 | #include "llvm/IR/GlobalValue.h" |
||
| 31 | #include "llvm/Object/SymbolicFile.h" |
||
| 32 | #include "llvm/Support/Allocator.h" |
||
| 33 | #include "llvm/Support/Endian.h" |
||
| 34 | #include "llvm/Support/Error.h" |
||
| 35 | #include <cassert> |
||
| 36 | #include <cstdint> |
||
| 37 | #include <vector> |
||
| 38 | |||
| 39 | namespace llvm { |
||
| 40 | |||
| 41 | struct BitcodeFileContents; |
||
| 42 | class StringTableBuilder; |
||
| 43 | |||
| 44 | namespace irsymtab { |
||
| 45 | |||
| 46 | namespace storage { |
||
| 47 | |||
| 48 | // The data structures in this namespace define the low-level serialization |
||
| 49 | // format. Clients that just want to read a symbol table should use the |
||
| 50 | // irsymtab::Reader class. |
||
| 51 | |||
| 52 | using Word = support::ulittle32_t; |
||
| 53 | |||
| 54 | /// A reference to a string in the string table. |
||
| 55 | struct Str { |
||
| 56 | Word Offset, Size; |
||
| 57 | |||
| 58 | StringRef get(StringRef Strtab) const { |
||
| 59 | return {Strtab.data() + Offset, Size}; |
||
| 60 | } |
||
| 61 | }; |
||
| 62 | |||
| 63 | /// A reference to a range of objects in the symbol table. |
||
| 64 | template <typename T> struct Range { |
||
| 65 | Word Offset, Size; |
||
| 66 | |||
| 67 | ArrayRef<T> get(StringRef Symtab) const { |
||
| 68 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; |
||
| 69 | } |
||
| 70 | }; |
||
| 71 | |||
| 72 | /// Describes the range of a particular module's symbols within the symbol |
||
| 73 | /// table. |
||
| 74 | struct Module { |
||
| 75 | Word Begin, End; |
||
| 76 | |||
| 77 | /// The index of the first Uncommon for this Module. |
||
| 78 | Word UncBegin; |
||
| 79 | }; |
||
| 80 | |||
| 81 | /// This is equivalent to an IR comdat. |
||
| 82 | struct Comdat { |
||
| 83 | Str Name; |
||
| 84 | |||
| 85 | // llvm::Comdat::SelectionKind |
||
| 86 | Word SelectionKind; |
||
| 87 | }; |
||
| 88 | |||
| 89 | /// Contains the information needed by linkers for symbol resolution, as well as |
||
| 90 | /// by the LTO implementation itself. |
||
| 91 | struct Symbol { |
||
| 92 | /// The mangled symbol name. |
||
| 93 | Str Name; |
||
| 94 | |||
| 95 | /// The unmangled symbol name, or the empty string if this is not an IR |
||
| 96 | /// symbol. |
||
| 97 | Str IRName; |
||
| 98 | |||
| 99 | /// The index into Header::Comdats, or -1 if not a comdat member. |
||
| 100 | Word ComdatIndex; |
||
| 101 | |||
| 102 | Word Flags; |
||
| 103 | enum FlagBits { |
||
| 104 | FB_visibility, // 2 bits |
||
| 105 | FB_has_uncommon = FB_visibility + 2, |
||
| 106 | FB_undefined, |
||
| 107 | FB_weak, |
||
| 108 | FB_common, |
||
| 109 | FB_indirect, |
||
| 110 | FB_used, |
||
| 111 | FB_tls, |
||
| 112 | FB_may_omit, |
||
| 113 | FB_global, |
||
| 114 | FB_format_specific, |
||
| 115 | FB_unnamed_addr, |
||
| 116 | FB_executable, |
||
| 117 | }; |
||
| 118 | }; |
||
| 119 | |||
| 120 | /// This data structure contains rarely used symbol fields and is optionally |
||
| 121 | /// referenced by a Symbol. |
||
| 122 | struct Uncommon { |
||
| 123 | Word CommonSize, CommonAlign; |
||
| 124 | |||
| 125 | /// COFF-specific: the name of the symbol that a weak external resolves to |
||
| 126 | /// if not defined. |
||
| 127 | Str COFFWeakExternFallbackName; |
||
| 128 | |||
| 129 | /// Specified section name, if any. |
||
| 130 | Str SectionName; |
||
| 131 | }; |
||
| 132 | |||
| 133 | |||
| 134 | struct Header { |
||
| 135 | /// Version number of the symtab format. This number should be incremented |
||
| 136 | /// when the format changes, but it does not need to be incremented if a |
||
| 137 | /// change to LLVM would cause it to create a different symbol table. |
||
| 138 | Word Version; |
||
| 139 | enum { kCurrentVersion = 3 }; |
||
| 140 | |||
| 141 | /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). |
||
| 142 | /// Consumers should rebuild the symbol table from IR if the producer's |
||
| 143 | /// version does not match the consumer's version due to potential differences |
||
| 144 | /// in symbol table format, symbol enumeration order and so on. |
||
| 145 | Str Producer; |
||
| 146 | |||
| 147 | Range<Module> Modules; |
||
| 148 | Range<Comdat> Comdats; |
||
| 149 | Range<Symbol> Symbols; |
||
| 150 | Range<Uncommon> Uncommons; |
||
| 151 | |||
| 152 | Str TargetTriple, SourceFileName; |
||
| 153 | |||
| 154 | /// COFF-specific: linker directives. |
||
| 155 | Str COFFLinkerOpts; |
||
| 156 | |||
| 157 | /// Dependent Library Specifiers |
||
| 158 | Range<Str> DependentLibraries; |
||
| 159 | }; |
||
| 160 | |||
| 161 | } // end namespace storage |
||
| 162 | |||
| 163 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for |
||
| 164 | /// Mods. |
||
| 165 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, |
||
| 166 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); |
||
| 167 | |||
| 168 | /// This represents a symbol that has been read from a storage::Symbol and |
||
| 169 | /// possibly a storage::Uncommon. |
||
| 170 | struct Symbol { |
||
| 171 | // Copied from storage::Symbol. |
||
| 172 | StringRef Name, IRName; |
||
| 173 | int ComdatIndex; |
||
| 174 | uint32_t Flags; |
||
| 175 | |||
| 176 | // Copied from storage::Uncommon. |
||
| 177 | uint32_t CommonSize, CommonAlign; |
||
| 178 | StringRef COFFWeakExternFallbackName; |
||
| 179 | StringRef SectionName; |
||
| 180 | |||
| 181 | /// Returns the mangled symbol name. |
||
| 182 | StringRef getName() const { return Name; } |
||
| 183 | |||
| 184 | /// Returns the unmangled symbol name, or the empty string if this is not an |
||
| 185 | /// IR symbol. |
||
| 186 | StringRef getIRName() const { return IRName; } |
||
| 187 | |||
| 188 | /// Returns the index into the comdat table (see Reader::getComdatTable()), or |
||
| 189 | /// -1 if not a comdat member. |
||
| 190 | int getComdatIndex() const { return ComdatIndex; } |
||
| 191 | |||
| 192 | using S = storage::Symbol; |
||
| 193 | |||
| 194 | GlobalValue::VisibilityTypes getVisibility() const { |
||
| 195 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); |
||
| 196 | } |
||
| 197 | |||
| 198 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } |
||
| 199 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } |
||
| 200 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } |
||
| 201 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } |
||
| 202 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } |
||
| 203 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } |
||
| 204 | |||
| 205 | bool canBeOmittedFromSymbolTable() const { |
||
| 206 | return (Flags >> S::FB_may_omit) & 1; |
||
| 207 | } |
||
| 208 | |||
| 209 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } |
||
| 210 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } |
||
| 211 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } |
||
| 212 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } |
||
| 213 | |||
| 214 | uint64_t getCommonSize() const { |
||
| 215 | assert(isCommon()); |
||
| 216 | return CommonSize; |
||
| 217 | } |
||
| 218 | |||
| 219 | uint32_t getCommonAlignment() const { |
||
| 220 | assert(isCommon()); |
||
| 221 | return CommonAlign; |
||
| 222 | } |
||
| 223 | |||
| 224 | /// COFF-specific: for weak externals, returns the name of the symbol that is |
||
| 225 | /// used as a fallback if the weak external remains undefined. |
||
| 226 | StringRef getCOFFWeakExternalFallback() const { |
||
| 227 | assert(isWeak() && isIndirect()); |
||
| 228 | return COFFWeakExternFallbackName; |
||
| 229 | } |
||
| 230 | |||
| 231 | StringRef getSectionName() const { return SectionName; } |
||
| 232 | }; |
||
| 233 | |||
| 234 | /// This class can be used to read a Symtab and Strtab produced by |
||
| 235 | /// irsymtab::build. |
||
| 236 | class Reader { |
||
| 237 | StringRef Symtab, Strtab; |
||
| 238 | |||
| 239 | ArrayRef<storage::Module> Modules; |
||
| 240 | ArrayRef<storage::Comdat> Comdats; |
||
| 241 | ArrayRef<storage::Symbol> Symbols; |
||
| 242 | ArrayRef<storage::Uncommon> Uncommons; |
||
| 243 | ArrayRef<storage::Str> DependentLibraries; |
||
| 244 | |||
| 245 | StringRef str(storage::Str S) const { return S.get(Strtab); } |
||
| 246 | |||
| 247 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { |
||
| 248 | return R.get(Symtab); |
||
| 249 | } |
||
| 250 | |||
| 251 | const storage::Header &header() const { |
||
| 252 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); |
||
| 253 | } |
||
| 254 | |||
| 255 | public: |
||
| 256 | class SymbolRef; |
||
| 257 | |||
| 258 | Reader() = default; |
||
| 259 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { |
||
| 260 | Modules = range(header().Modules); |
||
| 261 | Comdats = range(header().Comdats); |
||
| 262 | Symbols = range(header().Symbols); |
||
| 263 | Uncommons = range(header().Uncommons); |
||
| 264 | DependentLibraries = range(header().DependentLibraries); |
||
| 265 | } |
||
| 266 | |||
| 267 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; |
||
| 268 | |||
| 269 | /// Returns the symbol table for the entire bitcode file. |
||
| 270 | /// The symbols enumerated by this method are ephemeral, but they can be |
||
| 271 | /// copied into an irsymtab::Symbol object. |
||
| 272 | symbol_range symbols() const; |
||
| 273 | |||
| 274 | size_t getNumModules() const { return Modules.size(); } |
||
| 275 | |||
| 276 | /// Returns a slice of the symbol table for the I'th module in the file. |
||
| 277 | /// The symbols enumerated by this method are ephemeral, but they can be |
||
| 278 | /// copied into an irsymtab::Symbol object. |
||
| 279 | symbol_range module_symbols(unsigned I) const; |
||
| 280 | |||
| 281 | StringRef getTargetTriple() const { return str(header().TargetTriple); } |
||
| 282 | |||
| 283 | /// Returns the source file path specified at compile time. |
||
| 284 | StringRef getSourceFileName() const { return str(header().SourceFileName); } |
||
| 285 | |||
| 286 | /// Returns a table with all the comdats used by this file. |
||
| 287 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> |
||
| 288 | getComdatTable() const { |
||
| 289 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable; |
||
| 290 | ComdatTable.reserve(Comdats.size()); |
||
| 291 | for (auto C : Comdats) |
||
| 292 | ComdatTable.push_back({str(C.Name), llvm::Comdat::SelectionKind( |
||
| 293 | uint32_t(C.SelectionKind))}); |
||
| 294 | return ComdatTable; |
||
| 295 | } |
||
| 296 | |||
| 297 | /// COFF-specific: returns linker options specified in the input file. |
||
| 298 | StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } |
||
| 299 | |||
| 300 | /// Returns dependent library specifiers |
||
| 301 | std::vector<StringRef> getDependentLibraries() const { |
||
| 302 | std::vector<StringRef> Specifiers; |
||
| 303 | Specifiers.reserve(DependentLibraries.size()); |
||
| 304 | for (auto S : DependentLibraries) { |
||
| 305 | Specifiers.push_back(str(S)); |
||
| 306 | } |
||
| 307 | return Specifiers; |
||
| 308 | } |
||
| 309 | }; |
||
| 310 | |||
| 311 | /// Ephemeral symbols produced by Reader::symbols() and |
||
| 312 | /// Reader::module_symbols(). |
||
| 313 | class Reader::SymbolRef : public Symbol { |
||
| 314 | const storage::Symbol *SymI, *SymE; |
||
| 315 | const storage::Uncommon *UncI; |
||
| 316 | const Reader *R; |
||
| 317 | |||
| 318 | void read() { |
||
| 319 | if (SymI == SymE) |
||
| 320 | return; |
||
| 321 | |||
| 322 | Name = R->str(SymI->Name); |
||
| 323 | IRName = R->str(SymI->IRName); |
||
| 324 | ComdatIndex = SymI->ComdatIndex; |
||
| 325 | Flags = SymI->Flags; |
||
| 326 | |||
| 327 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { |
||
| 328 | CommonSize = UncI->CommonSize; |
||
| 329 | CommonAlign = UncI->CommonAlign; |
||
| 330 | COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); |
||
| 331 | SectionName = R->str(UncI->SectionName); |
||
| 332 | } else |
||
| 333 | // Reset this field so it can be queried unconditionally for all symbols. |
||
| 334 | SectionName = ""; |
||
| 335 | } |
||
| 336 | |||
| 337 | public: |
||
| 338 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, |
||
| 339 | const storage::Uncommon *UncI, const Reader *R) |
||
| 340 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { |
||
| 341 | read(); |
||
| 342 | } |
||
| 343 | |||
| 344 | void moveNext() { |
||
| 345 | ++SymI; |
||
| 346 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) |
||
| 347 | ++UncI; |
||
| 348 | read(); |
||
| 349 | } |
||
| 350 | |||
| 351 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } |
||
| 352 | }; |
||
| 353 | |||
| 354 | inline Reader::symbol_range Reader::symbols() const { |
||
| 355 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), |
||
| 356 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; |
||
| 357 | } |
||
| 358 | |||
| 359 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { |
||
| 360 | const storage::Module &M = Modules[I]; |
||
| 361 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, |
||
| 362 | *MEnd = Symbols.begin() + M.End; |
||
| 363 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), |
||
| 364 | SymbolRef(MEnd, MEnd, nullptr, this)}; |
||
| 365 | } |
||
| 366 | |||
| 367 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the |
||
| 368 | /// irsymtab are owned by Symtab and Strtab. |
||
| 369 | struct FileContents { |
||
| 370 | SmallVector<char, 0> Symtab, Strtab; |
||
| 371 | Reader TheReader; |
||
| 372 | }; |
||
| 373 | |||
| 374 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. |
||
| 375 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); |
||
| 376 | |||
| 377 | } // end namespace irsymtab |
||
| 378 | } // end namespace llvm |
||
| 379 | |||
| 380 | #endif // LLVM_OBJECT_IRSYMTAB_H |