Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 14 | pmbaty | 1 | //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===// | 
| 2 | // | ||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | // | ||
| 7 | //===----------------------------------------------------------------------===// | ||
| 8 | // | ||
| 9 | // This file contains data definitions and a reader and builder for a symbol | ||
| 10 | // table for LLVM IR. Its purpose is to allow linkers and other consumers of | ||
| 11 | // bitcode files to efficiently read the symbol table for symbol resolution | ||
| 12 | // purposes without needing to construct a module in memory. | ||
| 13 | // | ||
| 14 | // As with most object files the symbol table has two parts: the symbol table | ||
| 15 | // itself and a string table which is referenced by the symbol table. | ||
| 16 | // | ||
| 17 | // A symbol table corresponds to a single bitcode file, which may consist of | ||
| 18 | // multiple modules, so symbol tables may likewise contain symbols for multiple | ||
| 19 | // modules. | ||
| 20 | // | ||
| 21 | //===----------------------------------------------------------------------===// | ||
| 22 | |||
| 23 | #ifndef LLVM_OBJECT_IRSYMTAB_H | ||
| 24 | #define LLVM_OBJECT_IRSYMTAB_H | ||
| 25 | |||
| 26 | #include "llvm/ADT/ArrayRef.h" | ||
| 27 | #include "llvm/ADT/StringRef.h" | ||
| 28 | #include "llvm/ADT/iterator_range.h" | ||
| 29 | #include "llvm/IR/Comdat.h" | ||
| 30 | #include "llvm/IR/GlobalValue.h" | ||
| 31 | #include "llvm/Object/SymbolicFile.h" | ||
| 32 | #include "llvm/Support/Allocator.h" | ||
| 33 | #include "llvm/Support/Endian.h" | ||
| 34 | #include "llvm/Support/Error.h" | ||
| 35 | #include <cassert> | ||
| 36 | #include <cstdint> | ||
| 37 | #include <vector> | ||
| 38 | |||
| 39 | namespace llvm { | ||
| 40 | |||
| 41 | struct BitcodeFileContents; | ||
| 42 | class StringTableBuilder; | ||
| 43 | |||
| 44 | namespace irsymtab { | ||
| 45 | |||
| 46 | namespace storage { | ||
| 47 | |||
| 48 | // The data structures in this namespace define the low-level serialization | ||
| 49 | // format. Clients that just want to read a symbol table should use the | ||
| 50 | // irsymtab::Reader class. | ||
| 51 | |||
| 52 | using Word = support::ulittle32_t; | ||
| 53 | |||
| 54 | /// A reference to a string in the string table. | ||
| 55 | struct Str { | ||
| 56 |   Word Offset, Size; | ||
| 57 | |||
| 58 | StringRef get(StringRef Strtab) const { | ||
| 59 | return {Strtab.data() + Offset, Size}; | ||
| 60 |   } | ||
| 61 | }; | ||
| 62 | |||
| 63 | /// A reference to a range of objects in the symbol table. | ||
| 64 | template <typename T> struct Range { | ||
| 65 |   Word Offset, Size; | ||
| 66 | |||
| 67 | ArrayRef<T> get(StringRef Symtab) const { | ||
| 68 | return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; | ||
| 69 |   } | ||
| 70 | }; | ||
| 71 | |||
| 72 | /// Describes the range of a particular module's symbols within the symbol | ||
| 73 | /// table. | ||
| 74 | struct Module { | ||
| 75 |   Word Begin, End; | ||
| 76 | |||
| 77 |   /// The index of the first Uncommon for this Module. | ||
| 78 |   Word UncBegin; | ||
| 79 | }; | ||
| 80 | |||
| 81 | /// This is equivalent to an IR comdat. | ||
| 82 | struct Comdat { | ||
| 83 |   Str Name; | ||
| 84 | |||
| 85 |   // llvm::Comdat::SelectionKind | ||
| 86 |   Word SelectionKind; | ||
| 87 | }; | ||
| 88 | |||
| 89 | /// Contains the information needed by linkers for symbol resolution, as well as | ||
| 90 | /// by the LTO implementation itself. | ||
| 91 | struct Symbol { | ||
| 92 |   /// The mangled symbol name. | ||
| 93 |   Str Name; | ||
| 94 | |||
| 95 |   /// The unmangled symbol name, or the empty string if this is not an IR | ||
| 96 |   /// symbol. | ||
| 97 |   Str IRName; | ||
| 98 | |||
| 99 |   /// The index into Header::Comdats, or -1 if not a comdat member. | ||
| 100 |   Word ComdatIndex; | ||
| 101 | |||
| 102 |   Word Flags; | ||
| 103 | enum FlagBits { | ||
| 104 |     FB_visibility, // 2 bits | ||
| 105 | FB_has_uncommon = FB_visibility + 2, | ||
| 106 | FB_undefined, | ||
| 107 | FB_weak, | ||
| 108 | FB_common, | ||
| 109 | FB_indirect, | ||
| 110 | FB_used, | ||
| 111 | FB_tls, | ||
| 112 | FB_may_omit, | ||
| 113 | FB_global, | ||
| 114 | FB_format_specific, | ||
| 115 | FB_unnamed_addr, | ||
| 116 | FB_executable, | ||
| 117 | }; | ||
| 118 | }; | ||
| 119 | |||
| 120 | /// This data structure contains rarely used symbol fields and is optionally | ||
| 121 | /// referenced by a Symbol. | ||
| 122 | struct Uncommon { | ||
| 123 |   Word CommonSize, CommonAlign; | ||
| 124 | |||
| 125 |   /// COFF-specific: the name of the symbol that a weak external resolves to | ||
| 126 |   /// if not defined. | ||
| 127 |   Str COFFWeakExternFallbackName; | ||
| 128 | |||
| 129 |   /// Specified section name, if any. | ||
| 130 |   Str SectionName; | ||
| 131 | }; | ||
| 132 | |||
| 133 | |||
| 134 | struct Header { | ||
| 135 |   /// Version number of the symtab format. This number should be incremented | ||
| 136 |   /// when the format changes, but it does not need to be incremented if a | ||
| 137 |   /// change to LLVM would cause it to create a different symbol table. | ||
| 138 |   Word Version; | ||
| 139 | enum { kCurrentVersion = 3 }; | ||
| 140 | |||
| 141 |   /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). | ||
| 142 |   /// Consumers should rebuild the symbol table from IR if the producer's | ||
| 143 |   /// version does not match the consumer's version due to potential differences | ||
| 144 |   /// in symbol table format, symbol enumeration order and so on. | ||
| 145 |   Str Producer; | ||
| 146 | |||
| 147 | Range<Module> Modules; | ||
| 148 | Range<Comdat> Comdats; | ||
| 149 | Range<Symbol> Symbols; | ||
| 150 | Range<Uncommon> Uncommons; | ||
| 151 | |||
| 152 |   Str TargetTriple, SourceFileName; | ||
| 153 | |||
| 154 |   /// COFF-specific: linker directives. | ||
| 155 |   Str COFFLinkerOpts; | ||
| 156 | |||
| 157 |   /// Dependent Library Specifiers | ||
| 158 | Range<Str> DependentLibraries; | ||
| 159 | }; | ||
| 160 | |||
| 161 | } // end namespace storage | ||
| 162 | |||
| 163 | /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for | ||
| 164 | /// Mods. | ||
| 165 | Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab, | ||
| 166 | StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); | ||
| 167 | |||
| 168 | /// This represents a symbol that has been read from a storage::Symbol and | ||
| 169 | /// possibly a storage::Uncommon. | ||
| 170 | struct Symbol { | ||
| 171 |   // Copied from storage::Symbol. | ||
| 172 |   StringRef Name, IRName; | ||
| 173 | int ComdatIndex; | ||
| 174 | uint32_t Flags; | ||
| 175 | |||
| 176 |   // Copied from storage::Uncommon. | ||
| 177 | uint32_t CommonSize, CommonAlign; | ||
| 178 |   StringRef COFFWeakExternFallbackName; | ||
| 179 |   StringRef SectionName; | ||
| 180 | |||
| 181 |   /// Returns the mangled symbol name. | ||
| 182 | StringRef getName() const { return Name; } | ||
| 183 | |||
| 184 |   /// Returns the unmangled symbol name, or the empty string if this is not an | ||
| 185 |   /// IR symbol. | ||
| 186 | StringRef getIRName() const { return IRName; } | ||
| 187 | |||
| 188 |   /// Returns the index into the comdat table (see Reader::getComdatTable()), or | ||
| 189 |   /// -1 if not a comdat member. | ||
| 190 | int getComdatIndex() const { return ComdatIndex; } | ||
| 191 | |||
| 192 | using S = storage::Symbol; | ||
| 193 | |||
| 194 | GlobalValue::VisibilityTypes getVisibility() const { | ||
| 195 | return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); | ||
| 196 |   } | ||
| 197 | |||
| 198 | bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } | ||
| 199 | bool isWeak() const { return (Flags >> S::FB_weak) & 1; } | ||
| 200 | bool isCommon() const { return (Flags >> S::FB_common) & 1; } | ||
| 201 | bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; } | ||
| 202 | bool isUsed() const { return (Flags >> S::FB_used) & 1; } | ||
| 203 | bool isTLS() const { return (Flags >> S::FB_tls) & 1; } | ||
| 204 | |||
| 205 | bool canBeOmittedFromSymbolTable() const { | ||
| 206 | return (Flags >> S::FB_may_omit) & 1; | ||
| 207 |   } | ||
| 208 | |||
| 209 | bool isGlobal() const { return (Flags >> S::FB_global) & 1; } | ||
| 210 | bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; } | ||
| 211 | bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; } | ||
| 212 | bool isExecutable() const { return (Flags >> S::FB_executable) & 1; } | ||
| 213 | |||
| 214 | uint64_t getCommonSize() const { | ||
| 215 | assert(isCommon()); | ||
| 216 | return CommonSize; | ||
| 217 |   } | ||
| 218 | |||
| 219 | uint32_t getCommonAlignment() const { | ||
| 220 | assert(isCommon()); | ||
| 221 | return CommonAlign; | ||
| 222 |   } | ||
| 223 | |||
| 224 |   /// COFF-specific: for weak externals, returns the name of the symbol that is | ||
| 225 |   /// used as a fallback if the weak external remains undefined. | ||
| 226 | StringRef getCOFFWeakExternalFallback() const { | ||
| 227 | assert(isWeak() && isIndirect()); | ||
| 228 | return COFFWeakExternFallbackName; | ||
| 229 |   } | ||
| 230 | |||
| 231 | StringRef getSectionName() const { return SectionName; } | ||
| 232 | }; | ||
| 233 | |||
| 234 | /// This class can be used to read a Symtab and Strtab produced by | ||
| 235 | /// irsymtab::build. | ||
| 236 | class Reader { | ||
| 237 |   StringRef Symtab, Strtab; | ||
| 238 | |||
| 239 | ArrayRef<storage::Module> Modules; | ||
| 240 | ArrayRef<storage::Comdat> Comdats; | ||
| 241 | ArrayRef<storage::Symbol> Symbols; | ||
| 242 | ArrayRef<storage::Uncommon> Uncommons; | ||
| 243 | ArrayRef<storage::Str> DependentLibraries; | ||
| 244 | |||
| 245 | StringRef str(storage::Str S) const { return S.get(Strtab); } | ||
| 246 | |||
| 247 | template <typename T> ArrayRef<T> range(storage::Range<T> R) const { | ||
| 248 | return R.get(Symtab); | ||
| 249 |   } | ||
| 250 | |||
| 251 | const storage::Header &header() const { | ||
| 252 | return *reinterpret_cast<const storage::Header *>(Symtab.data()); | ||
| 253 |   } | ||
| 254 | |||
| 255 | public: | ||
| 256 | class SymbolRef; | ||
| 257 | |||
| 258 | Reader() = default; | ||
| 259 | Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { | ||
| 260 | Modules = range(header().Modules); | ||
| 261 | Comdats = range(header().Comdats); | ||
| 262 | Symbols = range(header().Symbols); | ||
| 263 | Uncommons = range(header().Uncommons); | ||
| 264 | DependentLibraries = range(header().DependentLibraries); | ||
| 265 |   } | ||
| 266 | |||
| 267 | using symbol_range = iterator_range<object::content_iterator<SymbolRef>>; | ||
| 268 | |||
| 269 |   /// Returns the symbol table for the entire bitcode file. | ||
| 270 |   /// The symbols enumerated by this method are ephemeral, but they can be | ||
| 271 |   /// copied into an irsymtab::Symbol object. | ||
| 272 | symbol_range symbols() const; | ||
| 273 | |||
| 274 | size_t getNumModules() const { return Modules.size(); } | ||
| 275 | |||
| 276 |   /// Returns a slice of the symbol table for the I'th module in the file. | ||
| 277 |   /// The symbols enumerated by this method are ephemeral, but they can be | ||
| 278 |   /// copied into an irsymtab::Symbol object. | ||
| 279 | symbol_range module_symbols(unsigned I) const; | ||
| 280 | |||
| 281 | StringRef getTargetTriple() const { return str(header().TargetTriple); } | ||
| 282 | |||
| 283 |   /// Returns the source file path specified at compile time. | ||
| 284 | StringRef getSourceFileName() const { return str(header().SourceFileName); } | ||
| 285 | |||
| 286 |   /// Returns a table with all the comdats used by this file. | ||
| 287 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> | ||
| 288 | getComdatTable() const { | ||
| 289 | std::vector<std::pair<StringRef, llvm::Comdat::SelectionKind>> ComdatTable; | ||
| 290 | ComdatTable.reserve(Comdats.size()); | ||
| 291 | for (auto C : Comdats) | ||
| 292 | ComdatTable.push_back({str(C.Name), llvm::Comdat::SelectionKind( | ||
| 293 | uint32_t(C.SelectionKind))}); | ||
| 294 | return ComdatTable; | ||
| 295 |   } | ||
| 296 | |||
| 297 |   /// COFF-specific: returns linker options specified in the input file. | ||
| 298 | StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); } | ||
| 299 | |||
| 300 |   /// Returns dependent library specifiers | ||
| 301 | std::vector<StringRef> getDependentLibraries() const { | ||
| 302 | std::vector<StringRef> Specifiers; | ||
| 303 | Specifiers.reserve(DependentLibraries.size()); | ||
| 304 | for (auto S : DependentLibraries) { | ||
| 305 | Specifiers.push_back(str(S)); | ||
| 306 |     } | ||
| 307 | return Specifiers; | ||
| 308 |   } | ||
| 309 | }; | ||
| 310 | |||
| 311 | /// Ephemeral symbols produced by Reader::symbols() and | ||
| 312 | /// Reader::module_symbols(). | ||
| 313 | class Reader::SymbolRef : public Symbol { | ||
| 314 | const storage::Symbol *SymI, *SymE; | ||
| 315 | const storage::Uncommon *UncI; | ||
| 316 | const Reader *R; | ||
| 317 | |||
| 318 | void read() { | ||
| 319 | if (SymI == SymE) | ||
| 320 | return; | ||
| 321 | |||
| 322 | Name = R->str(SymI->Name); | ||
| 323 | IRName = R->str(SymI->IRName); | ||
| 324 | ComdatIndex = SymI->ComdatIndex; | ||
| 325 | Flags = SymI->Flags; | ||
| 326 | |||
| 327 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { | ||
| 328 | CommonSize = UncI->CommonSize; | ||
| 329 | CommonAlign = UncI->CommonAlign; | ||
| 330 | COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); | ||
| 331 | SectionName = R->str(UncI->SectionName); | ||
| 332 | } else | ||
| 333 |       // Reset this field so it can be queried unconditionally for all symbols. | ||
| 334 | SectionName = ""; | ||
| 335 |   } | ||
| 336 | |||
| 337 | public: | ||
| 338 | SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, | ||
| 339 | const storage::Uncommon *UncI, const Reader *R) | ||
| 340 | : SymI(SymI), SymE(SymE), UncI(UncI), R(R) { | ||
| 341 | read(); | ||
| 342 |   } | ||
| 343 | |||
| 344 | void moveNext() { | ||
| 345 | ++SymI; | ||
| 346 | if (Flags & (1 << storage::Symbol::FB_has_uncommon)) | ||
| 347 | ++UncI; | ||
| 348 | read(); | ||
| 349 |   } | ||
| 350 | |||
| 351 | bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } | ||
| 352 | }; | ||
| 353 | |||
| 354 | inline Reader::symbol_range Reader::symbols() const { | ||
| 355 | return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this), | ||
| 356 | SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)}; | ||
| 357 | } | ||
| 358 | |||
| 359 | inline Reader::symbol_range Reader::module_symbols(unsigned I) const { | ||
| 360 | const storage::Module &M = Modules[I]; | ||
| 361 | const storage::Symbol *MBegin = Symbols.begin() + M.Begin, | ||
| 362 | *MEnd = Symbols.begin() + M.End; | ||
| 363 | return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this), | ||
| 364 | SymbolRef(MEnd, MEnd, nullptr, this)}; | ||
| 365 | } | ||
| 366 | |||
| 367 | /// The contents of the irsymtab in a bitcode file. Any underlying data for the | ||
| 368 | /// irsymtab are owned by Symtab and Strtab. | ||
| 369 | struct FileContents { | ||
| 370 | SmallVector<char, 0> Symtab, Strtab; | ||
| 371 |   Reader TheReader; | ||
| 372 | }; | ||
| 373 | |||
| 374 | /// Reads the contents of a bitcode file, creating its irsymtab if necessary. | ||
| 375 | Expected<FileContents> readBitcode(const BitcodeFileContents &BFC); | ||
| 376 | |||
| 377 | } // end namespace irsymtab | ||
| 378 | } // end namespace llvm | ||
| 379 | |||
| 380 | #endif // LLVM_OBJECT_IRSYMTAB_H |