Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- MachO.h - MachO object file implementation ---------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file declares the MachOObjectFile class, which implements the ObjectFile |
||
10 | // interface for MachO files. |
||
11 | // |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_OBJECT_MACHO_H |
||
15 | #define LLVM_OBJECT_MACHO_H |
||
16 | |||
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
||
38 | |||
39 | namespace llvm { |
||
40 | namespace object { |
||
41 | |||
42 | /// DiceRef - This is a value type class that represents a single |
||
43 | /// data in code entry in the table in a Mach-O object file. |
||
44 | class DiceRef { |
||
45 | DataRefImpl DicePimpl; |
||
46 | const ObjectFile *OwningObject = nullptr; |
||
47 | |||
48 | public: |
||
49 | DiceRef() = default; |
||
50 | DiceRef(DataRefImpl DiceP, const ObjectFile *Owner); |
||
51 | |||
52 | bool operator==(const DiceRef &Other) const; |
||
53 | bool operator<(const DiceRef &Other) const; |
||
54 | |||
55 | void moveNext(); |
||
56 | |||
57 | std::error_code getOffset(uint32_t &Result) const; |
||
58 | std::error_code getLength(uint16_t &Result) const; |
||
59 | std::error_code getKind(uint16_t &Result) const; |
||
60 | |||
61 | DataRefImpl getRawDataRefImpl() const; |
||
62 | const ObjectFile *getObjectFile() const; |
||
63 | }; |
||
64 | using dice_iterator = content_iterator<DiceRef>; |
||
65 | |||
66 | /// ExportEntry encapsulates the current-state-of-the-walk used when doing a |
||
67 | /// non-recursive walk of the trie data structure. This allows you to iterate |
||
68 | /// across all exported symbols using: |
||
69 | /// Error Err = Error::success(); |
||
70 | /// for (const llvm::object::ExportEntry &AnExport : Obj->exports(&Err)) { |
||
71 | /// } |
||
72 | /// if (Err) { report error ... |
||
73 | class ExportEntry { |
||
74 | public: |
||
75 | ExportEntry(Error *Err, const MachOObjectFile *O, ArrayRef<uint8_t> Trie); |
||
76 | |||
77 | StringRef name() const; |
||
78 | uint64_t flags() const; |
||
79 | uint64_t address() const; |
||
80 | uint64_t other() const; |
||
81 | StringRef otherName() const; |
||
82 | uint32_t nodeOffset() const; |
||
83 | |||
84 | bool operator==(const ExportEntry &) const; |
||
85 | |||
86 | void moveNext(); |
||
87 | |||
88 | private: |
||
89 | friend class MachOObjectFile; |
||
90 | |||
91 | void moveToFirst(); |
||
92 | void moveToEnd(); |
||
93 | uint64_t readULEB128(const uint8_t *&p, const char **error); |
||
94 | void pushDownUntilBottom(); |
||
95 | void pushNode(uint64_t Offset); |
||
96 | |||
97 | // Represents a node in the mach-o exports trie. |
||
98 | struct NodeState { |
||
99 | NodeState(const uint8_t *Ptr); |
||
100 | |||
101 | const uint8_t *Start; |
||
102 | const uint8_t *Current; |
||
103 | uint64_t Flags = 0; |
||
104 | uint64_t Address = 0; |
||
105 | uint64_t Other = 0; |
||
106 | const char *ImportName = nullptr; |
||
107 | unsigned ChildCount = 0; |
||
108 | unsigned NextChildIndex = 0; |
||
109 | unsigned ParentStringLength = 0; |
||
110 | bool IsExportNode = false; |
||
111 | }; |
||
112 | using NodeList = SmallVector<NodeState, 16>; |
||
113 | using node_iterator = NodeList::const_iterator; |
||
114 | |||
115 | Error *E; |
||
116 | const MachOObjectFile *O; |
||
117 | ArrayRef<uint8_t> Trie; |
||
118 | SmallString<256> CumulativeString; |
||
119 | NodeList Stack; |
||
120 | bool Done = false; |
||
121 | |||
122 | iterator_range<node_iterator> nodes() const { |
||
123 | return make_range(Stack.begin(), Stack.end()); |
||
124 | } |
||
125 | }; |
||
126 | using export_iterator = content_iterator<ExportEntry>; |
||
127 | |||
128 | // Segment info so SegIndex/SegOffset pairs in a Mach-O Bind or Rebase entry |
||
129 | // can be checked and translated. Only the SegIndex/SegOffset pairs from |
||
130 | // checked entries are to be used with the segmentName(), sectionName() and |
||
131 | // address() methods below. |
||
132 | class BindRebaseSegInfo { |
||
133 | public: |
||
134 | BindRebaseSegInfo(const MachOObjectFile *Obj); |
||
135 | |||
136 | // Used to check a Mach-O Bind or Rebase entry for errors when iterating. |
||
137 | const char* checkSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, |
||
138 | uint8_t PointerSize, uint32_t Count=1, |
||
139 | uint32_t Skip=0); |
||
140 | // Used with valid SegIndex/SegOffset values from checked entries. |
||
141 | StringRef segmentName(int32_t SegIndex); |
||
142 | StringRef sectionName(int32_t SegIndex, uint64_t SegOffset); |
||
143 | uint64_t address(uint32_t SegIndex, uint64_t SegOffset); |
||
144 | |||
145 | private: |
||
146 | struct SectionInfo { |
||
147 | uint64_t Address; |
||
148 | uint64_t Size; |
||
149 | StringRef SectionName; |
||
150 | StringRef SegmentName; |
||
151 | uint64_t OffsetInSegment; |
||
152 | uint64_t SegmentStartAddress; |
||
153 | int32_t SegmentIndex; |
||
154 | }; |
||
155 | const SectionInfo &findSection(int32_t SegIndex, uint64_t SegOffset); |
||
156 | |||
157 | SmallVector<SectionInfo, 32> Sections; |
||
158 | int32_t MaxSegIndex; |
||
159 | }; |
||
160 | |||
161 | /// MachORebaseEntry encapsulates the current state in the decompression of |
||
162 | /// rebasing opcodes. This allows you to iterate through the compressed table of |
||
163 | /// rebasing using: |
||
164 | /// Error Err = Error::success(); |
||
165 | /// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable(&Err)) { |
||
166 | /// } |
||
167 | /// if (Err) { report error ... |
||
168 | class MachORebaseEntry { |
||
169 | public: |
||
170 | MachORebaseEntry(Error *Err, const MachOObjectFile *O, |
||
171 | ArrayRef<uint8_t> opcodes, bool is64Bit); |
||
172 | |||
173 | int32_t segmentIndex() const; |
||
174 | uint64_t segmentOffset() const; |
||
175 | StringRef typeName() const; |
||
176 | StringRef segmentName() const; |
||
177 | StringRef sectionName() const; |
||
178 | uint64_t address() const; |
||
179 | |||
180 | bool operator==(const MachORebaseEntry &) const; |
||
181 | |||
182 | void moveNext(); |
||
183 | |||
184 | private: |
||
185 | friend class MachOObjectFile; |
||
186 | |||
187 | void moveToFirst(); |
||
188 | void moveToEnd(); |
||
189 | uint64_t readULEB128(const char **error); |
||
190 | |||
191 | Error *E; |
||
192 | const MachOObjectFile *O; |
||
193 | ArrayRef<uint8_t> Opcodes; |
||
194 | const uint8_t *Ptr; |
||
195 | uint64_t SegmentOffset = 0; |
||
196 | int32_t SegmentIndex = -1; |
||
197 | uint64_t RemainingLoopCount = 0; |
||
198 | uint64_t AdvanceAmount = 0; |
||
199 | uint8_t RebaseType = 0; |
||
200 | uint8_t PointerSize; |
||
201 | bool Done = false; |
||
202 | }; |
||
203 | using rebase_iterator = content_iterator<MachORebaseEntry>; |
||
204 | |||
205 | /// MachOBindEntry encapsulates the current state in the decompression of |
||
206 | /// binding opcodes. This allows you to iterate through the compressed table of |
||
207 | /// bindings using: |
||
208 | /// Error Err = Error::success(); |
||
209 | /// for (const llvm::object::MachOBindEntry &Entry : Obj->bindTable(&Err)) { |
||
210 | /// } |
||
211 | /// if (Err) { report error ... |
||
212 | class MachOBindEntry { |
||
213 | public: |
||
214 | enum class Kind { Regular, Lazy, Weak }; |
||
215 | |||
216 | MachOBindEntry(Error *Err, const MachOObjectFile *O, |
||
217 | ArrayRef<uint8_t> Opcodes, bool is64Bit, MachOBindEntry::Kind); |
||
218 | |||
219 | int32_t segmentIndex() const; |
||
220 | uint64_t segmentOffset() const; |
||
221 | StringRef typeName() const; |
||
222 | StringRef symbolName() const; |
||
223 | uint32_t flags() const; |
||
224 | int64_t addend() const; |
||
225 | int ordinal() const; |
||
226 | |||
227 | StringRef segmentName() const; |
||
228 | StringRef sectionName() const; |
||
229 | uint64_t address() const; |
||
230 | |||
231 | bool operator==(const MachOBindEntry &) const; |
||
232 | |||
233 | void moveNext(); |
||
234 | |||
235 | private: |
||
236 | friend class MachOObjectFile; |
||
237 | |||
238 | void moveToFirst(); |
||
239 | void moveToEnd(); |
||
240 | uint64_t readULEB128(const char **error); |
||
241 | int64_t readSLEB128(const char **error); |
||
242 | |||
243 | Error *E; |
||
244 | const MachOObjectFile *O; |
||
245 | ArrayRef<uint8_t> Opcodes; |
||
246 | const uint8_t *Ptr; |
||
247 | uint64_t SegmentOffset = 0; |
||
248 | int32_t SegmentIndex = -1; |
||
249 | StringRef SymbolName; |
||
250 | bool LibraryOrdinalSet = false; |
||
251 | int Ordinal = 0; |
||
252 | uint32_t Flags = 0; |
||
253 | int64_t Addend = 0; |
||
254 | uint64_t RemainingLoopCount = 0; |
||
255 | uint64_t AdvanceAmount = 0; |
||
256 | uint8_t BindType = 0; |
||
257 | uint8_t PointerSize; |
||
258 | Kind TableKind; |
||
259 | bool Done = false; |
||
260 | }; |
||
261 | using bind_iterator = content_iterator<MachOBindEntry>; |
||
262 | |||
263 | /// ChainedFixupTarget holds all the information about an external symbol |
||
264 | /// necessary to bind this binary to that symbol. These values are referenced |
||
265 | /// indirectly by chained fixup binds. This structure captures values from all |
||
266 | /// import and symbol formats. |
||
267 | /// |
||
268 | /// Be aware there are two notions of weak here: |
||
269 | /// WeakImport == true |
||
270 | /// The associated bind may be set to 0 if this symbol is missing from its |
||
271 | /// parent library. This is called a "weak import." |
||
272 | /// LibOrdinal == BIND_SPECIAL_DYLIB_WEAK_LOOKUP |
||
273 | /// This symbol may be coalesced with other libraries vending the same |
||
274 | /// symbol. E.g., C++'s "operator new". This is called a "weak bind." |
||
275 | struct ChainedFixupTarget { |
||
276 | public: |
||
277 | ChainedFixupTarget(int LibOrdinal, uint32_t NameOffset, StringRef Symbol, |
||
278 | uint64_t Addend, bool WeakImport) |
||
279 | : LibOrdinal(LibOrdinal), NameOffset(NameOffset), SymbolName(Symbol), |
||
280 | Addend(Addend), WeakImport(WeakImport) {} |
||
281 | |||
282 | int libOrdinal() { return LibOrdinal; } |
||
283 | uint32_t nameOffset() { return NameOffset; } |
||
284 | StringRef symbolName() { return SymbolName; } |
||
285 | uint64_t addend() { return Addend; } |
||
286 | bool weakImport() { return WeakImport; } |
||
287 | bool weakBind() { |
||
288 | return LibOrdinal == MachO::BIND_SPECIAL_DYLIB_WEAK_LOOKUP; |
||
289 | } |
||
290 | |||
291 | private: |
||
292 | int LibOrdinal; |
||
293 | uint32_t NameOffset; |
||
294 | StringRef SymbolName; |
||
295 | uint64_t Addend; |
||
296 | bool WeakImport; |
||
297 | }; |
||
298 | |||
299 | struct ChainedFixupsSegment { |
||
300 | ChainedFixupsSegment(uint8_t SegIdx, uint32_t Offset, |
||
301 | const MachO::dyld_chained_starts_in_segment &Header, |
||
302 | std::vector<uint16_t> &&PageStarts) |
||
303 | : SegIdx(SegIdx), Offset(Offset), Header(Header), |
||
304 | PageStarts(PageStarts){}; |
||
305 | |||
306 | uint32_t SegIdx; |
||
307 | uint32_t Offset; // dyld_chained_starts_in_image::seg_info_offset[SegIdx] |
||
308 | MachO::dyld_chained_starts_in_segment Header; |
||
309 | std::vector<uint16_t> PageStarts; // page_start[] entries, host endianness |
||
310 | }; |
||
311 | |||
312 | /// MachOAbstractFixupEntry is an abstract class representing a fixup in a |
||
313 | /// MH_DYLDLINK file. Fixups generally represent rebases and binds. Binds also |
||
314 | /// subdivide into additional subtypes (weak, lazy, reexport). |
||
315 | /// |
||
316 | /// The two concrete subclasses of MachOAbstractFixupEntry are: |
||
317 | /// |
||
318 | /// MachORebaseBindEntry - for dyld opcode-based tables, including threaded- |
||
319 | /// rebase, where rebases are mixed in with other |
||
320 | /// bind opcodes. |
||
321 | /// MachOChainedFixupEntry - for pointer chains embedded in data pages. |
||
322 | class MachOAbstractFixupEntry { |
||
323 | public: |
||
324 | MachOAbstractFixupEntry(Error *Err, const MachOObjectFile *O); |
||
325 | |||
326 | int32_t segmentIndex() const; |
||
327 | uint64_t segmentOffset() const; |
||
328 | uint64_t segmentAddress() const; |
||
329 | StringRef segmentName() const; |
||
330 | StringRef sectionName() const; |
||
331 | StringRef typeName() const; |
||
332 | StringRef symbolName() const; |
||
333 | uint32_t flags() const; |
||
334 | int64_t addend() const; |
||
335 | int ordinal() const; |
||
336 | |||
337 | /// \return the location of this fixup as a VM Address. For the VM |
||
338 | /// Address this fixup is pointing to, use pointerValue(). |
||
339 | uint64_t address() const; |
||
340 | |||
341 | /// \return the VM Address pointed to by this fixup. Use |
||
342 | /// pointerValue() to compare against other VM Addresses, such as |
||
343 | /// section addresses or segment vmaddrs. |
||
344 | uint64_t pointerValue() const { return PointerValue; } |
||
345 | |||
346 | /// \return the raw "on-disk" representation of the fixup. For |
||
347 | /// Threaded rebases and Chained pointers these values are generally |
||
348 | /// encoded into various different pointer formats. This value is |
||
349 | /// exposed in API for tools that want to display and annotate the |
||
350 | /// raw bits. |
||
351 | uint64_t rawValue() const { return RawValue; } |
||
352 | |||
353 | void moveNext(); |
||
354 | |||
355 | protected: |
||
356 | Error *E; |
||
357 | const MachOObjectFile *O; |
||
358 | uint64_t SegmentOffset = 0; |
||
359 | int32_t SegmentIndex = -1; |
||
360 | StringRef SymbolName; |
||
361 | int32_t Ordinal = 0; |
||
362 | uint32_t Flags = 0; |
||
363 | int64_t Addend = 0; |
||
364 | uint64_t PointerValue = 0; |
||
365 | uint64_t RawValue = 0; |
||
366 | bool Done = false; |
||
367 | |||
368 | void moveToFirst(); |
||
369 | void moveToEnd(); |
||
370 | |||
371 | /// \return the vm address of the start of __TEXT segment. |
||
372 | uint64_t textAddress() const { return TextAddress; } |
||
373 | |||
374 | private: |
||
375 | uint64_t TextAddress; |
||
376 | }; |
||
377 | |||
378 | class MachOChainedFixupEntry : public MachOAbstractFixupEntry { |
||
379 | public: |
||
380 | enum class FixupKind { Bind, Rebase }; |
||
381 | |||
382 | MachOChainedFixupEntry(Error *Err, const MachOObjectFile *O, bool Parse); |
||
383 | |||
384 | bool operator==(const MachOChainedFixupEntry &) const; |
||
385 | |||
386 | bool isBind() const { return Kind == FixupKind::Bind; } |
||
387 | bool isRebase() const { return Kind == FixupKind::Rebase; } |
||
388 | |||
389 | void moveNext(); |
||
390 | void moveToFirst(); |
||
391 | void moveToEnd(); |
||
392 | |||
393 | private: |
||
394 | void findNextPageWithFixups(); |
||
395 | |||
396 | std::vector<ChainedFixupTarget> FixupTargets; |
||
397 | std::vector<ChainedFixupsSegment> Segments; |
||
398 | ArrayRef<uint8_t> SegmentData; |
||
399 | FixupKind Kind; |
||
400 | uint32_t InfoSegIndex = 0; // Index into Segments |
||
401 | uint32_t PageIndex = 0; // Index into Segments[InfoSegIdx].PageStarts |
||
402 | uint32_t PageOffset = 0; // Page offset of the current fixup |
||
403 | }; |
||
404 | using fixup_iterator = content_iterator<MachOChainedFixupEntry>; |
||
405 | |||
406 | class MachOObjectFile : public ObjectFile { |
||
407 | public: |
||
408 | struct LoadCommandInfo { |
||
409 | const char *Ptr; // Where in memory the load command is. |
||
410 | MachO::load_command C; // The command itself. |
||
411 | }; |
||
412 | using LoadCommandList = SmallVector<LoadCommandInfo, 4>; |
||
413 | using load_command_iterator = LoadCommandList::const_iterator; |
||
414 | |||
415 | static Expected<std::unique_ptr<MachOObjectFile>> |
||
416 | create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, |
||
417 | uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0); |
||
418 | |||
419 | static bool isMachOPairedReloc(uint64_t RelocType, uint64_t Arch); |
||
420 | |||
421 | void moveSymbolNext(DataRefImpl &Symb) const override; |
||
422 | |||
423 | uint64_t getNValue(DataRefImpl Sym) const; |
||
424 | Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; |
||
425 | |||
426 | // MachO specific. |
||
427 | Error checkSymbolTable() const; |
||
428 | |||
429 | std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const; |
||
430 | unsigned getSectionType(SectionRef Sec) const; |
||
431 | |||
432 | Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override; |
||
433 | uint32_t getSymbolAlignment(DataRefImpl Symb) const override; |
||
434 | uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; |
||
435 | Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override; |
||
436 | Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override; |
||
437 | Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override; |
||
438 | unsigned getSymbolSectionID(SymbolRef Symb) const; |
||
439 | unsigned getSectionID(SectionRef Sec) const; |
||
440 | |||
441 | void moveSectionNext(DataRefImpl &Sec) const override; |
||
442 | Expected<StringRef> getSectionName(DataRefImpl Sec) const override; |
||
443 | uint64_t getSectionAddress(DataRefImpl Sec) const override; |
||
444 | uint64_t getSectionIndex(DataRefImpl Sec) const override; |
||
445 | uint64_t getSectionSize(DataRefImpl Sec) const override; |
||
446 | ArrayRef<uint8_t> getSectionContents(uint32_t Offset, uint64_t Size) const; |
||
447 | Expected<ArrayRef<uint8_t>> |
||
448 | getSectionContents(DataRefImpl Sec) const override; |
||
449 | uint64_t getSectionAlignment(DataRefImpl Sec) const override; |
||
450 | Expected<SectionRef> getSection(unsigned SectionIndex) const; |
||
451 | Expected<SectionRef> getSection(StringRef SectionName) const; |
||
452 | bool isSectionCompressed(DataRefImpl Sec) const override; |
||
453 | bool isSectionText(DataRefImpl Sec) const override; |
||
454 | bool isSectionData(DataRefImpl Sec) const override; |
||
455 | bool isSectionBSS(DataRefImpl Sec) const override; |
||
456 | bool isSectionVirtual(DataRefImpl Sec) const override; |
||
457 | bool isSectionBitcode(DataRefImpl Sec) const override; |
||
458 | bool isDebugSection(DataRefImpl Sec) const override; |
||
459 | |||
460 | /// Return the raw contents of an entire segment. |
||
461 | ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const; |
||
462 | ArrayRef<uint8_t> getSegmentContents(size_t SegmentIndex) const; |
||
463 | |||
464 | /// When dsymutil generates the companion file, it strips all unnecessary |
||
465 | /// sections (e.g. everything in the __TEXT segment) by omitting their body |
||
466 | /// and setting the offset in their corresponding load command to zero. |
||
467 | /// |
||
468 | /// While the load command itself is valid, reading the section corresponds |
||
469 | /// to reading the number of bytes specified in the load command, starting |
||
470 | /// from offset 0 (i.e. the Mach-O header at the beginning of the file). |
||
471 | bool isSectionStripped(DataRefImpl Sec) const override; |
||
472 | |||
473 | relocation_iterator section_rel_begin(DataRefImpl Sec) const override; |
||
474 | relocation_iterator section_rel_end(DataRefImpl Sec) const override; |
||
475 | |||
476 | relocation_iterator extrel_begin() const; |
||
477 | relocation_iterator extrel_end() const; |
||
478 | iterator_range<relocation_iterator> external_relocations() const { |
||
479 | return make_range(extrel_begin(), extrel_end()); |
||
480 | } |
||
481 | |||
482 | relocation_iterator locrel_begin() const; |
||
483 | relocation_iterator locrel_end() const; |
||
484 | |||
485 | void moveRelocationNext(DataRefImpl &Rel) const override; |
||
486 | uint64_t getRelocationOffset(DataRefImpl Rel) const override; |
||
487 | symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override; |
||
488 | section_iterator getRelocationSection(DataRefImpl Rel) const; |
||
489 | uint64_t getRelocationType(DataRefImpl Rel) const override; |
||
490 | void getRelocationTypeName(DataRefImpl Rel, |
||
491 | SmallVectorImpl<char> &Result) const override; |
||
492 | uint8_t getRelocationLength(DataRefImpl Rel) const; |
||
493 | |||
494 | // MachO specific. |
||
495 | std::error_code getLibraryShortNameByIndex(unsigned Index, StringRef &) const; |
||
496 | uint32_t getLibraryCount() const; |
||
497 | |||
498 | section_iterator getRelocationRelocatedSection(relocation_iterator Rel) const; |
||
499 | |||
500 | // TODO: Would be useful to have an iterator based version |
||
501 | // of the load command interface too. |
||
502 | |||
503 | basic_symbol_iterator symbol_begin() const override; |
||
504 | basic_symbol_iterator symbol_end() const override; |
||
505 | |||
506 | // MachO specific. |
||
507 | symbol_iterator getSymbolByIndex(unsigned Index) const; |
||
508 | uint64_t getSymbolIndex(DataRefImpl Symb) const; |
||
509 | |||
510 | section_iterator section_begin() const override; |
||
511 | section_iterator section_end() const override; |
||
512 | |||
513 | uint8_t getBytesInAddress() const override; |
||
514 | |||
515 | StringRef getFileFormatName() const override; |
||
516 | Triple::ArchType getArch() const override; |
||
517 | Expected<SubtargetFeatures> getFeatures() const override { |
||
518 | return SubtargetFeatures(); |
||
519 | } |
||
520 | Triple getArchTriple(const char **McpuDefault = nullptr) const; |
||
521 | |||
522 | relocation_iterator section_rel_begin(unsigned Index) const; |
||
523 | relocation_iterator section_rel_end(unsigned Index) const; |
||
524 | |||
525 | dice_iterator begin_dices() const; |
||
526 | dice_iterator end_dices() const; |
||
527 | |||
528 | load_command_iterator begin_load_commands() const; |
||
529 | load_command_iterator end_load_commands() const; |
||
530 | iterator_range<load_command_iterator> load_commands() const; |
||
531 | |||
532 | /// For use iterating over all exported symbols. |
||
533 | iterator_range<export_iterator> exports(Error &Err) const; |
||
534 | |||
535 | /// For use examining a trie not in a MachOObjectFile. |
||
536 | static iterator_range<export_iterator> exports(Error &Err, |
||
537 | ArrayRef<uint8_t> Trie, |
||
538 | const MachOObjectFile *O = |
||
539 | nullptr); |
||
540 | |||
541 | /// For use iterating over all rebase table entries. |
||
542 | iterator_range<rebase_iterator> rebaseTable(Error &Err); |
||
543 | |||
544 | /// For use examining rebase opcodes in a MachOObjectFile. |
||
545 | static iterator_range<rebase_iterator> rebaseTable(Error &Err, |
||
546 | MachOObjectFile *O, |
||
547 | ArrayRef<uint8_t> Opcodes, |
||
548 | bool is64); |
||
549 | |||
550 | /// For use iterating over all bind table entries. |
||
551 | iterator_range<bind_iterator> bindTable(Error &Err); |
||
552 | |||
553 | /// For iterating over all chained fixups. |
||
554 | iterator_range<fixup_iterator> fixupTable(Error &Err); |
||
555 | |||
556 | /// For use iterating over all lazy bind table entries. |
||
557 | iterator_range<bind_iterator> lazyBindTable(Error &Err); |
||
558 | |||
559 | /// For use iterating over all weak bind table entries. |
||
560 | iterator_range<bind_iterator> weakBindTable(Error &Err); |
||
561 | |||
562 | /// For use examining bind opcodes in a MachOObjectFile. |
||
563 | static iterator_range<bind_iterator> bindTable(Error &Err, |
||
564 | MachOObjectFile *O, |
||
565 | ArrayRef<uint8_t> Opcodes, |
||
566 | bool is64, |
||
567 | MachOBindEntry::Kind); |
||
568 | |||
569 | // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists |
||
570 | // that fully contains a pointer at that location. Multiple fixups in a bind |
||
571 | // (such as with the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode) can |
||
572 | // be tested via the Count and Skip parameters. |
||
573 | // |
||
574 | // This is used by MachOBindEntry::moveNext() to validate a MachOBindEntry. |
||
575 | const char *BindEntryCheckSegAndOffsets(int32_t SegIndex, uint64_t SegOffset, |
||
576 | uint8_t PointerSize, uint32_t Count=1, |
||
577 | uint32_t Skip=0) const { |
||
578 | return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, |
||
579 | PointerSize, Count, Skip); |
||
580 | } |
||
581 | |||
582 | // Given a SegIndex, SegOffset, and PointerSize, verify a valid section exists |
||
583 | // that fully contains a pointer at that location. Multiple fixups in a rebase |
||
584 | // (such as with the REBASE_OPCODE_DO_*_TIMES* opcodes) can be tested via the |
||
585 | // Count and Skip parameters. |
||
586 | // |
||
587 | // This is used by MachORebaseEntry::moveNext() to validate a MachORebaseEntry |
||
588 | const char *RebaseEntryCheckSegAndOffsets(int32_t SegIndex, |
||
589 | uint64_t SegOffset, |
||
590 | uint8_t PointerSize, |
||
591 | uint32_t Count=1, |
||
592 | uint32_t Skip=0) const { |
||
593 | return BindRebaseSectionTable->checkSegAndOffsets(SegIndex, SegOffset, |
||
594 | PointerSize, Count, Skip); |
||
595 | } |
||
596 | |||
597 | /// For use with the SegIndex of a checked Mach-O Bind or Rebase entry to |
||
598 | /// get the segment name. |
||
599 | StringRef BindRebaseSegmentName(int32_t SegIndex) const { |
||
600 | return BindRebaseSectionTable->segmentName(SegIndex); |
||
601 | } |
||
602 | |||
603 | /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or |
||
604 | /// Rebase entry to get the section name. |
||
605 | StringRef BindRebaseSectionName(uint32_t SegIndex, uint64_t SegOffset) const { |
||
606 | return BindRebaseSectionTable->sectionName(SegIndex, SegOffset); |
||
607 | } |
||
608 | |||
609 | /// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or |
||
610 | /// Rebase entry to get the address. |
||
611 | uint64_t BindRebaseAddress(uint32_t SegIndex, uint64_t SegOffset) const { |
||
612 | return BindRebaseSectionTable->address(SegIndex, SegOffset); |
||
613 | } |
||
614 | |||
615 | // In a MachO file, sections have a segment name. This is used in the .o |
||
616 | // files. They have a single segment, but this field specifies which segment |
||
617 | // a section should be put in the final object. |
||
618 | StringRef getSectionFinalSegmentName(DataRefImpl Sec) const; |
||
619 | |||
620 | // Names are stored as 16 bytes. These return the raw 16 bytes without |
||
621 | // interpreting them as a C string. |
||
622 | ArrayRef<char> getSectionRawName(DataRefImpl Sec) const; |
||
623 | ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const; |
||
624 | |||
625 | // MachO specific Info about relocations. |
||
626 | bool isRelocationScattered(const MachO::any_relocation_info &RE) const; |
||
627 | unsigned getPlainRelocationSymbolNum( |
||
628 | const MachO::any_relocation_info &RE) const; |
||
629 | bool getPlainRelocationExternal(const MachO::any_relocation_info &RE) const; |
||
630 | bool getScatteredRelocationScattered( |
||
631 | const MachO::any_relocation_info &RE) const; |
||
632 | uint32_t getScatteredRelocationValue( |
||
633 | const MachO::any_relocation_info &RE) const; |
||
634 | uint32_t getScatteredRelocationType( |
||
635 | const MachO::any_relocation_info &RE) const; |
||
636 | unsigned getAnyRelocationAddress(const MachO::any_relocation_info &RE) const; |
||
637 | unsigned getAnyRelocationPCRel(const MachO::any_relocation_info &RE) const; |
||
638 | unsigned getAnyRelocationLength(const MachO::any_relocation_info &RE) const; |
||
639 | unsigned getAnyRelocationType(const MachO::any_relocation_info &RE) const; |
||
640 | SectionRef getAnyRelocationSection(const MachO::any_relocation_info &RE) const; |
||
641 | |||
642 | // MachO specific structures. |
||
643 | MachO::section getSection(DataRefImpl DRI) const; |
||
644 | MachO::section_64 getSection64(DataRefImpl DRI) const; |
||
645 | MachO::section getSection(const LoadCommandInfo &L, unsigned Index) const; |
||
646 | MachO::section_64 getSection64(const LoadCommandInfo &L,unsigned Index) const; |
||
647 | MachO::nlist getSymbolTableEntry(DataRefImpl DRI) const; |
||
648 | MachO::nlist_64 getSymbol64TableEntry(DataRefImpl DRI) const; |
||
649 | |||
650 | MachO::linkedit_data_command |
||
651 | getLinkeditDataLoadCommand(const LoadCommandInfo &L) const; |
||
652 | MachO::segment_command |
||
653 | getSegmentLoadCommand(const LoadCommandInfo &L) const; |
||
654 | MachO::segment_command_64 |
||
655 | getSegment64LoadCommand(const LoadCommandInfo &L) const; |
||
656 | MachO::linker_option_command |
||
657 | getLinkerOptionLoadCommand(const LoadCommandInfo &L) const; |
||
658 | MachO::version_min_command |
||
659 | getVersionMinLoadCommand(const LoadCommandInfo &L) const; |
||
660 | MachO::note_command |
||
661 | getNoteLoadCommand(const LoadCommandInfo &L) const; |
||
662 | MachO::build_version_command |
||
663 | getBuildVersionLoadCommand(const LoadCommandInfo &L) const; |
||
664 | MachO::build_tool_version |
||
665 | getBuildToolVersion(unsigned index) const; |
||
666 | MachO::dylib_command |
||
667 | getDylibIDLoadCommand(const LoadCommandInfo &L) const; |
||
668 | MachO::dyld_info_command |
||
669 | getDyldInfoLoadCommand(const LoadCommandInfo &L) const; |
||
670 | MachO::dylinker_command |
||
671 | getDylinkerCommand(const LoadCommandInfo &L) const; |
||
672 | MachO::uuid_command |
||
673 | getUuidCommand(const LoadCommandInfo &L) const; |
||
674 | MachO::rpath_command |
||
675 | getRpathCommand(const LoadCommandInfo &L) const; |
||
676 | MachO::source_version_command |
||
677 | getSourceVersionCommand(const LoadCommandInfo &L) const; |
||
678 | MachO::entry_point_command |
||
679 | getEntryPointCommand(const LoadCommandInfo &L) const; |
||
680 | MachO::encryption_info_command |
||
681 | getEncryptionInfoCommand(const LoadCommandInfo &L) const; |
||
682 | MachO::encryption_info_command_64 |
||
683 | getEncryptionInfoCommand64(const LoadCommandInfo &L) const; |
||
684 | MachO::sub_framework_command |
||
685 | getSubFrameworkCommand(const LoadCommandInfo &L) const; |
||
686 | MachO::sub_umbrella_command |
||
687 | getSubUmbrellaCommand(const LoadCommandInfo &L) const; |
||
688 | MachO::sub_library_command |
||
689 | getSubLibraryCommand(const LoadCommandInfo &L) const; |
||
690 | MachO::sub_client_command |
||
691 | getSubClientCommand(const LoadCommandInfo &L) const; |
||
692 | MachO::routines_command |
||
693 | getRoutinesCommand(const LoadCommandInfo &L) const; |
||
694 | MachO::routines_command_64 |
||
695 | getRoutinesCommand64(const LoadCommandInfo &L) const; |
||
696 | MachO::thread_command |
||
697 | getThreadCommand(const LoadCommandInfo &L) const; |
||
698 | |||
699 | MachO::any_relocation_info getRelocation(DataRefImpl Rel) const; |
||
700 | MachO::data_in_code_entry getDice(DataRefImpl Rel) const; |
||
701 | const MachO::mach_header &getHeader() const; |
||
702 | const MachO::mach_header_64 &getHeader64() const; |
||
703 | uint32_t |
||
704 | getIndirectSymbolTableEntry(const MachO::dysymtab_command &DLC, |
||
705 | unsigned Index) const; |
||
706 | MachO::data_in_code_entry getDataInCodeTableEntry(uint32_t DataOffset, |
||
707 | unsigned Index) const; |
||
708 | MachO::symtab_command getSymtabLoadCommand() const; |
||
709 | MachO::dysymtab_command getDysymtabLoadCommand() const; |
||
710 | MachO::linkedit_data_command getDataInCodeLoadCommand() const; |
||
711 | MachO::linkedit_data_command getLinkOptHintsLoadCommand() const; |
||
712 | ArrayRef<uint8_t> getDyldInfoRebaseOpcodes() const; |
||
713 | ArrayRef<uint8_t> getDyldInfoBindOpcodes() const; |
||
714 | ArrayRef<uint8_t> getDyldInfoWeakBindOpcodes() const; |
||
715 | ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const; |
||
716 | ArrayRef<uint8_t> getDyldInfoExportsTrie() const; |
||
717 | |||
718 | /// If the optional is std::nullopt, no header was found, but the object was |
||
719 | /// well-formed. |
||
720 | Expected<std::optional<MachO::dyld_chained_fixups_header>> |
||
721 | getChainedFixupsHeader() const; |
||
722 | Expected<std::vector<ChainedFixupTarget>> getDyldChainedFixupTargets() const; |
||
723 | |||
724 | // Note: This is a limited, temporary API, which will be removed when Apple |
||
725 | // upstreams their implementation. Please do not rely on this. |
||
726 | Expected<std::optional<MachO::linkedit_data_command>> |
||
727 | getChainedFixupsLoadCommand() const; |
||
728 | // Returns the number of sections listed in dyld_chained_starts_in_image, and |
||
729 | // a ChainedFixupsSegment for each segment that has fixups. |
||
730 | Expected<std::pair<size_t, std::vector<ChainedFixupsSegment>>> |
||
731 | getChainedFixupsSegments() const; |
||
732 | ArrayRef<uint8_t> getDyldExportsTrie() const; |
||
733 | |||
734 | SmallVector<uint64_t> getFunctionStarts() const; |
||
735 | ArrayRef<uint8_t> getUuid() const; |
||
736 | |||
737 | StringRef getStringTableData() const; |
||
738 | bool is64Bit() const; |
||
739 | void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const; |
||
740 | |||
741 | static StringRef guessLibraryShortName(StringRef Name, bool &isFramework, |
||
742 | StringRef &Suffix); |
||
743 | |||
744 | static Triple::ArchType getArch(uint32_t CPUType, uint32_t CPUSubType); |
||
745 | static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType, |
||
746 | const char **McpuDefault = nullptr, |
||
747 | const char **ArchFlag = nullptr); |
||
748 | static bool isValidArch(StringRef ArchFlag); |
||
749 | static ArrayRef<StringRef> getValidArchs(); |
||
750 | static Triple getHostArch(); |
||
751 | |||
752 | bool isRelocatableObject() const override; |
||
753 | |||
754 | StringRef mapDebugSectionName(StringRef Name) const override; |
||
755 | |||
756 | llvm::binaryformat::Swift5ReflectionSectionKind |
||
757 | mapReflectionSectionNameToEnumValue(StringRef SectionName) const override; |
||
758 | |||
759 | bool hasPageZeroSegment() const { return HasPageZeroSegment; } |
||
760 | |||
761 | static bool classof(const Binary *v) { |
||
762 | return v->isMachO(); |
||
763 | } |
||
764 | |||
765 | static uint32_t |
||
766 | getVersionMinMajor(MachO::version_min_command &C, bool SDK) { |
||
767 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
||
768 | return (VersionOrSDK >> 16) & 0xffff; |
||
769 | } |
||
770 | |||
771 | static uint32_t |
||
772 | getVersionMinMinor(MachO::version_min_command &C, bool SDK) { |
||
773 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
||
774 | return (VersionOrSDK >> 8) & 0xff; |
||
775 | } |
||
776 | |||
777 | static uint32_t |
||
778 | getVersionMinUpdate(MachO::version_min_command &C, bool SDK) { |
||
779 | uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; |
||
780 | return VersionOrSDK & 0xff; |
||
781 | } |
||
782 | |||
783 | static std::string getBuildPlatform(uint32_t platform) { |
||
784 | switch (platform) { |
||
785 | case MachO::PLATFORM_MACOS: return "macos"; |
||
786 | case MachO::PLATFORM_IOS: return "ios"; |
||
787 | case MachO::PLATFORM_TVOS: return "tvos"; |
||
788 | case MachO::PLATFORM_WATCHOS: return "watchos"; |
||
789 | case MachO::PLATFORM_BRIDGEOS: return "bridgeos"; |
||
790 | case MachO::PLATFORM_MACCATALYST: return "macCatalyst"; |
||
791 | case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator"; |
||
792 | case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator"; |
||
793 | case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator"; |
||
794 | case MachO::PLATFORM_DRIVERKIT: return "driverkit"; |
||
795 | default: |
||
796 | std::string ret; |
||
797 | raw_string_ostream ss(ret); |
||
798 | ss << format_hex(platform, 8, true); |
||
799 | return ss.str(); |
||
800 | } |
||
801 | } |
||
802 | |||
803 | static std::string getBuildTool(uint32_t tools) { |
||
804 | switch (tools) { |
||
805 | case MachO::TOOL_CLANG: return "clang"; |
||
806 | case MachO::TOOL_SWIFT: return "swift"; |
||
807 | case MachO::TOOL_LD: return "ld"; |
||
808 | default: |
||
809 | std::string ret; |
||
810 | raw_string_ostream ss(ret); |
||
811 | ss << format_hex(tools, 8, true); |
||
812 | return ss.str(); |
||
813 | } |
||
814 | } |
||
815 | |||
/// Render a packed version number as "major.minor" or "major.minor.update".
///
/// The major number is read from bits 16-31 of \p version, the minor number
/// from bits 8-15 and the update number from bits 0-7. The ".update" suffix
/// is appended only when the update number is non-zero.
///
/// \param version the packed version value to decode.
/// \returns the dotted decimal text for \p version.
static std::string getVersionString(uint32_t version) {
  const uint32_t major = (version >> 16) & 0xffff;
  const uint32_t minor = (version >> 8) & 0xff;
  const uint32_t update = version & 0xff;

  // Assemble the text directly in the std::string that is returned; this
  // avoids an intermediate buffer and the nested std::string(std::string(...))
  // conversion on the way out.
  std::string Version = std::to_string(major) + "." + std::to_string(minor);
  if (update != 0)
    Version += "." + std::to_string(update);
  return Version;
}
||
827 | |||
828 | /// If the input path is a .dSYM bundle (as created by the dsymutil tool), |
||
829 | /// return the paths to the object files found in the bundle, otherwise return |
||
830 | /// an empty vector. If the path appears to be a .dSYM bundle but no objects |
||
831 | /// were found or there was a filesystem error, then return an error. |
||
832 | static Expected<std::vector<std::string>> |
||
833 | findDsymObjectMembers(StringRef Path); |
||
834 | |||
835 | private: |
||
836 | MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits, |
||
837 | Error &Err, uint32_t UniversalCputype = 0, |
||
838 | uint32_t UniversalIndex = 0); |
||
839 | |||
840 | uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; |
||
841 | |||
842 | union { |
||
843 | MachO::mach_header_64 Header64; |
||
844 | MachO::mach_header Header; |
||
845 | }; |
||
846 | using SectionList = SmallVector<const char*, 1>; |
||
847 | SectionList Sections; |
||
848 | using LibraryList = SmallVector<const char*, 1>; |
||
849 | LibraryList Libraries; |
||
850 | LoadCommandList LoadCommands; |
||
851 | using LibraryShortName = SmallVector<StringRef, 1>; |
||
852 | using BuildToolList = SmallVector<const char*, 1>; |
||
853 | BuildToolList BuildTools; |
||
854 | mutable LibraryShortName LibrariesShortNames; |
||
855 | std::unique_ptr<BindRebaseSegInfo> BindRebaseSectionTable; |
||
856 | const char *SymtabLoadCmd = nullptr; |
||
857 | const char *DysymtabLoadCmd = nullptr; |
||
858 | const char *DataInCodeLoadCmd = nullptr; |
||
859 | const char *LinkOptHintsLoadCmd = nullptr; |
||
860 | const char *DyldInfoLoadCmd = nullptr; |
||
861 | const char *FuncStartsLoadCmd = nullptr; |
||
862 | const char *DyldChainedFixupsLoadCmd = nullptr; |
||
863 | const char *DyldExportsTrieLoadCmd = nullptr; |
||
864 | const char *UuidLoadCmd = nullptr; |
||
865 | bool HasPageZeroSegment = false; |
||
866 | }; |
||
867 | |||
868 | /// DiceRef |
||
869 | inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner) |
||
870 | : DicePimpl(DiceP) , OwningObject(Owner) {} |
||
871 | |||
872 | inline bool DiceRef::operator==(const DiceRef &Other) const { |
||
873 | return DicePimpl == Other.DicePimpl; |
||
874 | } |
||
875 | |||
876 | inline bool DiceRef::operator<(const DiceRef &Other) const { |
||
877 | return DicePimpl < Other.DicePimpl; |
||
878 | } |
||
879 | |||
880 | inline void DiceRef::moveNext() { |
||
881 | const MachO::data_in_code_entry *P = |
||
882 | reinterpret_cast<const MachO::data_in_code_entry *>(DicePimpl.p); |
||
883 | DicePimpl.p = reinterpret_cast<uintptr_t>(P + 1); |
||
884 | } |
||
885 | |||
886 | // Since a Mach-O data-in-code reference (a DiceRef) can only be created when |
||
887 | // the OwningObject ObjectFile is a MachOObjectFile, a static_cast<> is used by |
||
888 | // the methods that get the values of the fields of the reference. |
||
889 | |||
890 | inline std::error_code DiceRef::getOffset(uint32_t &Result) const { |
||
891 | const MachOObjectFile *MachOOF = |
||
892 | static_cast<const MachOObjectFile *>(OwningObject); |
||
893 | MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); |
||
894 | Result = Dice.offset; |
||
895 | return std::error_code(); |
||
896 | } |
||
897 | |||
898 | inline std::error_code DiceRef::getLength(uint16_t &Result) const { |
||
899 | const MachOObjectFile *MachOOF = |
||
900 | static_cast<const MachOObjectFile *>(OwningObject); |
||
901 | MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); |
||
902 | Result = Dice.length; |
||
903 | return std::error_code(); |
||
904 | } |
||
905 | |||
906 | inline std::error_code DiceRef::getKind(uint16_t &Result) const { |
||
907 | const MachOObjectFile *MachOOF = |
||
908 | static_cast<const MachOObjectFile *>(OwningObject); |
||
909 | MachO::data_in_code_entry Dice = MachOOF->getDice(DicePimpl); |
||
910 | Result = Dice.kind; |
||
911 | return std::error_code(); |
||
912 | } |
||
913 | |||
914 | inline DataRefImpl DiceRef::getRawDataRefImpl() const { |
||
915 | return DicePimpl; |
||
916 | } |
||
917 | |||
918 | inline const ObjectFile *DiceRef::getObjectFile() const { |
||
919 | return OwningObject; |
||
920 | } |
||
921 | |||
922 | } // end namespace object |
||
923 | } // end namespace llvm |
||
924 | |||
925 | #endif // LLVM_OBJECT_MACHO_H |