Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- GsymCreator.h --------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
 
9
#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
10
#define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
11
 
12
#include <functional>
13
#include <memory>
14
#include <mutex>
15
#include <thread>
16
 
17
#include "llvm/ADT/AddressRanges.h"
18
#include "llvm/ADT/ArrayRef.h"
19
#include "llvm/ADT/StringSet.h"
20
#include "llvm/DebugInfo/GSYM/FileEntry.h"
21
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
22
#include "llvm/MC/StringTableBuilder.h"
23
#include "llvm/Support/Endian.h"
24
#include "llvm/Support/Error.h"
25
#include "llvm/Support/Path.h"
26
 
27
namespace llvm {
28
 
29
namespace gsym {
30
class FileWriter;
31
 
32
/// GsymCreator is used to emit GSYM data to a stand alone file or section
33
/// within a file.
34
///
35
/// The GsymCreator is designed to be used in 3 stages:
36
/// - Create FunctionInfo objects and add them
37
/// - Finalize the GsymCreator object
38
/// - Save to file or section
39
///
40
/// The first stage involves creating FunctionInfo objects from another source
41
/// of information like compiler debug info metadata, DWARF or Breakpad files.
42
/// Any strings in the FunctionInfo or contained information, like InlineInfo
43
/// or LineTable objects, should get the string table offsets by calling
44
/// GsymCreator::insertString(...). Any file indexes that are needed should be
45
/// obtained by calling GsymCreator::insertFile(...). All of the function calls
46
/// in GsymCreator are thread safe. This allows multiple threads to create and
47
/// add FunctionInfo objects while parsing debug information.
48
///
49
/// Once all of the FunctionInfo objects have been added, the
50
/// GsymCreator::finalize(...) must be called prior to saving. This function
51
/// will sort the FunctionInfo objects, finalize the string table, and do any
52
/// other passes on the information needed to prepare the information to be
53
/// saved.
54
///
55
/// Once the object has been finalized, it can be saved to a file or section.
56
///
57
/// ENCODING
58
///
59
/// GSYM files are designed to be memory mapped into a process as shared, read
60
/// only data, and used as is.
61
///
62
/// The GSYM file format when in a stand alone file consists of:
63
///   - Header
64
///   - Address Table
65
///   - Function Info Offsets
66
///   - File Table
67
///   - String Table
68
///   - Function Info Data
69
///
70
/// HEADER
71
///
72
/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
73
///
74
/// ADDRESS TABLE
75
///
76
/// The address table immediately follows the header in the file and consists
77
/// of Header.NumAddresses address offsets. These offsets are sorted and can be
78
/// binary searched for efficient lookups. Addresses in the address table are
79
/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
80
/// This lets the address table contain 8, 16, or 32 bit offsets. This allows
81
/// the address table to not require full 64 bit addresses for each address.
82
/// The resulting GSYM size is smaller and causes fewer pages to be touched
83
/// during address lookups when the address table is smaller. The size of the
84
/// address offsets in the address table is specified in the header in
85
/// Header.AddrOffSize. The first offset in the address table is aligned to
86
/// Header.AddrOffSize alignment to ensure efficient access when loaded into
87
/// memory.
88
///
89
/// FUNCTION INFO OFFSETS TABLE
90
///
91
/// The function info offsets table immediately follows the address table and
92
/// consists of Header.NumAddresses 32 bit file offsets: one for each address
93
/// in the address table. This data is aligned to a 4 byte boundary. The
94
/// offsets in this table are the relative offsets from the start offset of the
95
/// GSYM header and point to the function info data for each address in the
96
/// address table. Keeping this data separate from the address table helps to
97
/// reduce the number of pages that are touched when address lookups occur on a
98
/// GSYM file.
99
///
100
/// FILE TABLE
101
///
102
/// The file table immediately follows the function info offsets table. The
103
/// encoding of the FileTable is:
104
///
105
/// struct FileTable {
106
///   uint32_t Count;
107
///   FileEntry Files[];
108
/// };
109
///
110
/// The file table starts with a 32 bit count of the number of files that are
111
/// used in all of the function info, followed by that number of FileEntry
112
/// structures. The file table is aligned to a 4 byte boundary. Each file in
113
/// the file table is represented with a FileEntry structure.
114
/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
115
///
116
/// STRING TABLE
117
///
118
/// The string table follows the file table in stand alone GSYM files and
119
/// contains all strings for everything contained in the GSYM file. Any string
120
/// data should be added to the string table and any references to strings
121
/// inside GSYM information must be stored as 32 bit string table offsets into
122
/// this string table. The string table always starts with an empty string at
123
/// offset zero and is followed by any strings needed by the GSYM information.
124
/// The start of the string table is not aligned to any boundary.
125
///
126
/// FUNCTION INFO DATA
127
///
128
/// The function info data is the payload that contains information about the
129
/// address that is being looked up. It contains all of the encoded
130
/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
131
/// entry in the Function Info Offsets Table. For details on the exact encoding
132
/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
133
class GsymCreator {
134
  // Private member variables require Mutex protections
135
  mutable std::mutex Mutex;
136
  std::vector<FunctionInfo> Funcs;
137
  StringTableBuilder StrTab;
138
  StringSet<> StringStorage;
139
  DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
140
  std::vector<llvm::gsym::FileEntry> Files;
141
  std::vector<uint8_t> UUID;
142
  std::optional<AddressRanges> ValidTextRanges;
143
  AddressRanges Ranges;
144
  std::optional<uint64_t> BaseAddress;
145
  bool Finalized = false;
146
  bool Quiet;
147
 
148
public:
149
  GsymCreator(bool Quiet = false);
150
 
151
  /// Save a GSYM file to a stand alone file.
152
  ///
153
  /// \param Path The file path to save the GSYM file to.
154
  /// \param ByteOrder The endianness to use when saving the file.
155
  /// \returns An error object that indicates success or failure of the save.
156
  llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
157
 
158
  /// Encode a GSYM into the file writer stream at the current position.
159
  ///
160
  /// \param O The stream to save the binary data to
161
  /// \returns An error object that indicates success or failure of the save.
162
  llvm::Error encode(FileWriter &O) const;
163
 
164
  /// Insert a string into the GSYM string table.
165
  ///
166
  /// All strings used by GSYM files must be uniqued by adding them to this
167
  /// string pool and using the returned offset for any string values.
168
  ///
169
  /// \param S The string to insert into the string table.
170
  /// \param Copy If true, then make a backing copy of the string. If false,
171
  ///             the string is owned by another object that will stay around
172
  ///             long enough for the GsymCreator to save the GSYM file.
173
  /// \returns The unique 32 bit offset into the string table.
174
  uint32_t insertString(StringRef S, bool Copy = true);
175
 
176
  /// Insert a file into this GSYM creator.
177
  ///
178
  /// Inserts a file by adding a FileEntry into the "Files" member variable if
179
  /// the file has not already been added. The file path is split into
180
  /// directory and filename which are both added to the string table. This
181
  /// allows paths to be stored efficiently by reusing the directories that are
182
  /// common between multiple files.
183
  ///
184
  /// \param   Path The path to the file to insert.
185
  /// \param   Style The path style for the "Path" parameter.
186
  /// \returns The unique file index for the inserted file.
187
  uint32_t insertFile(StringRef Path,
188
                      sys::path::Style Style = sys::path::Style::native);
189
 
190
  /// Add a function info to this GSYM creator.
191
  ///
192
  /// All information in the FunctionInfo object must use the
193
  /// GsymCreator::insertString(...) function when creating string table
194
  /// offsets for names and other strings.
195
  ///
196
  /// \param   FI The function info object to emplace into our functions list.
197
  void addFunctionInfo(FunctionInfo &&FI);
198
 
199
  /// Finalize the data in the GSYM creator prior to saving the data out.
200
  ///
201
  /// Finalize must be called after all FunctionInfo objects have been added
202
  /// and before GsymCreator::save() is called.
203
  ///
204
  /// \param  OS Output stream to report duplicate function infos, overlapping
205
  ///         function infos, and function infos that were merged or removed.
206
  /// \returns An error object that indicates success or failure of the
207
  ///          finalize.
208
  llvm::Error finalize(llvm::raw_ostream &OS);
209
 
210
  /// Set the UUID value.
211
  ///
212
  /// \param UUIDBytes The new UUID bytes.
213
  void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
214
    UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
215
  }
216
 
217
  /// Thread safe iteration over all function infos.
218
  ///
219
  /// \param  Callback A callback function that will get called with each
220
  ///         FunctionInfo. If the callback returns false, stop iterating.
221
  void forEachFunctionInfo(
222
      std::function<bool(FunctionInfo &)> const &Callback);
223
 
224
  /// Thread safe const iteration over all function infos.
225
  ///
226
  /// \param  Callback A callback function that will get called with each
227
  ///         FunctionInfo. If the callback returns false, stop iterating.
228
  void forEachFunctionInfo(
229
      std::function<bool(const FunctionInfo &)> const &Callback) const;
230
 
231
  /// Get the current number of FunctionInfo objects contained in this
232
  /// object.
233
  size_t getNumFunctionInfos() const;
234
 
235
  /// Check if an address has already been added as a function info.
236
  ///
237
  /// FunctionInfo data can come from many sources: debug info, symbol tables,
238
  /// exception information, and more. Symbol tables should be added after
239
  /// debug info and can use this function to see if a symbol's start address
240
  /// has already been added to the GsymReader. Calling this before adding
241
  /// a function info from a source other than debug info avoids clients adding
242
  /// many redundant FunctionInfo objects from many sources only for them to be
243
  /// removed during the finalize() call.
244
  bool hasFunctionInfoForAddress(uint64_t Addr) const;
245
 
246
  /// Set valid .text address ranges that all functions must be contained in.
247
  void SetValidTextRanges(AddressRanges &TextRanges) {
248
    ValidTextRanges = TextRanges;
249
  }
250
 
251
  /// Get the valid text ranges.
252
  const std::optional<AddressRanges> GetValidTextRanges() const {
253
    return ValidTextRanges;
254
  }
255
 
256
  /// Check if an address is a valid code address.
257
  ///
258
  /// Any functions whose addresses do not exist within these function bounds
259
  /// will not be converted into the final GSYM. This allows the object file
260
  /// to figure out the valid file address ranges of all the code sections
261
  /// and ensure we don't add invalid functions to the final output. Many
262
  /// linkers have issues when dead stripping functions from DWARF debug info
263
  /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
264
  /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
265
  /// attributes have no relocations that can be applied. This results in DWARF
266
  /// where many functions have an DW_AT_low_pc of zero and a valid offset size
267
  /// for DW_AT_high_pc. If we extract all valid ranges from an object file
268
  /// that are marked with executable permissions, we can properly ensure that
269
  /// these functions are removed.
270
  ///
271
  /// \param Addr An address to check.
272
  ///
273
  /// \returns True if the address is in the valid text ranges or if no valid
274
  ///          text ranges have been set, false otherwise.
275
  bool IsValidTextAddress(uint64_t Addr) const;
276
 
277
  /// Set the base address to use for the GSYM file.
278
  ///
279
  /// Setting the base address to use for the GSYM file. Object files typically
280
  /// get loaded from a base address when the OS loads them into memory. Using
281
  /// GSYM files for symbolication becomes easier if the base address in the
282
  /// GSYM header is the same address as it allows addresses to be easily slid
283
  /// and allows symbolication without needing to find the original base
284
  /// address in the original object file.
285
  ///
286
  /// \param  Addr The address to use as the base address of the GSYM file
287
  ///              when it is saved to disk.
288
  void setBaseAddress(uint64_t Addr) {
289
    BaseAddress = Addr;
290
  }
291
 
292
  /// Whether the transformation should be quiet, i.e. not output warnings.
293
  bool isQuiet() const { return Quiet; }
294
};
295
 
296
} // namespace gsym
297
} // namespace llvm
298
 
299
#endif // LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H