Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- GsymCreator.h --------------------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8.  
  9. #ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
  10. #define LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
  11.  
  12. #include <functional>
  13. #include <memory>
  14. #include <mutex>
  15. #include <thread>
  16.  
  17. #include "llvm/ADT/AddressRanges.h"
  18. #include "llvm/ADT/ArrayRef.h"
  19. #include "llvm/ADT/StringSet.h"
  20. #include "llvm/DebugInfo/GSYM/FileEntry.h"
  21. #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
  22. #include "llvm/MC/StringTableBuilder.h"
  23. #include "llvm/Support/Endian.h"
  24. #include "llvm/Support/Error.h"
  25. #include "llvm/Support/Path.h"
  26.  
  27. namespace llvm {
  28.  
  29. namespace gsym {
  30. class FileWriter;
  31.  
  32. /// GsymCreator is used to emit GSYM data to a stand alone file or section
  33. /// within a file.
  34. ///
  35. /// The GsymCreator is designed to be used in 3 stages:
  36. /// - Create FunctionInfo objects and add them
  37. /// - Finalize the GsymCreator object
  38. /// - Save to file or section
  39. ///
  40. /// The first stage involves creating FunctionInfo objects from another source
  41. /// of information like compiler debug info metadata, DWARF or Breakpad files.
  42. /// Any strings in the FunctionInfo or contained information, like InlineInfo
  43. /// or LineTable objects, should get the string table offsets by calling
  44. /// GsymCreator::insertString(...). Any file indexes that are needed should be
  45. /// obtained by calling GsymCreator::insertFile(...). All of the function calls
  46. /// in GsymCreator are thread safe. This allows multiple threads to create and
  47. /// add FunctionInfo objects while parsing debug information.
  48. ///
  49. /// Once all of the FunctionInfo objects have been added, the
  50. /// GsymCreator::finalize(...) must be called prior to saving. This function
  51. /// will sort the FunctionInfo objects, finalize the string table, and do any
  52. /// other passes on the information needed to prepare the information to be
  53. /// saved.
  54. ///
  55. /// Once the object has been finalized, it can be saved to a file or section.
  56. ///
  57. /// ENCODING
  58. ///
  59. /// GSYM files are designed to be memory mapped into a process as shared, read
  60. /// only data, and used as is.
  61. ///
  62. /// The GSYM file format when in a stand alone file consists of:
  63. ///   - Header
  64. ///   - Address Table
  65. ///   - Function Info Offsets
  66. ///   - File Table
  67. ///   - String Table
  68. ///   - Function Info Data
  69. ///
  70. /// HEADER
  71. ///
  72. /// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
  73. ///
  74. /// ADDRESS TABLE
  75. ///
  76. /// The address table immediately follows the header in the file and consists
  77. /// of Header.NumAddresses address offsets. These offsets are sorted and can be
  78. /// binary searched for efficient lookups. Addresses in the address table are
  79. /// stored as offsets from a 64 bit base address found in Header.BaseAddress.
  80. /// This allows the address table to contain 8, 16, or 32 bit offsets. This allows
  81. /// the address table to not require full 64 bit addresses for each address.
  82. /// The resulting GSYM size is smaller and causes fewer pages to be touched
  83. /// during address lookups when the address table is smaller. The size of the
  84. /// address offsets in the address table is specified in the header in
  85. /// Header.AddrOffSize. The first offset in the address table is aligned to
  86. /// Header.AddrOffSize alignment to ensure efficient access when loaded into
  87. /// memory.
  88. ///
  89. /// FUNCTION INFO OFFSETS TABLE
  90. ///
  91. /// The function info offsets table immediately follows the address table and
  92. /// consists of Header.NumAddresses 32 bit file offsets: one for each address
  93. /// in the address table. This data is aligned to a 4 byte boundary. The
  94. /// offsets in this table are the relative offsets from the start offset of the
  95. /// GSYM header and point to the function info data for each address in the
  96. /// address table. Keeping this data separate from the address table helps to
  97. /// reduce the number of pages that are touched when address lookups occur on a
  98. /// GSYM file.
  99. ///
  100. /// FILE TABLE
  101. ///
  102. /// The file table immediately follows the function info offsets table. The
  103. /// encoding of the FileTable is:
  104. ///
  105. /// struct FileTable {
  106. ///   uint32_t Count;
  107. ///   FileEntry Files[];
  108. /// };
  109. ///
  110. /// The file table starts with a 32 bit count of the number of files that are
  111. /// used in all of the function info, followed by that number of FileEntry
  112. /// structures. The file table is aligned to a 4 byte boundary. Each file in
  113. /// the file table is represented with a FileEntry structure.
  114. /// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
  115. ///
  116. /// STRING TABLE
  117. ///
  118. /// The string table follows the file table in stand alone GSYM files and
  119. /// contains all strings for everything contained in the GSYM file. Any string
  120. /// data should be added to the string table and any references to strings
  121. /// inside GSYM information must be stored as 32 bit string table offsets into
  122. /// this string table. The string table always starts with an empty string at
  123. /// offset zero and is followed by any strings needed by the GSYM information.
  124. /// The start of the string table is not aligned to any boundary.
  125. ///
  126. /// FUNCTION INFO DATA
  127. ///
  128. /// The function info data is the payload that contains information about the
  129. /// address that is being looked up. It contains all of the encoded
  130. /// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
  131. /// entry in the Function Info Offsets Table. For details on the exact encoding
  132. /// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
  133. class GsymCreator {
  134.   // Private member variables require Mutex protections
  135.   mutable std::mutex Mutex;
  136.   std::vector<FunctionInfo> Funcs;
  137.   StringTableBuilder StrTab;
  138.   StringSet<> StringStorage;
  139.   DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
  140.   std::vector<llvm::gsym::FileEntry> Files;
  141.   std::vector<uint8_t> UUID;
  142.   std::optional<AddressRanges> ValidTextRanges;
  143.   AddressRanges Ranges;
  144.   std::optional<uint64_t> BaseAddress;
  145.   bool Finalized = false;
  146.   bool Quiet;
  147.  
  148. public:
  149.   GsymCreator(bool Quiet = false);
  150.  
  151.   /// Save a GSYM file to a stand alone file.
  152.   ///
  153.   /// \param Path The file path to save the GSYM file to.
  154.   /// \param ByteOrder The endianness to use when saving the file.
  155.   /// \returns An error object that indicates success or failure of the save.
  156.   llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
  157.  
  158.   /// Encode a GSYM into the file writer stream at the current position.
  159.   ///
  160.   /// \param O The stream to save the binary data to
  161.   /// \returns An error object that indicates success or failure of the save.
  162.   llvm::Error encode(FileWriter &O) const;
  163.  
  164.   /// Insert a string into the GSYM string table.
  165.   ///
  166.   /// All strings used by GSYM files must be uniqued by adding them to this
  167.   /// string pool and using the returned offset for any string values.
  168.   ///
  169.   /// \param S The string to insert into the string table.
  170.   /// \param Copy If true, then make a backing copy of the string. If false,
  171.   ///             the string is owned by another object that will stay around
  172.   ///             long enough for the GsymCreator to save the GSYM file.
  173.   /// \returns The unique 32 bit offset into the string table.
  174.   uint32_t insertString(StringRef S, bool Copy = true);
  175.  
  176.   /// Insert a file into this GSYM creator.
  177.   ///
  178.   /// Inserts a file by adding a FileEntry into the "Files" member variable if
  179.   /// the file has not already been added. The file path is split into
  180.   /// directory and filename which are both added to the string table. This
  181.   /// allows paths to be stored efficiently by reusing the directories that are
  182.   /// common between multiple files.
  183.   ///
  184.   /// \param   Path The path to the file to insert.
  185.   /// \param   Style The path style for the "Path" parameter.
  186.   /// \returns The unique file index for the inserted file.
  187.   uint32_t insertFile(StringRef Path,
  188.                       sys::path::Style Style = sys::path::Style::native);
  189.  
  190.   /// Add a function info to this GSYM creator.
  191.   ///
  192.   /// All information in the FunctionInfo object must use the
  193.   /// GsymCreator::insertString(...) function when creating string table
  194.   /// offsets for names and other strings.
  195.   ///
  196.   /// \param   FI The function info object to emplace into our functions list.
  197.   void addFunctionInfo(FunctionInfo &&FI);
  198.  
  199.   /// Finalize the data in the GSYM creator prior to saving the data out.
  200.   ///
  201.   /// Finalize must be called after all FunctionInfo objects have been added
  202.   /// and before GsymCreator::save() is called.
  203.   ///
  204.   /// \param  OS Output stream to report duplicate function infos, overlapping
  205.   ///         function infos, and function infos that were merged or removed.
  206.   /// \returns An error object that indicates success or failure of the
  207.   ///          finalize.
  208.   llvm::Error finalize(llvm::raw_ostream &OS);
  209.  
  210.   /// Set the UUID value.
  211.   ///
  212.   /// \param UUIDBytes The new UUID bytes.
  213.   void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
  214.     UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
  215.   }
  216.  
  217.   /// Thread safe iteration over all function infos.
  218.   ///
  219.   /// \param  Callback A callback function that will get called with each
  220.   ///         FunctionInfo. If the callback returns false, stop iterating.
  221.   void forEachFunctionInfo(
  222.       std::function<bool(FunctionInfo &)> const &Callback);
  223.  
  224.   /// Thread safe const iteration over all function infos.
  225.   ///
  226.   /// \param  Callback A callback function that will get called with each
  227.   ///         FunctionInfo. If the callback returns false, stop iterating.
  228.   void forEachFunctionInfo(
  229.       std::function<bool(const FunctionInfo &)> const &Callback) const;
  230.  
  231.   /// Get the current number of FunctionInfo objects contained in this
  232.   /// object.
  233.   size_t getNumFunctionInfos() const;
  234.  
  235.   /// Check if an address has already been added as a function info.
  236.   ///
  237.   /// FunctionInfo data can come from many sources: debug info, symbol tables,
  238.   /// exception information, and more. Symbol tables should be added after
  239.   /// debug info and can use this function to see if a symbol's start address
  240.   /// has already been added to the GsymReader. Calling this before adding
  241.   /// a function info from a source other than debug info avoids clients adding
  242.   /// many redundant FunctionInfo objects from many sources only for them to be
  243.   /// removed during the finalize() call.
  244.   bool hasFunctionInfoForAddress(uint64_t Addr) const;
  245.  
  246.   /// Set valid .text address ranges that all functions must be contained in.
  247.   void SetValidTextRanges(AddressRanges &TextRanges) {
  248.     ValidTextRanges = TextRanges;
  249.   }
  250.  
  251.   /// Get the valid text ranges.
  252.   const std::optional<AddressRanges> GetValidTextRanges() const {
  253.     return ValidTextRanges;
  254.   }
  255.  
  256.   /// Check if an address is a valid code address.
  257.   ///
  258.   /// Any functions whose addresses do not exist within these function bounds
  259.   /// will not be converted into the final GSYM. This allows the object file
  260.   /// to figure out the valid file address ranges of all the code sections
  261.   /// and ensure we don't add invalid functions to the final output. Many
  262.   /// linkers have issues when dead stripping functions from DWARF debug info
  263.   /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
  264.   /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
  265.   /// attributes have no relocations that can be applied. This results in DWARF
  266.   /// where many functions have an DW_AT_low_pc of zero and a valid offset size
  267.   /// for DW_AT_high_pc. If we extract all valid ranges from an object file
  268.   /// that are marked with executable permissions, we can properly ensure that
  269.   /// these functions are removed.
  270.   ///
  271.   /// \param Addr An address to check.
  272.   ///
  273.   /// \returns True if the address is in the valid text ranges or if no valid
  274.   ///          text ranges have been set, false otherwise.
  275.   bool IsValidTextAddress(uint64_t Addr) const;
  276.  
  277.   /// Set the base address to use for the GSYM file.
  278.   ///
  279.   /// Setting the base address to use for the GSYM file. Object files typically
  280.   /// get loaded from a base address when the OS loads them into memory. Using
  281.   /// GSYM files for symbolication becomes easier if the base address in the
  282.   /// GSYM header is the same address as it allows addresses to be easily slid
  283.   /// and allows symbolication without needing to find the original base
  284.   /// address in the original object file.
  285.   ///
  286.   /// \param  Addr The address to use as the base address of the GSYM file
  287.   ///              when it is saved to disk.
  288.   void setBaseAddress(uint64_t Addr) {
  289.     BaseAddress = Addr;
  290.   }
  291.  
  292.   /// Whether the transformation should be quiet, i.e. not output warnings.
  293.   bool isQuiet() const { return Quiet; }
  294. };
  295.  
  296. } // namespace gsym
  297. } // namespace llvm
  298.  
  299. #endif // LLVM_DEBUGINFO_GSYM_GSYMCREATOR_H
  300.