Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line |
|---|---|---|---|
| 14 | pmbaty | 1 | //===- DWARFDebugLine.h -----------------------------------------*- C++ -*-===// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | |||
| 9 | #ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |
||
| 10 | #define LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |
||
| 11 | |||
| 12 | #include "llvm/ADT/StringRef.h" |
||
| 13 | #include "llvm/BinaryFormat/Dwarf.h" |
||
| 14 | #include "llvm/DebugInfo/DIContext.h" |
||
| 15 | #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" |
||
| 16 | #include "llvm/DebugInfo/DWARF/DWARFUnit.h" |
||
| 17 | #include "llvm/Support/MD5.h" |
||
| 18 | #include "llvm/Support/Path.h" |
||
| 19 | #include <cstdint> |
||
| 20 | #include <map> |
||
| 21 | #include <string> |
||
| 22 | #include <vector> |
||
| 23 | |||
| 24 | namespace llvm { |
||
| 25 | |||
| 26 | class raw_ostream; |
||
| 27 | |||
| 28 | class DWARFDebugLine { |
||
| 29 | public: |
||
| 30 | struct FileNameEntry { /* One record of a prologue's file table (see Prologue::FileNames). */ |
||
| 31 | FileNameEntry() = default; |
||
| 32 | |||
| 33 | DWARFFormValue Name; /* File name, held as a DWARF form value rather than a decoded string. */ |
||
| 34 | uint64_t DirIdx = 0; /* NOTE(review): presumably an index into Prologue::IncludeDirectories; confirm against getDirectoryForEntry(). */ |
||
| 35 | uint64_t ModTime = 0; /* Modification timestamp; 0 by default (see ContentTypeTracker::HasModTime). */ |
||
| 36 | uint64_t Length = 0; /* File size; 0 by default (see ContentTypeTracker::HasLength). */ |
||
| 37 | MD5::MD5Result Checksum; /* MD5 checksum of the file (see ContentTypeTracker::HasMD5). */ |
||
| 38 | DWARFFormValue Source; /* Embedded source text, as a DWARF form value (see ContentTypeTracker::HasSource). */ |
||
| 39 | }; |
||
| 40 | |||
| 41 | /// Tracks which optional content types are present in a DWARF file name |
||
| 42 | /// entry format. Every flag starts out false, i.e. no optional content. |
||
| 43 | struct ContentTypeTracker { |
||
| 44 | ContentTypeTracker() = default; |
||
| 45 | |||
| 46 | /// Whether filename entries provide a modification timestamp (see FileNameEntry::ModTime). |
||
| 47 | bool HasModTime = false; |
||
| 48 | /// Whether filename entries provide a file size (see FileNameEntry::Length). |
||
| 49 | bool HasLength = false; |
||
| 50 | /// For v5, whether filename entries provide an MD5 checksum (see FileNameEntry::Checksum). |
||
| 51 | bool HasMD5 = false; |
||
| 52 | /// For v5, whether filename entries provide source text (see FileNameEntry::Source). |
||
| 53 | bool HasSource = false; |
||
| 54 | |||
| 55 | /// Update tracked content types with \p ContentType. NOTE(review): presumably sets the flag matching \p ContentType; the definition is not in this header. |
||
| 56 | void trackContentType(dwarf::LineNumberEntryFormat ContentType); |
||
| 57 | }; |
||
| 58 | |||
| 59 | struct Prologue { /* Header ("prologue") of one line table: format parameters plus the include-directory and file tables. */ |
||
| 60 | Prologue(); |
||
| 61 | |||
| 62 | /// The size in bytes of the statement information for this compilation unit |
||
| 63 | /// (not including the total_length field itself). |
||
| 64 | uint64_t TotalLength; |
||
| 65 | /// Version, address size (starting in v5), and DWARF32/64 format; these |
||
| 66 | /// parameters affect interpretation of forms (used in the directory and |
||
| 67 | /// file tables starting with v5). |
||
| 68 | dwarf::FormParams FormParams; |
||
| 69 | /// The number of bytes following the prologue_length field to the beginning |
||
| 70 | /// of the first byte of the statement program itself. |
||
| 71 | uint64_t PrologueLength; |
||
| 72 | /// In v5, size in bytes of a segment selector. |
||
| 73 | uint8_t SegSelectorSize; |
||
| 74 | /// The size in bytes of the smallest target machine instruction. Statement |
||
| 75 | /// program opcodes that alter the address register first multiply their |
||
| 76 | /// operands by this value. |
||
| 77 | uint8_t MinInstLength; |
||
| 78 | /// The maximum number of individual operations that may be encoded in an |
||
| 79 | /// instruction. |
||
| 80 | uint8_t MaxOpsPerInst; |
||
| 81 | /// The initial value of the is_stmt register. |
||
| 82 | uint8_t DefaultIsStmt; |
||
| 83 | /// This parameter (line_base) affects the meaning of the special opcodes; see the line number program header in the DWARF specification. |
||
| 84 | int8_t LineBase; |
||
| 85 | /// This parameter (line_range) affects the meaning of the special opcodes; see the line number program header in the DWARF specification. |
||
| 86 | uint8_t LineRange; |
||
| 87 | /// The number assigned to the first special opcode. |
||
| 88 | uint8_t OpcodeBase; |
||
| 89 | /// This tracks which optional file format content types are present. |
||
| 90 | ContentTypeTracker ContentTypes; |
||
| 91 | std::vector<uint8_t> StandardOpcodeLengths; /* NOTE(review): presumably one operand count per standard opcode; confirm in parse(). */ |
||
| 92 | std::vector<DWARFFormValue> IncludeDirectories; /* Include directory table, kept as DWARF form values. */ |
||
| 93 | std::vector<FileNameEntry> FileNames; /* File name table. */ |
||
| 94 | |||
| 95 | dwarf::FormParams getFormParams() const { return FormParams; } /* Returns a copy; a top-level const on a by-value return adds nothing, so it is not used here. */ |
||
| 96 | uint16_t getVersion() const { return FormParams.Version; } |
||
| 97 | uint8_t getAddressSize() const { return FormParams.AddrSize; } |
||
| 98 | bool isDWARF64() const { return FormParams.Format == dwarf::DWARF64; } |
||
| 99 | |||
| 100 | uint32_t sizeofTotalLength() const { return isDWARF64() ? 12 : 4; } /* Size in bytes of the total_length field: 12 for DWARF64, otherwise 4. */ |
||
| 101 | |||
| 102 | uint32_t sizeofPrologueLength() const { return isDWARF64() ? 8 : 4; } /* Size in bytes of the prologue_length field: 8 for DWARF64, otherwise 4. */ |
||
| 103 | |||
| 104 | bool totalLengthIsValid() const; |
||
| 105 | |||
| 106 | /// Length of the prologue in bytes. |
||
| 107 | uint64_t getLength() const; |
||
| 108 | |||
| 109 | /// Get DWARF-version aware access to the file name entry at the provided |
||
| 110 | /// index. |
||
| 111 | const llvm::DWARFDebugLine::FileNameEntry & |
||
| 112 | getFileNameEntry(uint64_t Index) const; |
||
| 113 | |||
| 114 | bool hasFileAtIndex(uint64_t FileIndex) const; |
||
| 115 | |||
| 116 | std::optional<uint64_t> getLastValidFileIndex() const; |
||
| 117 | |||
| 118 | bool /* LineTable::getFileNameByIndex forwards here; it documents the result as true on success with the name stored in Result. */ |
||
| 119 | getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, |
||
| 120 | DILineInfoSpecifier::FileLineInfoKind Kind, |
||
| 121 | std::string &Result, |
||
| 122 | sys::path::Style Style = sys::path::Style::native) const; |
||
| 123 | |||
| 124 | void clear(); |
||
| 125 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
||
| 126 | Error parse(DWARFDataExtractor Data, uint64_t *OffsetPtr, |
||
| 127 | function_ref<void(Error)> RecoverableErrorHandler, |
||
| 128 | const DWARFContext &Ctx, const DWARFUnit *U = nullptr); |
||
| 129 | }; |
||
| 130 | |||
| 131 | /// One row of the standard .debug_line state machine. |
||
| 132 | struct Row { |
||
| 133 | explicit Row(bool DefaultIsStmt = false); |
||
| 134 | |||
| 135 | /// Invoked once the row has been appended to the matrix. |
||
| 136 | void postAppend(); |
||
| 137 | void reset(bool DefaultIsStmt); |
||
| 138 | void dump(raw_ostream &OS) const; |
||
| 139 | |||
| 140 | static void dumpTableHeader(raw_ostream &OS, unsigned Indent); |
||
| 141 | |||
| 142 | static bool orderByAddress(const Row &LHS, const Row &RHS) { |
||
| 143 | return LHS.Address.SectionIndex != RHS.Address.SectionIndex ? LHS.Address.SectionIndex < RHS.Address.SectionIndex |
||
| 144 | : LHS.Address.Address < RHS.Address.Address; |
||
| 145 | } |
||
| 146 | |||
| 147 | /// Program counter of a compiler-generated machine instruction, together |
||
| 148 | /// with the index of the section containing that PC. When relocation |
||
| 149 | /// information is present, the section index names the section that |
||
| 150 | /// contains the address above; otherwise it holds the |
||
| 151 | /// object::SectionedAddress::Undef value. |
||
| 152 | object::SectionedAddress Address; |
||
| 153 | /// Source line number (unsigned). Numbering starts at 1; the compiler may |
||
| 154 | /// emit 0 when an instruction cannot be attributed to any source |
||
| 155 | /// line. |
||
| 156 | uint32_t Line; |
||
| 157 | /// Column number within the source line (unsigned). Numbering starts |
||
| 158 | /// at 1; 0 is reserved to mean that the statement begins at the |
||
| 159 | /// 'left edge' of the line. |
||
| 160 | uint16_t Column; |
||
| 161 | /// Identity of the source file that corresponds to the machine |
||
| 162 | /// instruction (unsigned). |
||
| 163 | uint16_t File; |
||
| 164 | /// DWARF path discriminator value for this location |
||
| 165 | /// (unsigned). |
||
| 166 | uint32_t Discriminator; |
||
| 167 | /// Encodes the instruction set architecture that applies to the |
||
| 168 | /// current instruction (unsigned). |
||
| 169 | uint8_t Isa; |
||
| 170 | /// Set when the current instruction is the beginning of a |
||
| 171 | /// statement. |
||
| 172 | uint8_t IsStmt : 1, |
||
| 173 | /// Set when the current instruction is the beginning of a |
||
| 174 | /// basic block. |
||
| 175 | BasicBlock : 1, |
||
| 176 | /// Set when the current address is that of the first byte |
||
| 177 | /// after the end of a sequence of target machine |
||
| 178 | /// instructions. |
||
| 179 | EndSequence : 1, |
||
| 180 | /// Set when the current address is one (of possibly many) |
||
| 181 | /// where execution should be suspended for an entry breakpoint |
||
| 182 | /// of a function. |
||
| 183 | PrologueEnd : 1, |
||
| 184 | /// Set when the current address is one (of possibly many) |
||
| 185 | /// where execution should be suspended for an exit breakpoint |
||
| 186 | /// of a function. |
||
| 187 | EpilogueBegin : 1; |
||
| 188 | }; |
||
| 189 | |||
| 190 | /// A run of contiguous machine instructions. The line table of a |
||
| 191 | /// compilation unit may hold several sequences, and they are not |
||
| 192 | /// guaranteed to appear in ascending instruction-address order. |
||
| 193 | struct Sequence { |
||
| 194 | Sequence(); |
||
| 195 | |||
| 196 | /// The instructions covered lie in the address range [LowPC, HighPC) |
||
| 197 | /// and are described by line table rows [FirstRowIndex, LastRowIndex). |
||
| 198 | uint64_t LowPC; |
||
| 199 | uint64_t HighPC; |
||
| 200 | /// Index of the section containing the addresses above when relocation |
||
| 201 | /// information is present; otherwise the |
||
| 202 | /// object::SectionedAddress::Undef value. |
||
| 203 | uint64_t SectionIndex; |
||
| 204 | unsigned FirstRowIndex; |
||
| 205 | unsigned LastRowIndex; |
||
| 206 | bool Empty; |
||
| 207 | |||
| 208 | void reset(); |
||
| 209 | |||
| 210 | static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { |
||
| 211 | return LHS.SectionIndex != RHS.SectionIndex ? LHS.SectionIndex < RHS.SectionIndex |
||
| 212 | : LHS.HighPC < RHS.HighPC; |
||
| 213 | } |
||
| 214 | |||
| 215 | bool isValid() const { |
||
| 216 | return Empty ? false : (HighPC > LowPC && LastRowIndex > FirstRowIndex); |
||
| 217 | } |
||
| 218 | |||
| 219 | bool containsPC(object::SectionedAddress PC) const { |
||
| 220 | if (PC.SectionIndex != SectionIndex) return false; |
||
| 221 | return PC.Address >= LowPC && PC.Address < HighPC; |
||
| 222 | } |
||
| 223 | }; |
||
| 224 | |||
| 225 | struct LineTable { /* One line table: its Prologue plus the Rows and Sequences collected for it. */ |
||
| 226 | LineTable(); |
||
| 227 | |||
| 228 | /// Represents an invalid row. NOTE(review): as a non-static const member this is stored in every LineTable and suppresses the implicit assignment operators; a static constant would avoid both. |
||
| 229 | const uint32_t UnknownRowIndex = UINT32_MAX; |
||
| 230 | |||
| 231 | void appendRow(const DWARFDebugLine::Row &R) { Rows.push_back(R); } /* Appends a copy of R to Rows. */ |
||
| 232 | |||
| 233 | void appendSequence(const DWARFDebugLine::Sequence &S) { /* Appends a copy of S to Sequences. */ |
||
| 234 | Sequences.push_back(S); |
||
| 235 | } |
||
| 236 | |||
| 237 | /// Returns the index of the row with file/line info for a given address, |
||
| 238 | /// or UnknownRowIndex if there is no such row. |
||
| 239 | uint32_t lookupAddress(object::SectionedAddress Address) const; |
||
| 240 | |||
| 241 | bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, /* NOTE(review): presumably collects into Result the row indices covering [Address, Address + Size); confirm in the .cpp. */ |
||
| 242 | std::vector<uint32_t> &Result) const; |
||
| 243 | |||
| 244 | bool hasFileAtIndex(uint64_t FileIndex) const { /* Forwards to Prologue::hasFileAtIndex. */ |
||
| 245 | return Prologue.hasFileAtIndex(FileIndex); |
||
| 246 | } |
||
| 247 | |||
| 248 | std::optional<uint64_t> getLastValidFileIndex() const { /* Forwards to Prologue::getLastValidFileIndex. */ |
||
| 249 | return Prologue.getLastValidFileIndex(); |
||
| 250 | } |
||
| 251 | |||
| 252 | /// Extracts filename by its index in filename table in prologue. |
||
| 253 | /// In DWARF v4, the files are 1-indexed and the current compilation file |
||
| 254 | /// name is not represented in the list. In DWARF v5, the files are |
||
| 255 | /// 0-indexed and the primary source file has the index 0. |
||
| 256 | /// Returns true on success. Forwards to Prologue::getFileNameByIndex with its default path style. |
||
| 257 | bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, |
||
| 258 | DILineInfoSpecifier::FileLineInfoKind Kind, |
||
| 259 | std::string &Result) const { |
||
| 260 | return Prologue.getFileNameByIndex(FileIndex, CompDir, Kind, Result); |
||
| 261 | } |
||
| 262 | |||
| 263 | /// Fills the Result argument with the file and line information |
||
| 264 | /// corresponding to Address. Returns true on success. |
||
| 265 | bool getFileLineInfoForAddress(object::SectionedAddress Address, |
||
| 266 | const char *CompDir, |
||
| 267 | DILineInfoSpecifier::FileLineInfoKind Kind, |
||
| 268 | DILineInfo &Result) const; |
||
| 269 | |||
| 270 | /// Extracts directory name by its Entry in include directories table |
||
| 271 | /// in prologue. Returns true on success; the name is written to \p Directory. |
||
| 272 | bool getDirectoryForEntry(const FileNameEntry &Entry, |
||
| 273 | std::string &Directory) const; |
||
| 274 | |||
| 275 | void dump(raw_ostream &OS, DIDumpOptions DumpOptions) const; |
||
| 276 | void clear(); |
||
| 277 | |||
| 278 | /// Parse prologue and all rows. \p OS and \p Verbose default to no output stream and non-verbose. |
||
| 279 | Error parse(DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, |
||
| 280 | const DWARFContext &Ctx, const DWARFUnit *U, |
||
| 281 | function_ref<void(Error)> RecoverableErrorHandler, |
||
| 282 | raw_ostream *OS = nullptr, bool Verbose = false); |
||
| 283 | |||
| 284 | using RowVector = std::vector<Row>; |
||
| 285 | using RowIter = RowVector::const_iterator; |
||
| 286 | using SequenceVector = std::vector<Sequence>; |
||
| 287 | using SequenceIter = SequenceVector::const_iterator; |
||
| 288 | |||
| 289 | struct Prologue Prologue; /* The 'struct' keyword is needed because the member shares its name with the type. */ |
||
| 290 | RowVector Rows; /* Rows added through appendRow(). */ |
||
| 291 | SequenceVector Sequences; /* Sequences added through appendSequence(). */ |
||
| 292 | |||
| 293 | private: |
||
| 294 | uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq, /* NOTE(review): presumably yields a row index within Seq for Address, or UnknownRowIndex; confirm in the .cpp. */ |
||
| 295 | object::SectionedAddress Address) const; |
||
| 296 | std::optional<StringRef> |
||
| 297 | getSourceByIndex(uint64_t FileIndex, |
||
| 298 | DILineInfoSpecifier::FileLineInfoKind Kind) const; |
||
| 299 | |||
| 300 | uint32_t lookupAddressImpl(object::SectionedAddress Address) const; /* NOTE(review): presumably the worker behind lookupAddress(); confirm in the .cpp. */ |
||
| 301 | |||
| 302 | bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, /* NOTE(review): presumably the worker behind lookupAddressRange(); confirm in the .cpp. */ |
||
| 303 | std::vector<uint32_t> &Result) const; |
||
| 304 | }; |
||
| 305 | |||
| 306 | const LineTable *getLineTable(uint64_t Offset) const; /* NOTE(review): presumably looks up an already parsed table by offset in LineTableMap, with nullptr as the not-found result; confirm in the .cpp. */ |
||
| 307 | Expected<const LineTable *> /* Returns an Expected, so a failure to obtain the table is reported as an Error rather than through the handler. */ |
||
| 308 | getOrParseLineTable(DWARFDataExtractor &DebugLineData, uint64_t Offset, |
||
| 309 | const DWARFContext &Ctx, const DWARFUnit *U, |
||
| 310 | function_ref<void(Error)> RecoverableErrorHandler); |
||
| 311 | void clearLineTable(uint64_t Offset); /* NOTE(review): presumably drops the table stored for Offset; confirm in the .cpp. */ |
||
| 312 | |||
| 313 | /// Helper to allow for parsing of an entire .debug_line section in sequence. Call parseNext() or skip() repeatedly until done() returns true. |
||
| 314 | class SectionParser { |
||
| 315 | public: |
||
| 316 | using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; |
||
| 317 | |||
| 318 | SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, /* Data and C are kept as reference members (DebugLineData, Context), so both must outlive the parser. */ |
||
| 319 | DWARFUnitVector::iterator_range Units); |
||
| 320 | |||
| 321 | /// Get the next line table from the section. Report any issues via the |
||
| 322 | /// handlers. |
||
| 323 | /// |
||
| 324 | /// \param RecoverableErrorHandler - any issues that don't prevent further |
||
| 325 | /// parsing of the table will be reported through this handler. |
||
| 326 | /// \param UnrecoverableErrorHandler - any issues that prevent further |
||
| 327 | /// parsing of the table will be reported through this handler. |
||
| 328 | /// \param OS - if not null, the parser will print information about the |
||
| 329 | /// table as it parses it. |
||
| 330 | /// \param Verbose - if true, the parser will print verbose information when |
||
| 331 | /// printing to the output. |
||
| 332 | LineTable parseNext(function_ref<void(Error)> RecoverableErrorHandler, |
||
| 333 | function_ref<void(Error)> UnrecoverableErrorHandler, |
||
| 334 | raw_ostream *OS = nullptr, bool Verbose = false); |
||
| 335 | |||
| 336 | /// Skip the current line table and go to the following line table (if |
||
| 337 | /// present) immediately. |
||
| 338 | /// |
||
| 339 | /// \param RecoverableErrorHandler - report any recoverable prologue |
||
| 340 | /// parsing issues via this handler. |
||
| 341 | /// \param UnrecoverableErrorHandler - report any unrecoverable prologue |
||
| 342 | /// parsing issues via this handler. |
||
| 343 | void skip(function_ref<void(Error)> RecoverableErrorHandler, |
||
| 344 | function_ref<void(Error)> UnrecoverableErrorHandler); |
||
| 345 | |||
| 346 | /// Indicates if the parser has parsed as much as possible. |
||
| 347 | /// |
||
| 348 | /// \note Certain problems with the line table structure might mean that |
||
| 349 | /// parsing stops before the end of the section is reached. |
||
| 350 | bool done() const { return Done; } |
||
| 351 | |||
| 352 | /// Get the offset the parser has reached. |
||
| 353 | uint64_t getOffset() const { return Offset; } |
||
| 354 | |||
| 355 | private: |
||
| 356 | DWARFUnit *prepareToParse(uint64_t Offset); /* NOTE(review): presumably finds the unit for the table at Offset via LineToUnit; confirm in the .cpp. */ |
||
| 357 | void moveToNextTable(uint64_t OldOffset, const Prologue &P); |
||
| 358 | |||
| 359 | LineToUnitMap LineToUnit; /* NOTE(review): presumably keyed by line table offset; confirm in the constructor. */ |
||
| 360 | |||
| 361 | DWARFDataExtractor &DebugLineData; |
||
| 362 | const DWARFContext &Context; |
||
| 363 | uint64_t Offset = 0; /* Value reported by getOffset(); starts at 0. */ |
||
| 364 | bool Done = false; /* Value reported by done(); starts false. */ |
||
| 365 | }; |
||
| 366 | |||
| 367 | private: |
||
| 368 | struct ParsingState { /* Working state kept while parsing one line table: the table being filled plus the current Row and Sequence. */ |
||
| 369 | ParsingState(struct LineTable *LT, uint64_t TableOffset, |
||
| 370 | function_ref<void(Error)> ErrorHandler); |
||
| 371 | |||
| 372 | void resetRowAndSequence(); |
||
| 373 | void appendRowToMatrix(); |
||
| 374 | |||
| 375 | /// Advance the address by the \p OperationAdvance value. \returns the |
||
| 376 | /// amount advanced by. |
||
| 377 | uint64_t advanceAddr(uint64_t OperationAdvance, uint8_t Opcode, |
||
| 378 | uint64_t OpcodeOffset); |
||
| 379 | |||
| 380 | struct AddrAndAdjustedOpcode { /* Result type of advanceAddrForOpcode(). */ |
||
| 381 | uint64_t AddrDelta; |
||
| 382 | uint8_t AdjustedOpcode; |
||
| 383 | }; |
||
| 384 | |||
| 385 | /// Advance the address as required by the specified \p Opcode. |
||
| 386 | /// \returns the amount advanced by and the calculated adjusted opcode. |
||
| 387 | AddrAndAdjustedOpcode advanceAddrForOpcode(uint8_t Opcode, |
||
| 388 | uint64_t OpcodeOffset); |
||
| 389 | |||
| 390 | struct AddrAndLineDelta { /* Result type of handleSpecialOpcode(). */ |
||
| 391 | uint64_t Address; |
||
| 392 | int32_t Line; |
||
| 393 | }; |
||
| 394 | |||
| 395 | /// Advance the line and address as required by the specified special \p |
||
| 396 | /// Opcode. \returns the address and line delta. |
||
| 397 | AddrAndLineDelta handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset); |
||
| 398 | |||
| 399 | /// Line table we're currently parsing. Held as a raw pointer; this struct declares no destructor. |
||
| 400 | struct LineTable *LineTable; |
||
| 401 | struct Row Row; /* The 'struct' keyword is needed because the member shares its name with the type. */ |
||
| 402 | struct Sequence Sequence; /* Same naming pattern as Row above. */ |
||
| 403 | |||
| 404 | private: |
||
| 405 | uint64_t LineTableOffset; |
||
| 406 | |||
| 407 | bool ReportAdvanceAddrProblem = true; /* NOTE(review): presumably limits repeated address-advance diagnostics; confirm in the .cpp. */ |
||
| 408 | bool ReportBadLineRange = true; /* NOTE(review): presumably limits repeated line_range diagnostics; confirm in the .cpp. */ |
||
| 409 | function_ref<void(Error)> ErrorHandler; |
||
| 410 | }; |
||
| 411 | |||
| 412 | using LineTableMapTy = std::map<uint64_t, LineTable>; /* NOTE(review): the key is presumably the table's offset, as taken by getLineTable(); confirm in the .cpp. */ |
||
| 413 | using LineTableIter = LineTableMapTy::iterator; |
||
| 414 | using LineTableConstIter = LineTableMapTy::const_iterator; |
||
| 415 | |||
| 416 | LineTableMapTy LineTableMap; /* Line tables held by this DWARFDebugLine, stored by value. */ |
||
| 417 | }; |
||
| 418 | |||
| 419 | } // end namespace llvm |
||
| 420 | |||
| 421 | #endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGLINE_H |