Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
 
9
#ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
10
#define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
11
 
12
#include "llvm/ADT/StringRef.h"
13
#include "llvm/BinaryFormat/XCOFF.h"
14
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
15
#include <cstdint>
16
#include <memory>
17
#include <vector>
18
 
19
namespace llvm {
20
 
21
struct XCOFFSymbolInfoTy {
22
  std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
23
  std::optional<uint32_t> Index;
24
  bool IsLabel = false;
25
  bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
26
};
27
 
28
struct SymbolInfoTy {
29
  uint64_t Addr;
30
  StringRef Name;
31
  // XCOFF uses XCOFFSymInfo. Other targets use Type.
32
  XCOFFSymbolInfoTy XCOFFSymInfo;
33
  uint8_t Type;
34
 
35
private:
36
  bool IsXCOFF;
37
  bool HasType;
38
 
39
public:
40
  SymbolInfoTy(uint64_t Addr, StringRef Name,
41
               std::optional<XCOFF::StorageMappingClass> Smc,
42
               std::optional<uint32_t> Idx, bool Label)
43
      : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
44
        IsXCOFF(true), HasType(false) {}
45
  SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
46
               bool IsXCOFF = false)
47
      : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
48
  bool isXCOFF() const { return IsXCOFF; }
49
 
50
private:
51
  friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
52
    assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
53
           "The value of IsXCOFF and HasType in P1 and P2 should be the same "
54
           "respectively.");
55
 
56
    if (P1.IsXCOFF && P1.HasType)
57
      return std::tie(P1.Addr, P1.Type, P1.Name) <
58
             std::tie(P2.Addr, P2.Type, P2.Name);
59
 
60
    if (P1.IsXCOFF)
61
      return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
62
             std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
63
 
64
    return std::tie(P1.Addr, P1.Name, P1.Type) <
65
           std::tie(P2.Addr, P2.Name, P2.Type);
66
  }
67
};
68
 
69
/// A list of SymbolInfoTy entries (going by the name, the symbols that belong
/// to one section).
using SectionSymbolsTy = std::vector<SymbolInfoTy>;
70
 
71
template <typename T> class ArrayRef;
72
class MCContext;
73
class MCInst;
74
class MCSubtargetInfo;
75
class raw_ostream;
76
 
77
/// Superclass for all disassemblers. Consumes a memory region and provides an
78
/// array of assembly instructions.
79
class MCDisassembler {
80
public:
81
  /// Ternary decode status. Most backends will just use Fail and
82
  /// Success, however some have a concept of an instruction with
83
  /// understandable semantics but which is architecturally
84
  /// incorrect. An example of this is ARM UNPREDICTABLE instructions
85
  /// which are disassemblable but cause undefined behaviour.
86
  ///
87
  /// Because it makes sense to disassemble these instructions, there
88
  /// is a "soft fail" failure mode that indicates the MCInst& is
89
  /// valid but architecturally incorrect.
90
  ///
91
  /// The enum numbers are deliberately chosen such that reduction
92
  /// from Success->SoftFail ->Fail can be done with a simple
93
  /// bitwise-AND:
94
  ///
95
  ///   LEFT & TOP =  | Success       Unpredictable   Fail
96
  ///   --------------+-----------------------------------
97
  ///   Success       | Success       Unpredictable   Fail
98
  ///   Unpredictable | Unpredictable Unpredictable   Fail
99
  ///   Fail          | Fail          Fail            Fail
100
  ///
101
  /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
102
  /// Success, SoftFail, Fail respectively.
103
  enum DecodeStatus {
104
    Fail = 0,
105
    SoftFail = 1,
106
    Success = 3
107
  };
108
 
109
  MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
110
    : Ctx(Ctx), STI(STI) {}
111
 
112
  virtual ~MCDisassembler();
113
 
114
  /// Returns the disassembly of a single instruction.
115
  ///
116
  /// \param Instr    - An MCInst to populate with the contents of the
117
  ///                   instruction.
118
  /// \param Size     - A value to populate with the size of the instruction, or
119
  ///                   the number of bytes consumed while attempting to decode
120
  ///                   an invalid instruction.
121
  /// \param Address  - The address, in the memory space of region, of the first
122
  ///                   byte of the instruction.
123
  /// \param Bytes    - A reference to the actual bytes of the instruction.
124
  /// \param CStream  - The stream to print comments and annotations on.
125
  /// \return         - MCDisassembler::Success if the instruction is valid,
126
  ///                   MCDisassembler::SoftFail if the instruction was
127
  ///                                            disassemblable but invalid,
128
  ///                   MCDisassembler::Fail if the instruction was invalid.
129
  virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
130
                                      ArrayRef<uint8_t> Bytes, uint64_t Address,
131
                                      raw_ostream &CStream) const = 0;
132
 
133
  /// Used to perform separate target specific disassembly for a particular
134
  /// symbol. May parse any prelude that precedes instructions after the
135
  /// start of a symbol, or the entire symbol.
136
  /// This is used for example by WebAssembly to decode preludes.
137
  ///
138
  /// Base implementation returns std::nullopt. So all targets by default ignore
139
  /// to treat symbols separately.
140
  ///
141
  /// \param Symbol   - The symbol.
142
  /// \param Size     - The number of bytes consumed.
143
  /// \param Address  - The address, in the memory space of region, of the first
144
  ///                   byte of the symbol.
145
  /// \param Bytes    - A reference to the actual bytes at the symbol location.
146
  /// \param CStream  - The stream to print comments and annotations on.
147
  /// \return         - MCDisassembler::Success if bytes are decoded
148
  ///                   successfully. Size must hold the number of bytes that
149
  ///                   were decoded.
150
  ///                 - MCDisassembler::Fail if the bytes are invalid. Size
151
  ///                   must hold the number of bytes that were decoded before
152
  ///                   failing. The target must print nothing. This can be
153
  ///                   done by buffering the output if needed.
154
  ///                 - std::nullopt if the target doesn't want to handle the
155
  ///                   symbol separately. Value of Size is ignored in this
156
  ///                   case.
157
  virtual std::optional<DecodeStatus>
158
  onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
159
                uint64_t Address, raw_ostream &CStream) const;
160
  // TODO:
161
  // Implement similar hooks that can be used at other points during
162
  // disassembly. Something along the following lines:
163
  // - onBeforeInstructionDecode()
164
  // - onAfterInstructionDecode()
165
  // - onSymbolEnd()
166
  // It should help move much of the target specific code from llvm-objdump to
167
  // respective target disassemblers.
168
 
169
  /// Suggest a distance to skip in a buffer of data to find the next
170
  /// place to look for the start of an instruction. For example, if
171
  /// all instructions have a fixed alignment, this might advance to
172
  /// the next multiple of that alignment.
173
  ///
174
  /// If not overridden, the default is 1.
175
  ///
176
  /// \param Address  - The address, in the memory space of region, of the
177
  ///                   starting point (typically the first byte of something
178
  ///                   that did not decode as a valid instruction at all).
179
  /// \param Bytes    - A reference to the actual bytes at Address. May be
180
  ///                   needed in order to determine the width of an
181
  ///                   unrecognized instruction (e.g. in Thumb this is a simple
182
  ///                   consistent criterion that doesn't require knowing the
183
  ///                   specific instruction). The caller can pass as much data
184
  ///                   as they have available, and the function is required to
185
  ///                   make a reasonable default choice if not enough data is
186
  ///                   available to make a better one.
187
  /// \return         - A number of bytes to skip. Must always be greater than
188
  ///                   zero. May be greater than the size of Bytes.
189
  virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
190
                                      uint64_t Address) const;
191
 
192
private:
193
  MCContext &Ctx;
194
 
195
protected:
196
  // Subtarget information, for instruction decoding predicates if required.
197
  const MCSubtargetInfo &STI;
198
  std::unique_ptr<MCSymbolizer> Symbolizer;
199
 
200
public:
201
  // Helpers around MCSymbolizer
202
  bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
203
                                bool IsBranch, uint64_t Offset, uint64_t OpSize,
204
                                uint64_t InstSize) const;
205
 
206
  void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
207
 
208
  /// Set \p Symzer as the current symbolizer.
209
  /// This takes ownership of \p Symzer, and deletes the previously set one.
210
  void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
211
 
212
  MCContext& getContext() const { return Ctx; }
213
 
214
  const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
215
 
216
  // Marked mutable because we cache it inside the disassembler, rather than
217
  // having to pass it around as an argument through all the autogenerated code.
218
  mutable raw_ostream *CommentStream = nullptr;
219
};
220
 
221
} // end namespace llvm
222
 
223
#endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H