Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8.  
  9. #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  10. #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  11.  
  12. #include "llvm/ADT/StringRef.h"
  13. #include "llvm/BinaryFormat/XCOFF.h"
  14. #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
  15. #include <cstdint>
  16. #include <memory>
  17. #include <vector>
  18.  
  19. namespace llvm {
  20.  
/// XCOFF-specific attributes of a symbol, stored in
/// SymbolInfoTy::XCOFFSymInfo.
///
/// This is an aggregate: SymbolInfoTy brace-initializes it positionally as
/// {StorageMappingClass, Index, IsLabel}, so the order of the data members is
/// part of the interface.
struct XCOFFSymbolInfoTy {
  /// Storage mapping class of the symbol; empty when none was supplied.
  std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
  /// Optional 32-bit index for the symbol; empty when none was supplied.
  /// NOTE(review): presumably the symbol-table index -- confirm against the
  /// code that fills this in.
  std::optional<uint32_t> Index;
  /// Whether the symbol is a label. Defaults to false.
  bool IsLabel = false;
  /// Ordering between two sets of XCOFF attributes. SymbolInfoTy's operator<
  /// uses it as the second sort key for XCOFF symbols that carry no Type.
  /// Only declared here; the definition is not part of this header.
  bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
};
  27.  
  28. struct SymbolInfoTy {
  29.   uint64_t Addr;
  30.   StringRef Name;
  31.   // XCOFF uses XCOFFSymInfo. Other targets use Type.
  32.   XCOFFSymbolInfoTy XCOFFSymInfo;
  33.   uint8_t Type;
  34.  
  35. private:
  36.   bool IsXCOFF;
  37.   bool HasType;
  38.  
  39. public:
  40.   SymbolInfoTy(uint64_t Addr, StringRef Name,
  41.                std::optional<XCOFF::StorageMappingClass> Smc,
  42.                std::optional<uint32_t> Idx, bool Label)
  43.       : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
  44.         IsXCOFF(true), HasType(false) {}
  45.   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
  46.                bool IsXCOFF = false)
  47.       : Addr(Addr), Name(Name), Type(Type), IsXCOFF(IsXCOFF), HasType(true) {}
  48.   bool isXCOFF() const { return IsXCOFF; }
  49.  
  50. private:
  51.   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
  52.     assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
  53.            "The value of IsXCOFF and HasType in P1 and P2 should be the same "
  54.            "respectively.");
  55.  
  56.     if (P1.IsXCOFF && P1.HasType)
  57.       return std::tie(P1.Addr, P1.Type, P1.Name) <
  58.              std::tie(P2.Addr, P2.Type, P2.Name);
  59.  
  60.     if (P1.IsXCOFF)
  61.       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
  62.              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
  63.  
  64.     return std::tie(P1.Addr, P1.Name, P1.Type) <
  65.            std::tie(P2.Addr, P2.Name, P2.Type);
  66.   }
  67. };
  68.  
/// A list of symbol descriptions, stored by value.
/// NOTE(review): judging by the name, presumably the symbols of one section --
/// confirm against the users of this alias.
using SectionSymbolsTy = std::vector<SymbolInfoTy>;

// Forward declarations. The class below only names these types in function
// declarations or through references and pointers, so their full definitions
// are not needed in this header.
template <typename T> class ArrayRef;
class MCContext;
class MCInst;
class MCSubtargetInfo;
class raw_ostream;
  76.  
/// Superclass for all disassemblers. Consumes a memory region and provides an
/// array of assembly instructions.
///
/// Targets derive from this class and implement getInstruction(); the other
/// virtual hooks are optional overrides.
class MCDisassembler {
public:
  /// Ternary decode status. Most backends will just use Fail and
  /// Success, however some have a concept of an instruction with
  /// understandable semantics but which is architecturally
  /// incorrect. An example of this is ARM UNPREDICTABLE instructions
  /// which are disassemblable but cause undefined behaviour.
  ///
  /// Because it makes sense to disassemble these instructions, there
  /// is a "soft fail" failure mode that indicates the MCInst& is
  /// valid but architecturally incorrect.
  ///
  /// The enum numbers are deliberately chosen such that reduction
  /// from Success -> SoftFail -> Fail can be done with a simple
  /// bitwise-AND:
  ///
  ///   LEFT & TOP =  | Success       SoftFail        Fail
  ///   --------------+-----------------------------------
  ///   Success       | Success       SoftFail        Fail
  ///   SoftFail      | SoftFail      SoftFail        Fail
  ///   Fail          | Fail          Fail            Fail
  ///
  /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
  /// Success, SoftFail, Fail respectively.
  enum DecodeStatus {
    Fail = 0,
    SoftFail = 1,
    Success = 3
  };

  /// Creates a disassembler bound to \p STI and \p Ctx. Only references are
  /// stored, so both objects must outlive the disassembler.
  MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
    : Ctx(Ctx), STI(STI) {}

  virtual ~MCDisassembler();

  /// Returns the disassembly of a single instruction.
  ///
  /// \param Instr    - An MCInst to populate with the contents of the
  ///                   instruction.
  /// \param Size     - A value to populate with the size of the instruction, or
  ///                   the number of bytes consumed while attempting to decode
  ///                   an invalid instruction.
  /// \param Bytes    - A reference to the actual bytes of the instruction.
  /// \param Address  - The address, in the memory space of region, of the first
  ///                   byte of the instruction.
  /// \param CStream  - The stream to print comments and annotations on.
  /// \return         - MCDisassembler::Success if the instruction is valid,
  ///                   MCDisassembler::SoftFail if the instruction was
  ///                                            disassemblable but invalid,
  ///                   MCDisassembler::Fail if the instruction was invalid.
  virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                                      ArrayRef<uint8_t> Bytes, uint64_t Address,
                                      raw_ostream &CStream) const = 0;

  /// Used to perform separate target specific disassembly for a particular
  /// symbol. May parse any prelude that precedes instructions after the
  /// start of a symbol, or the entire symbol.
  /// This is used for example by WebAssembly to decode preludes.
  ///
  /// The base implementation returns std::nullopt, so by default no target
  /// treats symbols separately.
  ///
  /// \param Symbol   - The symbol.
  /// \param Size     - The number of bytes consumed.
  /// \param Bytes    - A reference to the actual bytes at the symbol location.
  /// \param Address  - The address, in the memory space of region, of the first
  ///                   byte of the symbol.
  /// \param CStream  - The stream to print comments and annotations on.
  /// \return         - MCDisassembler::Success if bytes are decoded
  ///                   successfully. Size must hold the number of bytes that
  ///                   were decoded.
  ///                 - MCDisassembler::Fail if the bytes are invalid. Size
  ///                   must hold the number of bytes that were decoded before
  ///                   failing. The target must print nothing. This can be
  ///                   done by buffering the output if needed.
  ///                 - std::nullopt if the target doesn't want to handle the
  ///                   symbol separately. Value of Size is ignored in this
  ///                   case.
  virtual std::optional<DecodeStatus>
  onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
                uint64_t Address, raw_ostream &CStream) const;
  // TODO:
  // Implement similar hooks that can be used at other points during
  // disassembly. Something along the following lines:
  // - onBeforeInstructionDecode()
  // - onAfterInstructionDecode()
  // - onSymbolEnd()
  // It should help move much of the target specific code from llvm-objdump to
  // respective target disassemblers.

  /// Suggest a distance to skip in a buffer of data to find the next
  /// place to look for the start of an instruction. For example, if
  /// all instructions have a fixed alignment, this might advance to
  /// the next multiple of that alignment.
  ///
  /// If not overridden, the default is 1.
  ///
  /// \param Bytes    - A reference to the actual bytes at Address. May be
  ///                   needed in order to determine the width of an
  ///                   unrecognized instruction (e.g. in Thumb this is a simple
  ///                   consistent criterion that doesn't require knowing the
  ///                   specific instruction). The caller can pass as much data
  ///                   as they have available, and the function is required to
  ///                   make a reasonable default choice if not enough data is
  ///                   available to make a better one.
  /// \param Address  - The address, in the memory space of region, of the
  ///                   starting point (typically the first byte of something
  ///                   that did not decode as a valid instruction at all).
  /// \return         - A number of bytes to skip. Must always be greater than
  ///                   zero. May be greater than the size of Bytes.
  virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
                                      uint64_t Address) const;

private:
  // Context supplied at construction; exposed read-only through getContext().
  MCContext &Ctx;

protected:
  // Subtarget information, for instruction decoding predicates if required.
  const MCSubtargetInfo &STI;
  // Symbolizer owned by this disassembler, installed through setSymbolizer().
  // Null until one has been set.
  std::unique_ptr<MCSymbolizer> Symbolizer;

public:
  // Helpers around MCSymbolizer. Both are defined outside this header.
  bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
                                bool IsBranch, uint64_t Offset, uint64_t OpSize,
                                uint64_t InstSize) const;

  void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;

  /// Set \p Symzer as the current symbolizer.
  /// This takes ownership of \p Symzer, and deletes the previously set one.
  void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);

  /// Returns the MCContext passed to the constructor.
  MCContext& getContext() const { return Ctx; }

  /// Returns the subtarget information passed to the constructor.
  const MCSubtargetInfo& getSubtargetInfo() const { return STI; }

  // Marked mutable because we cache it inside the disassembler, rather than
  // having to pass it around as an argument through all the autogenerated code.
  // Null by default.
  mutable raw_ostream *CommentStream = nullptr;
};
  220.  
  221. } // end namespace llvm
  222.  
  223. #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
  224.