Details | Last modification | View Log | RSS feed
| Rev | Author | Line No. | Line | 
|---|---|---|---|
| 14 | pmbaty | 1 | //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===// | 
| 2 | // | ||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | // | ||
| 7 | //===----------------------------------------------------------------------===// | ||
| 8 | // | ||
| 9 | // This file contains the declaration of the MCPseudoProbe to support the pseudo | ||
| 10 | // probe encoding for AutoFDO. Pseudo probes together with their inline context | ||
| 11 | // are encoded in a DFS recursive way in the .pseudoprobe sections. For each | ||
| 12 | // .pseudoprobe section, the encoded binary data consist of a single or mutiple | ||
| 13 | // function records each for one outlined function. A function record has the | ||
| 14 | // following format : | ||
| 15 | // | ||
| 16 | // FUNCTION BODY (one for each outlined function present in the text section) | ||
| 17 | //    GUID (uint64) | ||
| 18 | //        GUID of the function's source name which may be different from the | ||
| 19 | //        actual binary linkage name. This GUID will be used to decode and | ||
| 20 | //        generate a profile against the source function name. | ||
| 21 | //    NPROBES (ULEB128) | ||
| 22 | //        Number of probes originating from this function. | ||
| 23 | //    NUM_INLINED_FUNCTIONS (ULEB128) | ||
| 24 | //        Number of callees inlined into this function, aka number of | ||
| 25 | //        first-level inlinees | ||
| 26 | //    PROBE RECORDS | ||
| 27 | //        A list of NPROBES entries. Each entry contains: | ||
| 28 | //          INDEX (ULEB128) | ||
| 29 | //          TYPE (uint4) | ||
| 30 | //            0 - block probe, 1 - indirect call, 2 - direct call | ||
| 31 | //          ATTRIBUTE (uint3) | ||
| 32 | //            1 - reserved | ||
| 33 | //          ADDRESS_TYPE (uint1) | ||
| 34 | //            0 - code address for regular probes (for downwards compatibility) | ||
| 35 | //              - GUID of linkage name for sentinel probes | ||
| 36 | //            1 - address delta | ||
| 37 | //          CODE_ADDRESS (uint64 or ULEB128) | ||
| 38 | //            code address or address delta, depending on ADDRESS_TYPE | ||
| 39 | //    INLINED FUNCTION RECORDS | ||
| 40 | //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined | ||
| 41 | //        callees.  Each record contains: | ||
| 42 | //          INLINE SITE | ||
| 43 | //            ID of the callsite probe (ULEB128) | ||
| 44 | //          FUNCTION BODY | ||
| 45 | //            A FUNCTION BODY entry describing the inlined function. | ||
| 46 | // | ||
| 47 | // TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility | ||
| 48 | // is no longer an issue. | ||
| 49 | //===----------------------------------------------------------------------===// | ||
| 50 | |||
| 51 | #ifndef LLVM_MC_MCPSEUDOPROBE_H | ||
| 52 | #define LLVM_MC_MCPSEUDOPROBE_H | ||
| 53 | |||
| 54 | #include "llvm/ADT/DenseSet.h" | ||
| 55 | #include "llvm/ADT/SmallVector.h" | ||
| 56 | #include "llvm/ADT/StringRef.h" | ||
| 57 | #include "llvm/IR/PseudoProbe.h" | ||
| 58 | #include "llvm/Support/ErrorOr.h" | ||
| 59 | #include <list> | ||
| 60 | #include <map> | ||
| 61 | #include <memory> | ||
| 62 | #include <string> | ||
| 63 | #include <tuple> | ||
| 64 | #include <type_traits> | ||
| 65 | #include <unordered_map> | ||
| 66 | #include <unordered_set> | ||
| 67 | #include <vector> | ||
| 68 | |||
| 69 | namespace llvm { | ||
| 70 | |||
| 71 | class MCSymbol; | ||
| 72 | class MCObjectStreamer; | ||
| 73 | class raw_ostream; | ||
| 74 | |||
| 75 | enum class MCPseudoProbeFlag { | ||
| 76 |   // If set, indicates that the probe is encoded as an address delta | ||
| 77 |   // instead of a real code address. | ||
| 78 | AddressDelta = 0x1, | ||
| 79 | }; | ||
| 80 | |||
| 81 | // Function descriptor decoded from .pseudo_probe_desc section | ||
| 82 | struct MCPseudoProbeFuncDesc { | ||
| 83 | uint64_t FuncGUID = 0; | ||
| 84 | uint64_t FuncHash = 0; | ||
| 85 | std::string FuncName; | ||
| 86 | |||
| 87 | MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) | ||
| 88 | : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; | ||
| 89 | |||
| 90 | void print(raw_ostream &OS); | ||
| 91 | }; | ||
| 92 | |||
| 93 | class MCDecodedPseudoProbe; | ||
| 94 | |||
| 95 | // An inline frame has the form <CalleeGuid, ProbeID> | ||
| 96 | using InlineSite = std::tuple<uint64_t, uint32_t>; | ||
| 97 | using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; | ||
| 98 | // GUID to PseudoProbeFuncDesc map | ||
| 99 | using GUIDProbeFunctionMap = | ||
| 100 | std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; | ||
| 101 | // Address to pseudo probes map. | ||
| 102 | using AddressProbesMap = | ||
| 103 | std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>; | ||
| 104 | |||
| 105 | class MCDecodedPseudoProbeInlineTree; | ||
| 106 | |||
| 107 | class MCPseudoProbeBase { | ||
| 108 | protected: | ||
| 109 | uint64_t Guid; | ||
| 110 | uint64_t Index; | ||
| 111 | uint8_t Attributes; | ||
| 112 | uint8_t Type; | ||
| 113 |   // The value should be equal to PseudoProbeReservedId::Last + 1 which is | ||
| 114 |   // defined in SampleProfileProbe.h. The header file is not included here to | ||
| 115 |   // reduce the dependency from MC to IPO. | ||
| 116 | const static uint32_t PseudoProbeFirstId = 1; | ||
| 117 | |||
| 118 | public: | ||
| 119 | MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T) | ||
| 120 | : Guid(G), Index(I), Attributes(At), Type(T) {} | ||
| 121 | |||
| 122 | bool isEntry() const { return Index == PseudoProbeFirstId; } | ||
| 123 | |||
| 124 | uint64_t getGuid() const { return Guid; } | ||
| 125 | |||
| 126 | uint64_t getIndex() const { return Index; } | ||
| 127 | |||
| 128 | uint8_t getAttributes() const { return Attributes; } | ||
| 129 | |||
| 130 | uint8_t getType() const { return Type; } | ||
| 131 | |||
| 132 | bool isBlock() const { | ||
| 133 | return Type == static_cast<uint8_t>(PseudoProbeType::Block); | ||
| 134 |   } | ||
| 135 | |||
| 136 | bool isIndirectCall() const { | ||
| 137 | return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall); | ||
| 138 |   } | ||
| 139 | |||
| 140 | bool isDirectCall() const { | ||
| 141 | return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall); | ||
| 142 |   } | ||
| 143 | |||
| 144 | bool isCall() const { return isIndirectCall() || isDirectCall(); } | ||
| 145 | |||
| 146 | void setAttributes(uint8_t Attr) { Attributes = Attr; } | ||
| 147 | }; | ||
| 148 | |||
| 149 | /// Instances of this class represent a pseudo probe instance for a pseudo probe | ||
| 150 | /// table entry, which is created during a machine instruction is assembled and | ||
| 151 | /// uses an address from a temporary label created at the current address in the | ||
| 152 | /// current section. | ||
| 153 | class MCPseudoProbe : public MCPseudoProbeBase { | ||
| 154 | MCSymbol *Label; | ||
| 155 | |||
| 156 | public: | ||
| 157 | MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, | ||
| 158 | uint64_t Attributes) | ||
| 159 | : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) { | ||
| 160 | assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); | ||
| 161 | assert(Attributes <= 0xFF && | ||
| 162 | "Probe attributes too big to encode, exceeding 2^16"); | ||
| 163 |   } | ||
| 164 | |||
| 165 | MCSymbol *getLabel() const { return Label; } | ||
| 166 | void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; | ||
| 167 | }; | ||
| 168 | |||
| 169 | // Represents a callsite with caller function name and probe id | ||
| 170 | using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>; | ||
| 171 | |||
| 172 | class MCDecodedPseudoProbe : public MCPseudoProbeBase { | ||
| 173 | uint64_t Address; | ||
| 174 | MCDecodedPseudoProbeInlineTree *InlineTree; | ||
| 175 | |||
| 176 | public: | ||
| 177 | MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, | ||
| 178 | uint8_t At, MCDecodedPseudoProbeInlineTree *Tree) | ||
| 179 | : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad), | ||
| 180 | InlineTree(Tree){}; | ||
| 181 | |||
| 182 | uint64_t getAddress() const { return Address; } | ||
| 183 | |||
| 184 | void setAddress(uint64_t Addr) { Address = Addr; } | ||
| 185 | |||
| 186 | MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const { | ||
| 187 | return InlineTree; | ||
| 188 |   } | ||
| 189 | |||
| 190 |   // Get the inlined context by traversing current inline tree backwards, | ||
| 191 |   // each tree node has its InlineSite which is taken as the context. | ||
| 192 |   // \p ContextStack is populated in root to leaf order | ||
| 193 |   void | ||
| 194 | getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack, | ||
| 195 | const GUIDProbeFunctionMap &GUID2FuncMAP) const; | ||
| 196 | |||
| 197 |   // Helper function to get the string from context stack | ||
| 198 | std::string | ||
| 199 | getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; | ||
| 200 | |||
| 201 |   // Print pseudo probe while disassembling | ||
| 202 | void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, | ||
| 203 | bool ShowName) const; | ||
| 204 | }; | ||
| 205 | |||
| 206 | template <typename ProbeType, typename DerivedProbeInlineTreeType> | ||
| 207 | class MCPseudoProbeInlineTreeBase { | ||
| 208 | struct InlineSiteHash { | ||
| 209 | uint64_t operator()(const InlineSite &Site) const { | ||
| 210 | return std::get<0>(Site) ^ std::get<1>(Site); | ||
| 211 |     } | ||
| 212 | }; | ||
| 213 | |||
| 214 | protected: | ||
| 215 |   // Track children (e.g. inlinees) of current context | ||
| 216 | using InlinedProbeTreeMap = std::unordered_map< | ||
| 217 | InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>; | ||
| 218 |   InlinedProbeTreeMap Children; | ||
| 219 |   // Set of probes that come with the function. | ||
| 220 | std::vector<ProbeType> Probes; | ||
| 221 | MCPseudoProbeInlineTreeBase() { | ||
| 222 | static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase, | ||
| 223 | DerivedProbeInlineTreeType>::value, | ||
| 224 |                   "DerivedProbeInlineTreeType must be subclass of " | ||
| 225 | "MCPseudoProbeInlineTreeBase"); | ||
| 226 |   } | ||
| 227 | |||
| 228 | public: | ||
| 229 | uint64_t Guid = 0; | ||
| 230 | |||
| 231 |   // Root node has a GUID 0. | ||
| 232 | bool isRoot() const { return Guid == 0; } | ||
| 233 | InlinedProbeTreeMap &getChildren() { return Children; } | ||
| 234 | const InlinedProbeTreeMap &getChildren() const { return Children; } | ||
| 235 | std::vector<ProbeType> &getProbes() { return Probes; } | ||
| 236 | void addProbes(ProbeType Probe) { Probes.push_back(Probe); } | ||
| 237 |   // Caller node of the inline site | ||
| 238 | MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent; | ||
| 239 | DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { | ||
| 240 | auto Ret = Children.emplace( | ||
| 241 | Site, std::make_unique<DerivedProbeInlineTreeType>(Site)); | ||
| 242 | Ret.first->second->Parent = this; | ||
| 243 | return Ret.first->second.get(); | ||
| 244 | }; | ||
| 245 | }; | ||
| 246 | |||
| 247 | // A Tri-tree based data structure to group probes by inline stack. | ||
| 248 | // A tree is allocated for a standalone .text section. A fake | ||
| 249 | // instance is created as the root of a tree. | ||
| 250 | // A real instance of this class is created for each function, either a | ||
| 251 | // not inlined function that has code in .text section or an inlined function. | ||
| 252 | class MCPseudoProbeInlineTree | ||
| 253 | : public MCPseudoProbeInlineTreeBase<MCPseudoProbe, | ||
| 254 | MCPseudoProbeInlineTree> { | ||
| 255 | public: | ||
| 256 | MCPseudoProbeInlineTree() = default; | ||
| 257 | MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } | ||
| 258 | MCPseudoProbeInlineTree(const InlineSite &Site) { | ||
| 259 | this->Guid = std::get<0>(Site); | ||
| 260 |   } | ||
| 261 | |||
| 262 |   // MCPseudoProbeInlineTree method based on Inlinees | ||
| 263 | void addPseudoProbe(const MCPseudoProbe &Probe, | ||
| 264 | const MCPseudoProbeInlineStack &InlineStack); | ||
| 265 | void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); | ||
| 266 | }; | ||
| 267 | |||
| 268 | // inline tree node for the decoded pseudo probe | ||
| 269 | class MCDecodedPseudoProbeInlineTree | ||
| 270 | : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, | ||
| 271 | MCDecodedPseudoProbeInlineTree> { | ||
| 272 | public: | ||
| 273 |   InlineSite ISite; | ||
| 274 |   // Used for decoding | ||
| 275 | uint32_t ChildrenToProcess = 0; | ||
| 276 | |||
| 277 | MCDecodedPseudoProbeInlineTree() = default; | ||
| 278 | MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; | ||
| 279 | |||
| 280 |   // Return false if it's a dummy inline site | ||
| 281 | bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); } | ||
| 282 | }; | ||
| 283 | |||
| 284 | /// Instances of this class represent the pseudo probes inserted into a compile | ||
| 285 | /// unit. | ||
| 286 | class MCPseudoProbeSections { | ||
| 287 | public: | ||
| 288 | void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe, | ||
| 289 | const MCPseudoProbeInlineStack &InlineStack) { | ||
| 290 | MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack); | ||
| 291 |   } | ||
| 292 | |||
| 293 |   // TODO: Sort by getOrdinal to ensure a determinstic section order | ||
| 294 | using MCProbeDivisionMap = std::map<MCSymbol *, MCPseudoProbeInlineTree>; | ||
| 295 | |||
| 296 | private: | ||
| 297 |   // A collection of MCPseudoProbe for each function. The MCPseudoProbes are | ||
| 298 |   // grouped by GUIDs due to inlining that can bring probes from different | ||
| 299 |   // functions into one function. | ||
| 300 |   MCProbeDivisionMap MCProbeDivisions; | ||
| 301 | |||
| 302 | public: | ||
| 303 | const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; } | ||
| 304 | |||
| 305 | bool empty() const { return MCProbeDivisions.empty(); } | ||
| 306 | |||
| 307 | void emit(MCObjectStreamer *MCOS); | ||
| 308 | }; | ||
| 309 | |||
| 310 | class MCPseudoProbeTable { | ||
| 311 |   // A collection of MCPseudoProbe in the current module grouped by | ||
| 312 |   // functions. MCPseudoProbes will be encoded into a corresponding | ||
| 313 |   // .pseudoprobe section. With functions emitted as separate comdats, | ||
| 314 |   // a text section really only contains the code of a function solely, and the | ||
| 315 |   // probes associated with the text section will be emitted into a standalone | ||
| 316 |   // .pseudoprobe section that shares the same comdat group with the function. | ||
| 317 |   MCPseudoProbeSections MCProbeSections; | ||
| 318 | |||
| 319 | public: | ||
| 320 | static void emit(MCObjectStreamer *MCOS); | ||
| 321 | |||
| 322 | MCPseudoProbeSections &getProbeSections() { return MCProbeSections; } | ||
| 323 | |||
| 324 | #ifndef NDEBUG | ||
| 325 | static int DdgPrintIndent; | ||
| 326 | #endif | ||
| 327 | }; | ||
| 328 | |||
| 329 | class MCPseudoProbeDecoder { | ||
| 330 |   // GUID to PseudoProbeFuncDesc map. | ||
| 331 |   GUIDProbeFunctionMap GUID2FuncDescMap; | ||
| 332 | |||
| 333 |   // Address to probes map. | ||
| 334 |   AddressProbesMap Address2ProbesMap; | ||
| 335 | |||
| 336 |   // The dummy root of the inline trie, all the outlined function will directly | ||
| 337 |   // be the children of the dummy root, all the inlined function will be the | ||
| 338 |   // children of its inlineer. So the relation would be like: | ||
| 339 |   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 | ||
| 340 |   MCDecodedPseudoProbeInlineTree DummyInlineRoot; | ||
| 341 | |||
| 342 |   /// Points to the current location in the buffer. | ||
| 343 | const uint8_t *Data = nullptr; | ||
| 344 | |||
| 345 |   /// Points to the end of the buffer. | ||
| 346 | const uint8_t *End = nullptr; | ||
| 347 | |||
| 348 |   /// Whether encoding is based on a starting probe with absolute code address. | ||
| 349 | bool EncodingIsAddrBased = false; | ||
| 350 | |||
| 351 |   // Decoding helper function | ||
| 352 | template <typename T> ErrorOr<T> readUnencodedNumber(); | ||
| 353 | template <typename T> ErrorOr<T> readUnsignedNumber(); | ||
| 354 | template <typename T> ErrorOr<T> readSignedNumber(); | ||
| 355 | ErrorOr<StringRef> readString(uint32_t Size); | ||
| 356 | |||
| 357 | public: | ||
| 358 | using Uint64Set = DenseSet<uint64_t>; | ||
| 359 | using Uint64Map = DenseMap<uint64_t, uint64_t>; | ||
| 360 | |||
| 361 |   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. | ||
| 362 | bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); | ||
| 363 | |||
| 364 |   // Decode pseudo_probe section to build address to probes map for specifed | ||
| 365 |   // functions only. | ||
| 366 | bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size, | ||
| 367 | const Uint64Set &GuildFilter, | ||
| 368 | const Uint64Map &FuncStartAddrs); | ||
| 369 | |||
| 370 | bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur, | ||
| 371 | uint64_t &LastAddr, const Uint64Set &GuildFilter, | ||
| 372 | const Uint64Map &FuncStartAddrs); | ||
| 373 | |||
| 374 |   // Print pseudo_probe_desc section info | ||
| 375 | void printGUID2FuncDescMap(raw_ostream &OS); | ||
| 376 | |||
| 377 |   // Print pseudo_probe section info, used along with show-disassembly | ||
| 378 | void printProbeForAddress(raw_ostream &OS, uint64_t Address); | ||
| 379 | |||
| 380 |   // do printProbeForAddress for all addresses | ||
| 381 | void printProbesForAllAddresses(raw_ostream &OS); | ||
| 382 | |||
| 383 |   // Look up the probe of a call for the input address | ||
| 384 | const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const; | ||
| 385 | |||
| 386 | const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; | ||
| 387 | |||
| 388 |   // Helper function to populate one probe's inline stack into | ||
| 389 |   // \p InlineContextStack. | ||
| 390 |   // Current leaf location info will be added if IncludeLeaf is true | ||
| 391 |   // Example: | ||
| 392 |   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1 | ||
| 393 |   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3] | ||
| 394 |   //  IncludeLeaf = false, Output: [main:1, foo:2] | ||
| 395 | void getInlineContextForProbe( | ||
| 396 | const MCDecodedPseudoProbe *Probe, | ||
| 397 | SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack, | ||
| 398 | bool IncludeLeaf) const; | ||
| 399 | |||
| 400 | const AddressProbesMap &getAddress2ProbesMap() const { | ||
| 401 | return Address2ProbesMap; | ||
| 402 |   } | ||
| 403 | |||
| 404 | AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; } | ||
| 405 | |||
| 406 | const GUIDProbeFunctionMap &getGUID2FuncDescMap() const { | ||
| 407 | return GUID2FuncDescMap; | ||
| 408 |   } | ||
| 409 | |||
| 410 | const MCPseudoProbeFuncDesc * | ||
| 411 | getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const; | ||
| 412 | |||
| 413 | const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { | ||
| 414 | return DummyInlineRoot; | ||
| 415 |   } | ||
| 416 | }; | ||
| 417 | |||
| 418 | } // end namespace llvm | ||
| 419 | |||
| 420 | #endif // LLVM_MC_MCPSEUDOPROBE_H |