Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file contains common definitions used in the reading and writing of |
||
10 | // sample profile data. |
||
11 | // |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H |
||
15 | #define LLVM_PROFILEDATA_SAMPLEPROF_H |
||
16 | |||
17 | #include "llvm/ADT/DenseSet.h" |
||
18 | #include "llvm/ADT/SmallVector.h" |
||
19 | #include "llvm/ADT/StringExtras.h" |
||
20 | #include "llvm/ADT/StringMap.h" |
||
21 | #include "llvm/ADT/StringRef.h" |
||
22 | #include "llvm/IR/Function.h" |
||
23 | #include "llvm/IR/GlobalValue.h" |
||
24 | #include "llvm/Support/Allocator.h" |
||
25 | #include "llvm/Support/Debug.h" |
||
26 | #include "llvm/Support/ErrorOr.h" |
||
27 | #include "llvm/Support/MathExtras.h" |
||
28 | #include <algorithm> |
||
29 | #include <cstdint> |
||
30 | #include <list> |
||
31 | #include <map> |
||
32 | #include <set> |
||
33 | #include <sstream> |
||
34 | #include <string> |
||
35 | #include <system_error> |
||
36 | #include <unordered_map> |
||
37 | #include <utility> |
||
38 | |||
39 | namespace llvm { |
||
40 | |||
41 | class DILocation; |
||
42 | class raw_ostream; |
||
43 | |||
44 | const std::error_category &sampleprof_category(); |
||
45 | |||
/// Error codes reported while reading or writing sample profile data.
///
/// `success` is zero; every following enumerator takes the next consecutive
/// integer value, so new codes should only be appended.
enum class sampleprof_error {
  success = 0,
  bad_magic,
  unsupported_version,
  too_large,
  truncated,
  malformed,
  unrecognized_format,
  unsupported_writing_format,
  truncated_name_table,
  not_implemented,
  counter_overflow,
  ostream_seek_unsupported,
  uncompress_failed,
  zlib_unavailable,
  hash_mismatch,
};
||
63 | |||
64 | inline std::error_code make_error_code(sampleprof_error E) { |
||
65 | return std::error_code(static_cast<int>(E), sampleprof_category()); |
||
66 | } |
||
67 | |||
68 | inline sampleprof_error MergeResult(sampleprof_error &Accumulator, |
||
69 | sampleprof_error Result) { |
||
70 | // Prefer first error encountered as later errors may be secondary effects of |
||
71 | // the initial problem. |
||
72 | if (Accumulator == sampleprof_error::success && |
||
73 | Result != sampleprof_error::success) |
||
74 | Accumulator = Result; |
||
75 | return Accumulator; |
||
76 | } |
||
77 | |||
78 | } // end namespace llvm |
||
79 | |||
80 | namespace std { |
||
81 | |||
82 | template <> |
||
83 | struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {}; |
||
84 | |||
85 | } // end namespace std |
||
86 | |||
87 | namespace llvm { |
||
88 | namespace sampleprof { |
||
89 | |||
/// Identifiers for the supported sample profile formats.
///
/// The numeric value is ORed into the low bits of the magic number built by
/// SPMagic(), so existing values must stay stable.
enum SampleProfileFormat {
  SPF_None = 0x0,
  SPF_Text = 0x1,
  SPF_Compact_Binary = 0x2,
  SPF_GCC = 0x3,
  SPF_Ext_Binary = 0x4,
  SPF_Binary = 0xff,
};
||
98 | |||
99 | static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { |
||
100 | return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) | |
||
101 | uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) | |
||
102 | uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) | |
||
103 | uint64_t('2') << (64 - 56) | uint64_t(Format); |
||
104 | } |
||
105 | |||
106 | /// Get the proper representation of a string according to whether the |
||
107 | /// current Format uses MD5 to represent the string. |
||
108 | static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, |
||
109 | std::string &GUIDBuf) { |
||
110 | if (Name.empty() || !UseMD5) |
||
111 | return Name; |
||
112 | GUIDBuf = std::to_string(Function::getGUID(Name)); |
||
113 | return GUIDBuf; |
||
114 | } |
||
115 | |||
/// Return the sample profile version number (currently 103).
static inline uint64_t SPVersion() {
  const uint64_t Version = 103;
  return Version;
}
||
117 | |||
// Section types used by SampleProfileExtBinaryBaseReader and
// SampleProfileExtBinaryBaseWriter. Never change the value of an existing
// enumerator; only append new ones.
enum SecType {
  SecInValid = 0,
  SecProfSummary = 1,
  SecNameTable = 2,
  SecProfileSymbolList = 3,
  SecFuncOffsetTable = 4,
  SecFuncMetadata = 5,
  SecCSNameTable = 6,
  // Marker for the first type of profile.
  SecFuncProfileFirst = 32,
  SecLBRProfile = SecFuncProfileFirst,
};
||
133 | |||
134 | static inline std::string getSecName(SecType Type) { |
||
135 | switch ((int)Type) { // Avoid -Wcovered-switch-default |
||
136 | case SecInValid: |
||
137 | return "InvalidSection"; |
||
138 | case SecProfSummary: |
||
139 | return "ProfileSummarySection"; |
||
140 | case SecNameTable: |
||
141 | return "NameTableSection"; |
||
142 | case SecProfileSymbolList: |
||
143 | return "ProfileSymbolListSection"; |
||
144 | case SecFuncOffsetTable: |
||
145 | return "FuncOffsetTableSection"; |
||
146 | case SecFuncMetadata: |
||
147 | return "FunctionMetadata"; |
||
148 | case SecCSNameTable: |
||
149 | return "CSNameTableSection"; |
||
150 | case SecLBRProfile: |
||
151 | return "LBRProfileSection"; |
||
152 | default: |
||
153 | return "UnknownSection"; |
||
154 | } |
||
155 | } |
||
156 | |||
157 | // Entry type of section header table used by SampleProfileExtBinaryBaseReader |
||
158 | // and SampleProfileExtBinaryBaseWriter. |
||
159 | struct SecHdrTableEntry { |
||
160 | SecType Type; |
||
161 | uint64_t Flags; |
||
162 | uint64_t Offset; |
||
163 | uint64_t Size; |
||
164 | // The index indicating the location of the current entry in |
||
165 | // SectionHdrLayout table. |
||
166 | uint32_t LayoutIndex; |
||
167 | }; |
||
168 | |||
// Flags shared by all section types. In SecHdrTableEntry::Flags the common
// flags live in the lower 32 bits, while section-specific flags are stored in
// the upper 32 bits.
enum class SecCommonFlags : uint32_t {
  SecFlagInValid = 0,
  SecFlagCompress = 1u << 0,
  // Indicate the section contains only profile without context.
  SecFlagFlat = 1u << 1,
};
||
178 | |||
179 | // Section specific flags are defined here. |
||
180 | // !!!Note: Every time a new enum class is created here, please add |
||
181 | // a new check in verifySecFlag. |
||
/// Section-specific flags for the name table section (SecNameTable).
enum class SecNameTableFlags : uint32_t {
  SecFlagInValid = 0,
  SecFlagMD5Name = 1u << 0,
  // Store MD5 in fixed length instead of ULEB128 so that the name table can
  // be accessed like an array.
  SecFlagFixedLengthMD5 = 1u << 1,
  // The profile contains names with the ".__uniq." suffix. When this flag is
  // seen, the compiler shouldn't strip the suffix during profile matching.
  SecFlagUniqSuffix = 1u << 2,
};
||
/// Section-specific flags for the profile summary section (SecProfSummary).
enum class SecProfSummaryFlags : uint32_t {
  SecFlagInValid = 0,
  /// SecFlagPartial means the profile is for common/shared code.
  /// The common profile is usually merged from profiles collected
  /// from running other targets.
  SecFlagPartial = 1u << 0,
  /// SecFlagFullContext means this is a context-sensitive flat profile for
  /// CSSPGO.
  SecFlagFullContext = 1u << 1,
  /// SecFlagFSDiscriminator means this profile uses flow-sensitive
  /// discriminators.
  SecFlagFSDiscriminator = 1u << 2,
  /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
  /// contexts and thus was computed by the CS preinliner. Note that the bit
  /// position is 4; bit 3 is not assigned in this enum.
  SecFlagIsPreInlined = 1u << 4,
};
||
208 | |||
/// Section-specific flags for the function metadata section
/// (SecFuncMetadata).
enum class SecFuncMetadataFlags : uint32_t {
  SecFlagInvalid = 0,
  SecFlagIsProbeBased = 1u << 0,
  SecFlagHasAttribute = 1u << 1,
};
||
214 | |||
/// Section-specific flags for the function offset table section
/// (SecFuncOffsetTable).
enum class SecFuncOffsetFlags : uint32_t {
  SecFlagInvalid = 0,
  // Store function offsets in an order of contexts. The order ensures that
  // the callee contexts of a given context are laid out next to it.
  SecFlagOrdered = 1u << 0,
};
||
221 | |||
222 | // Verify section specific flag is used for the correct section. |
||
223 | template <class SecFlagType> |
||
224 | static inline void verifySecFlag(SecType Type, SecFlagType Flag) { |
||
225 | // No verification is needed for common flags. |
||
226 | if (std::is_same<SecCommonFlags, SecFlagType>()) |
||
227 | return; |
||
228 | |||
229 | // Verification starts here for section specific flag. |
||
230 | bool IsFlagLegal = false; |
||
231 | switch (Type) { |
||
232 | case SecNameTable: |
||
233 | IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>(); |
||
234 | break; |
||
235 | case SecProfSummary: |
||
236 | IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); |
||
237 | break; |
||
238 | case SecFuncMetadata: |
||
239 | IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); |
||
240 | break; |
||
241 | default: |
||
242 | case SecFuncOffsetTable: |
||
243 | IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); |
||
244 | break; |
||
245 | } |
||
246 | if (!IsFlagLegal) |
||
247 | llvm_unreachable("Misuse of a flag in an incompatible section"); |
||
248 | } |
||
249 | |||
250 | template <class SecFlagType> |
||
251 | static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { |
||
252 | verifySecFlag(Entry.Type, Flag); |
||
253 | auto FVal = static_cast<uint64_t>(Flag); |
||
254 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
||
255 | Entry.Flags |= IsCommon ? FVal : (FVal << 32); |
||
256 | } |
||
257 | |||
258 | template <class SecFlagType> |
||
259 | static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) { |
||
260 | verifySecFlag(Entry.Type, Flag); |
||
261 | auto FVal = static_cast<uint64_t>(Flag); |
||
262 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
||
263 | Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32)); |
||
264 | } |
||
265 | |||
266 | template <class SecFlagType> |
||
267 | static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) { |
||
268 | verifySecFlag(Entry.Type, Flag); |
||
269 | auto FVal = static_cast<uint64_t>(Flag); |
||
270 | bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>(); |
||
271 | return Entry.Flags & (IsCommon ? FVal : (FVal << 32)); |
||
272 | } |
||
273 | |||
274 | /// Represents the relative location of an instruction. |
||
275 | /// |
||
276 | /// Instruction locations are specified by the line offset from the |
||
277 | /// beginning of the function (marked by the line where the function |
||
278 | /// header is) and the discriminator value within that line. |
||
279 | /// |
||
280 | /// The discriminator value is useful to distinguish instructions |
||
281 | /// that are on the same line but belong to different basic blocks |
||
282 | /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). |
||
283 | struct LineLocation { |
||
284 | LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} |
||
285 | |||
286 | void print(raw_ostream &OS) const; |
||
287 | void dump() const; |
||
288 | |||
289 | bool operator<(const LineLocation &O) const { |
||
290 | return LineOffset < O.LineOffset || |
||
291 | (LineOffset == O.LineOffset && Discriminator < O.Discriminator); |
||
292 | } |
||
293 | |||
294 | bool operator==(const LineLocation &O) const { |
||
295 | return LineOffset == O.LineOffset && Discriminator == O.Discriminator; |
||
296 | } |
||
297 | |||
298 | bool operator!=(const LineLocation &O) const { |
||
299 | return LineOffset != O.LineOffset || Discriminator != O.Discriminator; |
||
300 | } |
||
301 | |||
302 | uint32_t LineOffset; |
||
303 | uint32_t Discriminator; |
||
304 | }; |
||
305 | |||
306 | struct LineLocationHash { |
||
307 | uint64_t operator()(const LineLocation &Loc) const { |
||
308 | return std::hash<std::uint64_t>{}((((uint64_t)Loc.LineOffset) << 32) | |
||
309 | Loc.Discriminator); |
||
310 | } |
||
311 | }; |
||
312 | |||
313 | raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); |
||
314 | |||
315 | /// Representation of a single sample record. |
||
316 | /// |
||
317 | /// A sample record is represented by a positive integer value, which |
||
318 | /// indicates how frequently was the associated line location executed. |
||
319 | /// |
||
320 | /// Additionally, if the associated location contains a function call, |
||
321 | /// the record will hold a list of all the possible called targets. For |
||
322 | /// direct calls, this will be the exact function being invoked. For |
||
323 | /// indirect calls (function pointers, virtual table dispatch), this |
||
324 | /// will be a list of one or more functions. |
||
325 | class SampleRecord { |
||
326 | public: |
||
327 | using CallTarget = std::pair<StringRef, uint64_t>; |
||
328 | struct CallTargetComparator { |
||
329 | bool operator()(const CallTarget &LHS, const CallTarget &RHS) const { |
||
330 | if (LHS.second != RHS.second) |
||
331 | return LHS.second > RHS.second; |
||
332 | |||
333 | return LHS.first < RHS.first; |
||
334 | } |
||
335 | }; |
||
336 | |||
337 | using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>; |
||
338 | using CallTargetMap = StringMap<uint64_t>; |
||
339 | SampleRecord() = default; |
||
340 | |||
341 | /// Increment the number of samples for this record by \p S. |
||
342 | /// Optionally scale sample count \p S by \p Weight. |
||
343 | /// |
||
344 | /// Sample counts accumulate using saturating arithmetic, to avoid wrapping |
||
345 | /// around unsigned integers. |
||
346 | sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { |
||
347 | bool Overflowed; |
||
348 | NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed); |
||
349 | return Overflowed ? sampleprof_error::counter_overflow |
||
350 | : sampleprof_error::success; |
||
351 | } |
||
352 | |||
353 | /// Decrease the number of samples for this record by \p S. Return the amout |
||
354 | /// of samples actually decreased. |
||
355 | uint64_t removeSamples(uint64_t S) { |
||
356 | if (S > NumSamples) |
||
357 | S = NumSamples; |
||
358 | NumSamples -= S; |
||
359 | return S; |
||
360 | } |
||
361 | |||
362 | /// Add called function \p F with samples \p S. |
||
363 | /// Optionally scale sample count \p S by \p Weight. |
||
364 | /// |
||
365 | /// Sample counts accumulate using saturating arithmetic, to avoid wrapping |
||
366 | /// around unsigned integers. |
||
367 | sampleprof_error addCalledTarget(StringRef F, uint64_t S, |
||
368 | uint64_t Weight = 1) { |
||
369 | uint64_t &TargetSamples = CallTargets[F]; |
||
370 | bool Overflowed; |
||
371 | TargetSamples = |
||
372 | SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed); |
||
373 | return Overflowed ? sampleprof_error::counter_overflow |
||
374 | : sampleprof_error::success; |
||
375 | } |
||
376 | |||
377 | /// Remove called function from the call target map. Return the target sample |
||
378 | /// count of the called function. |
||
379 | uint64_t removeCalledTarget(StringRef F) { |
||
380 | uint64_t Count = 0; |
||
381 | auto I = CallTargets.find(F); |
||
382 | if (I != CallTargets.end()) { |
||
383 | Count = I->second; |
||
384 | CallTargets.erase(I); |
||
385 | } |
||
386 | return Count; |
||
387 | } |
||
388 | |||
389 | /// Return true if this sample record contains function calls. |
||
390 | bool hasCalls() const { return !CallTargets.empty(); } |
||
391 | |||
392 | uint64_t getSamples() const { return NumSamples; } |
||
393 | const CallTargetMap &getCallTargets() const { return CallTargets; } |
||
394 | const SortedCallTargetSet getSortedCallTargets() const { |
||
395 | return SortCallTargets(CallTargets); |
||
396 | } |
||
397 | |||
398 | uint64_t getCallTargetSum() const { |
||
399 | uint64_t Sum = 0; |
||
400 | for (const auto &I : CallTargets) |
||
401 | Sum += I.second; |
||
402 | return Sum; |
||
403 | } |
||
404 | |||
405 | /// Sort call targets in descending order of call frequency. |
||
406 | static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) { |
||
407 | SortedCallTargetSet SortedTargets; |
||
408 | for (const auto &[Target, Frequency] : Targets) { |
||
409 | SortedTargets.emplace(Target, Frequency); |
||
410 | } |
||
411 | return SortedTargets; |
||
412 | } |
||
413 | |||
414 | /// Prorate call targets by a distribution factor. |
||
415 | static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, |
||
416 | float DistributionFactor) { |
||
417 | CallTargetMap AdjustedTargets; |
||
418 | for (const auto &[Target, Frequency] : Targets) { |
||
419 | AdjustedTargets[Target] = Frequency * DistributionFactor; |
||
420 | } |
||
421 | return AdjustedTargets; |
||
422 | } |
||
423 | |||
424 | /// Merge the samples in \p Other into this record. |
||
425 | /// Optionally scale sample counts by \p Weight. |
||
426 | sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1); |
||
427 | void print(raw_ostream &OS, unsigned Indent) const; |
||
428 | void dump() const; |
||
429 | |||
430 | private: |
||
431 | uint64_t NumSamples = 0; |
||
432 | CallTargetMap CallTargets; |
||
433 | }; |
||
434 | |||
435 | raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); |
||
436 | |||
// Bit mask describing the state of the context associated with
// FunctionSamples.
enum ContextStateMask {
  // Profile without context.
  UnknownContext = 0x0,
  // Full context profile from input profile.
  RawContext = 0x1,
  // Synthetic context created for context promotion.
  SyntheticContext = 0x2,
  // Profile for context that is inlined into caller.
  InlinedContext = 0x4,
  // Profile for context merged into base profile.
  MergedContext = 0x8,
};
||
445 | |||
// Bit mask describing attributes of the context associated with
// FunctionSamples.
enum ContextAttributeMask {
  ContextNone = 0x0,
  // Leaf of context was inlined in previous build.
  ContextWasInlined = 0x1,
  // Leaf of context should be inlined.
  ContextShouldBeInlined = 0x2,
  // Leaf of context is duplicated into the base profile.
  ContextDuplicatedIntoBase = 0x4,
};
||
454 | |||
455 | // Represents a context frame with function name and line location |
||
456 | struct SampleContextFrame { |
||
457 | StringRef FuncName; |
||
458 | LineLocation Location; |
||
459 | |||
460 | SampleContextFrame() : Location(0, 0) {} |
||
461 | |||
462 | SampleContextFrame(StringRef FuncName, LineLocation Location) |
||
463 | : FuncName(FuncName), Location(Location) {} |
||
464 | |||
465 | bool operator==(const SampleContextFrame &That) const { |
||
466 | return Location == That.Location && FuncName == That.FuncName; |
||
467 | } |
||
468 | |||
469 | bool operator!=(const SampleContextFrame &That) const { |
||
470 | return !(*this == That); |
||
471 | } |
||
472 | |||
473 | std::string toString(bool OutputLineLocation) const { |
||
474 | std::ostringstream OContextStr; |
||
475 | OContextStr << FuncName.str(); |
||
476 | if (OutputLineLocation) { |
||
477 | OContextStr << ":" << Location.LineOffset; |
||
478 | if (Location.Discriminator) |
||
479 | OContextStr << "." << Location.Discriminator; |
||
480 | } |
||
481 | return OContextStr.str(); |
||
482 | } |
||
483 | }; |
||
484 | |||
485 | static inline hash_code hash_value(const SampleContextFrame &arg) { |
||
486 | return hash_combine(arg.FuncName, arg.Location.LineOffset, |
||
487 | arg.Location.Discriminator); |
||
488 | } |
||
489 | |||
490 | using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>; |
||
491 | using SampleContextFrames = ArrayRef<SampleContextFrame>; |
||
492 | |||
493 | struct SampleContextFrameHash { |
||
494 | uint64_t operator()(const SampleContextFrameVector &S) const { |
||
495 | return hash_combine_range(S.begin(), S.end()); |
||
496 | } |
||
497 | }; |
||
498 | |||
499 | // Sample context for FunctionSamples. It consists of the calling context, |
||
500 | // the function name and context state. Internally sample context is represented |
||
501 | // using ArrayRef, which is also the input for constructing a `SampleContext`. |
||
502 | // It can accept and represent both full context string as well as context-less |
||
503 | // function name. |
||
504 | // For a CS profile, a full context vector can look like: |
||
505 | // `main:3 _Z5funcAi:1 _Z8funcLeafi` |
||
506 | // For a base CS profile without calling context, the context vector should only |
||
507 | // contain the leaf frame name. |
||
508 | // For a non-CS profile, the context vector should be empty. |
||
509 | class SampleContext { |
||
510 | public: |
||
511 | SampleContext() : State(UnknownContext), Attributes(ContextNone) {} |
||
512 | |||
513 | SampleContext(StringRef Name) |
||
514 | : Name(Name), State(UnknownContext), Attributes(ContextNone) {} |
||
515 | |||
516 | SampleContext(SampleContextFrames Context, |
||
517 | ContextStateMask CState = RawContext) |
||
518 | : Attributes(ContextNone) { |
||
519 | assert(!Context.empty() && "Context is empty"); |
||
520 | setContext(Context, CState); |
||
521 | } |
||
522 | |||
523 | // Give a context string, decode and populate internal states like |
||
524 | // Function name, Calling context and context state. Example of input |
||
525 | // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` |
||
526 | SampleContext(StringRef ContextStr, |
||
527 | std::list<SampleContextFrameVector> &CSNameTable, |
||
528 | ContextStateMask CState = RawContext) |
||
529 | : Attributes(ContextNone) { |
||
530 | assert(!ContextStr.empty()); |
||
531 | // Note that `[]` wrapped input indicates a full context string, otherwise |
||
532 | // it's treated as context-less function name only. |
||
533 | bool HasContext = ContextStr.startswith("["); |
||
534 | if (!HasContext) { |
||
535 | State = UnknownContext; |
||
536 | Name = ContextStr; |
||
537 | } else { |
||
538 | CSNameTable.emplace_back(); |
||
539 | SampleContextFrameVector &Context = CSNameTable.back(); |
||
540 | createCtxVectorFromStr(ContextStr, Context); |
||
541 | setContext(Context, CState); |
||
542 | } |
||
543 | } |
||
544 | |||
545 | /// Create a context vector from a given context string and save it in |
||
546 | /// `Context`. |
||
547 | static void createCtxVectorFromStr(StringRef ContextStr, |
||
548 | SampleContextFrameVector &Context) { |
||
549 | // Remove encapsulating '[' and ']' if any |
||
550 | ContextStr = ContextStr.substr(1, ContextStr.size() - 2); |
||
551 | StringRef ContextRemain = ContextStr; |
||
552 | StringRef ChildContext; |
||
553 | StringRef CalleeName; |
||
554 | while (!ContextRemain.empty()) { |
||
555 | auto ContextSplit = ContextRemain.split(" @ "); |
||
556 | ChildContext = ContextSplit.first; |
||
557 | ContextRemain = ContextSplit.second; |
||
558 | LineLocation CallSiteLoc(0, 0); |
||
559 | decodeContextString(ChildContext, CalleeName, CallSiteLoc); |
||
560 | Context.emplace_back(CalleeName, CallSiteLoc); |
||
561 | } |
||
562 | } |
||
563 | |||
564 | // Decode context string for a frame to get function name and location. |
||
565 | // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. |
||
566 | static void decodeContextString(StringRef ContextStr, StringRef &FName, |
||
567 | LineLocation &LineLoc) { |
||
568 | // Get function name |
||
569 | auto EntrySplit = ContextStr.split(':'); |
||
570 | FName = EntrySplit.first; |
||
571 | |||
572 | LineLoc = {0, 0}; |
||
573 | if (!EntrySplit.second.empty()) { |
||
574 | // Get line offset, use signed int for getAsInteger so string will |
||
575 | // be parsed as signed. |
||
576 | int LineOffset = 0; |
||
577 | auto LocSplit = EntrySplit.second.split('.'); |
||
578 | LocSplit.first.getAsInteger(10, LineOffset); |
||
579 | LineLoc.LineOffset = LineOffset; |
||
580 | |||
581 | // Get discriminator |
||
582 | if (!LocSplit.second.empty()) |
||
583 | LocSplit.second.getAsInteger(10, LineLoc.Discriminator); |
||
584 | } |
||
585 | } |
||
586 | |||
587 | operator SampleContextFrames() const { return FullContext; } |
||
588 | bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } |
||
589 | void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } |
||
590 | uint32_t getAllAttributes() { return Attributes; } |
||
591 | void setAllAttributes(uint32_t A) { Attributes = A; } |
||
592 | bool hasState(ContextStateMask S) { return State & (uint32_t)S; } |
||
593 | void setState(ContextStateMask S) { State |= (uint32_t)S; } |
||
594 | void clearState(ContextStateMask S) { State &= (uint32_t)~S; } |
||
595 | bool hasContext() const { return State != UnknownContext; } |
||
596 | bool isBaseContext() const { return FullContext.size() == 1; } |
||
597 | StringRef getName() const { return Name; } |
||
598 | SampleContextFrames getContextFrames() const { return FullContext; } |
||
599 | |||
600 | static std::string getContextString(SampleContextFrames Context, |
||
601 | bool IncludeLeafLineLocation = false) { |
||
602 | std::ostringstream OContextStr; |
||
603 | for (uint32_t I = 0; I < Context.size(); I++) { |
||
604 | if (OContextStr.str().size()) { |
||
605 | OContextStr << " @ "; |
||
606 | } |
||
607 | OContextStr << Context[I].toString(I != Context.size() - 1 || |
||
608 | IncludeLeafLineLocation); |
||
609 | } |
||
610 | return OContextStr.str(); |
||
611 | } |
||
612 | |||
613 | std::string toString() const { |
||
614 | if (!hasContext()) |
||
615 | return Name.str(); |
||
616 | return getContextString(FullContext, false); |
||
617 | } |
||
618 | |||
619 | uint64_t getHashCode() const { |
||
620 | return hasContext() ? hash_value(getContextFrames()) |
||
621 | : hash_value(getName()); |
||
622 | } |
||
623 | |||
624 | /// Set the name of the function and clear the current context. |
||
625 | void setName(StringRef FunctionName) { |
||
626 | Name = FunctionName; |
||
627 | FullContext = SampleContextFrames(); |
||
628 | State = UnknownContext; |
||
629 | } |
||
630 | |||
631 | void setContext(SampleContextFrames Context, |
||
632 | ContextStateMask CState = RawContext) { |
||
633 | assert(CState != UnknownContext); |
||
634 | FullContext = Context; |
||
635 | Name = Context.back().FuncName; |
||
636 | State = CState; |
||
637 | } |
||
638 | |||
639 | bool operator==(const SampleContext &That) const { |
||
640 | return State == That.State && Name == That.Name && |
||
641 | FullContext == That.FullContext; |
||
642 | } |
||
643 | |||
644 | bool operator!=(const SampleContext &That) const { return !(*this == That); } |
||
645 | |||
646 | bool operator<(const SampleContext &That) const { |
||
647 | if (State != That.State) |
||
648 | return State < That.State; |
||
649 | |||
650 | if (!hasContext()) { |
||
651 | return Name < That.Name; |
||
652 | } |
||
653 | |||
654 | uint64_t I = 0; |
||
655 | while (I < std::min(FullContext.size(), That.FullContext.size())) { |
||
656 | auto &Context1 = FullContext[I]; |
||
657 | auto &Context2 = That.FullContext[I]; |
||
658 | auto V = Context1.FuncName.compare(Context2.FuncName); |
||
659 | if (V) |
||
660 | return V < 0; |
||
661 | if (Context1.Location != Context2.Location) |
||
662 | return Context1.Location < Context2.Location; |
||
663 | I++; |
||
664 | } |
||
665 | |||
666 | return FullContext.size() < That.FullContext.size(); |
||
667 | } |
||
668 | |||
669 | struct Hash { |
||
670 | uint64_t operator()(const SampleContext &Context) const { |
||
671 | return Context.getHashCode(); |
||
672 | } |
||
673 | }; |
||
674 | |||
675 | bool IsPrefixOf(const SampleContext &That) const { |
||
676 | auto ThisContext = FullContext; |
||
677 | auto ThatContext = That.FullContext; |
||
678 | if (ThatContext.size() < ThisContext.size()) |
||
679 | return false; |
||
680 | ThatContext = ThatContext.take_front(ThisContext.size()); |
||
681 | // Compare Leaf frame first |
||
682 | if (ThisContext.back().FuncName != ThatContext.back().FuncName) |
||
683 | return false; |
||
684 | // Compare leading context |
||
685 | return ThisContext.drop_back() == ThatContext.drop_back(); |
||
686 | } |
||
687 | |||
688 | private: |
||
689 | /// Mangled name of the function. |
||
690 | StringRef Name; |
||
691 | // Full context including calling context and leaf function name |
||
692 | SampleContextFrames FullContext; |
||
693 | // State of the associated sample profile |
||
694 | uint32_t State; |
||
695 | // Attribute of the associated sample profile |
||
696 | uint32_t Attributes; |
||
697 | }; |
||
698 | |||
699 | static inline hash_code hash_value(const SampleContext &arg) { |
||
700 | return arg.hasContext() ? hash_value(arg.getContextFrames()) |
||
701 | : hash_value(arg.getName()); |
||
702 | } |
||
703 | |||
704 | class FunctionSamples; |
||
705 | class SampleProfileReaderItaniumRemapper; |
||
706 | |||
707 | using BodySampleMap = std::map<LineLocation, SampleRecord>; |
||
708 | // NOTE: Using a StringMap here makes parsed profiles consume around 17% more |
||
709 | // memory, which is *very* significant for large profiles. |
||
710 | using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>; |
||
711 | using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>; |
||
712 | |||
713 | /// Representation of the samples collected for a function. |
||
714 | /// |
||
715 | /// This data structure contains all the collected samples for the body |
||
716 | /// of a function. Each sample corresponds to a LineLocation instance |
||
717 | /// within the body of the function. |
||
718 | class FunctionSamples { |
||
719 | public: |
||
720 | FunctionSamples() = default; |
||
721 | |||
722 | void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; |
||
723 | void dump() const; |
||
724 | |||
725 | sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { |
||
726 | bool Overflowed; |
||
727 | TotalSamples = |
||
728 | SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed); |
||
729 | return Overflowed ? sampleprof_error::counter_overflow |
||
730 | : sampleprof_error::success; |
||
731 | } |
||
732 | |||
733 | void removeTotalSamples(uint64_t Num) { |
||
734 | if (TotalSamples < Num) |
||
735 | TotalSamples = 0; |
||
736 | else |
||
737 | TotalSamples -= Num; |
||
738 | } |
||
739 | |||
740 | void setTotalSamples(uint64_t Num) { TotalSamples = Num; } |
||
741 | |||
742 | sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { |
||
743 | bool Overflowed; |
||
744 | TotalHeadSamples = |
||
745 | SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed); |
||
746 | return Overflowed ? sampleprof_error::counter_overflow |
||
747 | : sampleprof_error::success; |
||
748 | } |
||
749 | |||
750 | sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, |
||
751 | uint64_t Num, uint64_t Weight = 1) { |
||
752 | return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( |
||
753 | Num, Weight); |
||
754 | } |
||
755 | |||
756 | sampleprof_error addCalledTargetSamples(uint32_t LineOffset, |
||
757 | uint32_t Discriminator, |
||
758 | StringRef FName, uint64_t Num, |
||
759 | uint64_t Weight = 1) { |
||
760 | return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( |
||
761 | FName, Num, Weight); |
||
762 | } |
||
763 | |||
764 | // Remove a call target and decrease the body sample correspondingly. Return |
||
765 | // the number of body samples actually decreased. |
||
766 | uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset, |
||
767 | uint32_t Discriminator, |
||
768 | StringRef FName) { |
||
769 | uint64_t Count = 0; |
||
770 | auto I = BodySamples.find(LineLocation(LineOffset, Discriminator)); |
||
771 | if (I != BodySamples.end()) { |
||
772 | Count = I->second.removeCalledTarget(FName); |
||
773 | Count = I->second.removeSamples(Count); |
||
774 | if (!I->second.getSamples()) |
||
775 | BodySamples.erase(I); |
||
776 | } |
||
777 | return Count; |
||
778 | } |
||
779 | |||
780 | sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, |
||
781 | uint64_t Weight = 1) { |
||
782 | SampleRecord S; |
||
783 | S.addSamples(Num, Weight); |
||
784 | return BodySamples[LineLocation(Index, 0)].merge(S, Weight); |
||
785 | } |
||
786 | |||
787 | // Accumulate all call target samples to update the body samples. |
||
788 | void updateCallsiteSamples() { |
||
789 | for (auto &I : BodySamples) { |
||
790 | uint64_t TargetSamples = I.second.getCallTargetSum(); |
||
791 | // It's possible that the body sample count can be greater than the call |
||
792 | // target sum. E.g, if some call targets are external targets, they won't |
||
793 | // be considered valid call targets, but the body sample count which is |
||
794 | // from lbr ranges can actually include them. |
||
795 | if (TargetSamples > I.second.getSamples()) |
||
796 | I.second.addSamples(TargetSamples - I.second.getSamples()); |
||
797 | } |
||
798 | } |
||
799 | |||
800 | // Accumulate all body samples to set total samples. |
||
801 | void updateTotalSamples() { |
||
802 | setTotalSamples(0); |
||
803 | for (const auto &I : BodySamples) |
||
804 | addTotalSamples(I.second.getSamples()); |
||
805 | |||
806 | for (auto &I : CallsiteSamples) { |
||
807 | for (auto &CS : I.second) { |
||
808 | CS.second.updateTotalSamples(); |
||
809 | addTotalSamples(CS.second.getTotalSamples()); |
||
810 | } |
||
811 | } |
||
812 | } |
||
813 | |||
814 | // Set current context and all callee contexts to be synthetic. |
||
815 | void SetContextSynthetic() { |
||
816 | Context.setState(SyntheticContext); |
||
817 | for (auto &I : CallsiteSamples) { |
||
818 | for (auto &CS : I.second) { |
||
819 | CS.second.SetContextSynthetic(); |
||
820 | } |
||
821 | } |
||
822 | } |
||
823 | |||
824 | /// Return the number of samples collected at the given location. |
||
825 | /// Each location is specified by \p LineOffset and \p Discriminator. |
||
826 | /// If the location is not found in profile, return error. |
||
827 | ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, |
||
828 | uint32_t Discriminator) const { |
||
829 | const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); |
||
830 | if (ret == BodySamples.end()) |
||
831 | return std::error_code(); |
||
832 | return ret->second.getSamples(); |
||
833 | } |
||
834 | |||
835 | /// Returns the call target map collected at a given location. |
||
836 | /// Each location is specified by \p LineOffset and \p Discriminator. |
||
837 | /// If the location is not found in profile, return error. |
||
838 | ErrorOr<SampleRecord::CallTargetMap> |
||
839 | findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const { |
||
840 | const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); |
||
841 | if (ret == BodySamples.end()) |
||
842 | return std::error_code(); |
||
843 | return ret->second.getCallTargets(); |
||
844 | } |
||
845 | |||
846 | /// Returns the call target map collected at a given location specified by \p |
||
847 | /// CallSite. If the location is not found in profile, return error. |
||
848 | ErrorOr<SampleRecord::CallTargetMap> |
||
849 | findCallTargetMapAt(const LineLocation &CallSite) const { |
||
850 | const auto &Ret = BodySamples.find(CallSite); |
||
851 | if (Ret == BodySamples.end()) |
||
852 | return std::error_code(); |
||
853 | return Ret->second.getCallTargets(); |
||
854 | } |
||
855 | |||
856 | /// Return the function samples at the given callsite location. |
||
857 | FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { |
||
858 | return CallsiteSamples[Loc]; |
||
859 | } |
||
860 | |||
861 | /// Returns the FunctionSamplesMap at the given \p Loc. |
||
862 | const FunctionSamplesMap * |
||
863 | findFunctionSamplesMapAt(const LineLocation &Loc) const { |
||
864 | auto iter = CallsiteSamples.find(Loc); |
||
865 | if (iter == CallsiteSamples.end()) |
||
866 | return nullptr; |
||
867 | return &iter->second; |
||
868 | } |
||
869 | |||
870 | /// Returns a pointer to FunctionSamples at the given callsite location |
||
871 | /// \p Loc with callee \p CalleeName. If no callsite can be found, relax |
||
872 | /// the restriction to return the FunctionSamples at callsite location |
||
873 | /// \p Loc with the maximum total sample count. If \p Remapper is not |
||
874 | /// nullptr, use \p Remapper to find FunctionSamples with equivalent name |
||
875 | /// as \p CalleeName. |
||
876 | const FunctionSamples * |
||
877 | findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, |
||
878 | SampleProfileReaderItaniumRemapper *Remapper) const; |
||
879 | |||
880 | bool empty() const { return TotalSamples == 0; } |
||
881 | |||
882 | /// Return the total number of samples collected inside the function. |
||
883 | uint64_t getTotalSamples() const { return TotalSamples; } |
||
884 | |||
885 | /// For top-level functions, return the total number of branch samples that |
||
886 | /// have the function as the branch target (or 0 otherwise). This is the raw |
||
887 | /// data fetched from the profile. This should be equivalent to the sample of |
||
888 | /// the first instruction of the symbol. But as we directly get this info for |
||
889 | /// raw profile without referring to potentially inaccurate debug info, this |
||
890 | /// gives more accurate profile data and is preferred for standalone symbols. |
||
891 | uint64_t getHeadSamples() const { return TotalHeadSamples; } |
||
892 | |||
893 | /// Return an estimate of the sample count of the function entry basic block. |
||
894 | /// The function can be either a standalone symbol or an inlined function. |
||
895 | /// For Context-Sensitive profiles, this will prefer returning the head |
||
896 | /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from |
||
897 | /// the function body's samples or callsite samples. |
||
898 | uint64_t getHeadSamplesEstimate() const { |
||
899 | if (FunctionSamples::ProfileIsCS && getHeadSamples()) { |
||
900 | // For CS profile, if we already have more accurate head samples |
||
901 | // counted by branch sample from caller, use them as entry samples. |
||
902 | return getHeadSamples(); |
||
903 | } |
||
904 | uint64_t Count = 0; |
||
905 | // Use either BodySamples or CallsiteSamples which ever has the smaller |
||
906 | // lineno. |
||
907 | if (!BodySamples.empty() && |
||
908 | (CallsiteSamples.empty() || |
||
909 | BodySamples.begin()->first < CallsiteSamples.begin()->first)) |
||
910 | Count = BodySamples.begin()->second.getSamples(); |
||
911 | else if (!CallsiteSamples.empty()) { |
||
912 | // An indirect callsite may be promoted to several inlined direct calls. |
||
913 | // We need to get the sum of them. |
||
914 | for (const auto &N_FS : CallsiteSamples.begin()->second) |
||
915 | Count += N_FS.second.getHeadSamplesEstimate(); |
||
916 | } |
||
917 | // Return at least 1 if total sample is not 0. |
||
918 | return Count ? Count : TotalSamples > 0; |
||
919 | } |
||
920 | |||
921 | /// Return all the samples collected in the body of the function. |
||
922 | const BodySampleMap &getBodySamples() const { return BodySamples; } |
||
923 | |||
924 | /// Return all the callsite samples collected in the body of the function. |
||
925 | const CallsiteSampleMap &getCallsiteSamples() const { |
||
926 | return CallsiteSamples; |
||
927 | } |
||
928 | |||
929 | /// Return the maximum of sample counts in a function body. When SkipCallSite |
||
930 | /// is false, which is the default, the return count includes samples in the |
||
931 | /// inlined functions. When SkipCallSite is true, the return count only |
||
932 | /// considers the body samples. |
||
933 | uint64_t getMaxCountInside(bool SkipCallSite = false) const { |
||
934 | uint64_t MaxCount = 0; |
||
935 | for (const auto &L : getBodySamples()) |
||
936 | MaxCount = std::max(MaxCount, L.second.getSamples()); |
||
937 | if (SkipCallSite) |
||
938 | return MaxCount; |
||
939 | for (const auto &C : getCallsiteSamples()) |
||
940 | for (const FunctionSamplesMap::value_type &F : C.second) |
||
941 | MaxCount = std::max(MaxCount, F.second.getMaxCountInside()); |
||
942 | return MaxCount; |
||
943 | } |
||
944 | |||
945 | /// Merge the samples in \p Other into this one. |
||
946 | /// Optionally scale samples by \p Weight. |
||
947 | sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { |
||
948 | sampleprof_error Result = sampleprof_error::success; |
||
949 | if (!GUIDToFuncNameMap) |
||
950 | GUIDToFuncNameMap = Other.GUIDToFuncNameMap; |
||
951 | if (Context.getName().empty()) |
||
952 | Context = Other.getContext(); |
||
953 | if (FunctionHash == 0) { |
||
954 | // Set the function hash code for the target profile. |
||
955 | FunctionHash = Other.getFunctionHash(); |
||
956 | } else if (FunctionHash != Other.getFunctionHash()) { |
||
957 | // The two profiles coming with different valid hash codes indicates |
||
958 | // either: |
||
959 | // 1. They are same-named static functions from different compilation |
||
960 | // units (without using -unique-internal-linkage-names), or |
||
961 | // 2. They are really the same function but from different compilations. |
||
962 | // Let's bail out in either case for now, which means one profile is |
||
963 | // dropped. |
||
964 | return sampleprof_error::hash_mismatch; |
||
965 | } |
||
966 | |||
967 | MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); |
||
968 | MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); |
||
969 | for (const auto &I : Other.getBodySamples()) { |
||
970 | const LineLocation &Loc = I.first; |
||
971 | const SampleRecord &Rec = I.second; |
||
972 | MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); |
||
973 | } |
||
974 | for (const auto &I : Other.getCallsiteSamples()) { |
||
975 | const LineLocation &Loc = I.first; |
||
976 | FunctionSamplesMap &FSMap = functionSamplesAt(Loc); |
||
977 | for (const auto &Rec : I.second) |
||
978 | MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight)); |
||
979 | } |
||
980 | return Result; |
||
981 | } |
||
982 | |||
983 | /// Recursively traverses all children, if the total sample count of the |
||
984 | /// corresponding function is no less than \p Threshold, add its corresponding |
||
985 | /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID |
||
986 | /// to \p S. |
||
987 | void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, |
||
988 | const StringMap<Function *> &SymbolMap, |
||
989 | uint64_t Threshold) const { |
||
990 | if (TotalSamples <= Threshold) |
||
991 | return; |
||
992 | auto isDeclaration = [](const Function *F) { |
||
993 | return !F || F->isDeclaration(); |
||
994 | }; |
||
995 | if (isDeclaration(SymbolMap.lookup(getFuncName()))) { |
||
996 | // Add to the import list only when it's defined out of module. |
||
997 | S.insert(getGUID(getName())); |
||
998 | } |
||
999 | // Import hot CallTargets, which may not be available in IR because full |
||
1000 | // profile annotation cannot be done until backend compilation in ThinLTO. |
||
1001 | for (const auto &BS : BodySamples) |
||
1002 | for (const auto &TS : BS.second.getCallTargets()) |
||
1003 | if (TS.getValue() > Threshold) { |
||
1004 | const Function *Callee = SymbolMap.lookup(getFuncName(TS.getKey())); |
||
1005 | if (isDeclaration(Callee)) |
||
1006 | S.insert(getGUID(TS.getKey())); |
||
1007 | } |
||
1008 | for (const auto &CS : CallsiteSamples) |
||
1009 | for (const auto &NameFS : CS.second) |
||
1010 | NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold); |
||
1011 | } |
||
1012 | |||
1013 | /// Set the name of the function. |
||
1014 | void setName(StringRef FunctionName) { Context.setName(FunctionName); } |
||
1015 | |||
1016 | /// Return the function name. |
||
1017 | StringRef getName() const { return Context.getName(); } |
||
1018 | |||
1019 | /// Return the original function name. |
||
1020 | StringRef getFuncName() const { return getFuncName(getName()); } |
||
1021 | |||
1022 | void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } |
||
1023 | |||
1024 | uint64_t getFunctionHash() const { return FunctionHash; } |
||
1025 | |||
1026 | /// Return the canonical name for a function, taking into account |
||
1027 | /// suffix elision policy attributes. |
||
1028 | static StringRef getCanonicalFnName(const Function &F) { |
||
1029 | auto AttrName = "sample-profile-suffix-elision-policy"; |
||
1030 | auto Attr = F.getFnAttribute(AttrName).getValueAsString(); |
||
1031 | return getCanonicalFnName(F.getName(), Attr); |
||
1032 | } |
||
1033 | |||
1034 | /// Name suffixes which canonicalization should handle to avoid |
||
1035 | /// profile mismatch. |
||
1036 | static constexpr const char *LLVMSuffix = ".llvm."; |
||
1037 | static constexpr const char *PartSuffix = ".part."; |
||
1038 | static constexpr const char *UniqSuffix = ".__uniq."; |
||
1039 | |||
1040 | static StringRef getCanonicalFnName(StringRef FnName, |
||
1041 | StringRef Attr = "selected") { |
||
1042 | // Note the sequence of the suffixes in the knownSuffixes array matters. |
||
1043 | // If suffix "A" is appended after the suffix "B", "A" should be in front |
||
1044 | // of "B" in knownSuffixes. |
||
1045 | const char *knownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix}; |
||
1046 | if (Attr == "" || Attr == "all") { |
||
1047 | return FnName.split('.').first; |
||
1048 | } else if (Attr == "selected") { |
||
1049 | StringRef Cand(FnName); |
||
1050 | for (const auto &Suf : knownSuffixes) { |
||
1051 | StringRef Suffix(Suf); |
||
1052 | // If the profile contains ".__uniq." suffix, don't strip the |
||
1053 | // suffix for names in the IR. |
||
1054 | if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix) |
||
1055 | continue; |
||
1056 | auto It = Cand.rfind(Suffix); |
||
1057 | if (It == StringRef::npos) |
||
1058 | continue; |
||
1059 | auto Dit = Cand.rfind('.'); |
||
1060 | if (Dit == It + Suffix.size() - 1) |
||
1061 | Cand = Cand.substr(0, It); |
||
1062 | } |
||
1063 | return Cand; |
||
1064 | } else if (Attr == "none") { |
||
1065 | return FnName; |
||
1066 | } else { |
||
1067 | assert(false && "internal error: unknown suffix elision policy"); |
||
1068 | } |
||
1069 | return FnName; |
||
1070 | } |
||
1071 | |||
1072 | /// Translate \p Name into its original name. |
||
1073 | /// When profile doesn't use MD5, \p Name needs no translation. |
||
1074 | /// When profile uses MD5, \p Name in current FunctionSamples |
||
1075 | /// is actually GUID of the original function name. getFuncName will |
||
1076 | /// translate \p Name in current FunctionSamples into its original name |
||
1077 | /// by looking up in the function map GUIDToFuncNameMap. |
||
1078 | /// If the original name doesn't exist in the map, return empty StringRef. |
||
1079 | StringRef getFuncName(StringRef Name) const { |
||
1080 | if (!UseMD5) |
||
1081 | return Name; |
||
1082 | |||
1083 | assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first"); |
||
1084 | return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); |
||
1085 | } |
||
1086 | |||
1087 | /// Returns the line offset to the start line of the subprogram. |
||
1088 | /// We assume that a single function will not exceed 65535 LOC. |
||
1089 | static unsigned getOffset(const DILocation *DIL); |
||
1090 | |||
1091 | /// Returns a unique call site identifier for a given debug location of a call |
||
1092 | /// instruction. This is wrapper of two scenarios, the probe-based profile and |
||
1093 | /// regular profile, to hide implementation details from the sample loader and |
||
1094 | /// the context tracker. |
||
1095 | static LineLocation getCallSiteIdentifier(const DILocation *DIL, |
||
1096 | bool ProfileIsFS = false); |
||
1097 | |||
1098 | /// Returns a unique hash code for a combination of a callsite location and |
||
1099 | /// the callee function name. |
||
1100 | static uint64_t getCallSiteHash(StringRef CalleeName, |
||
1101 | const LineLocation &Callsite); |
||
1102 | |||
1103 | /// Get the FunctionSamples of the inline instance where DIL originates |
||
1104 | /// from. |
||
1105 | /// |
||
1106 | /// The FunctionSamples of the instruction (Machine or IR) associated to |
||
1107 | /// \p DIL is the inlined instance in which that instruction is coming from. |
||
1108 | /// We traverse the inline stack of that instruction, and match it with the |
||
1109 | /// tree nodes in the profile. |
||
1110 | /// |
||
1111 | /// \returns the FunctionSamples pointer to the inlined instance. |
||
1112 | /// If \p Remapper is not nullptr, it will be used to find matching |
||
1113 | /// FunctionSamples with not exactly the same but equivalent name. |
||
1114 | const FunctionSamples *findFunctionSamples( |
||
1115 | const DILocation *DIL, |
||
1116 | SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; |
||
1117 | |||
1118 | static bool ProfileIsProbeBased; |
||
1119 | |||
1120 | static bool ProfileIsCS; |
||
1121 | |||
1122 | static bool ProfileIsPreInlined; |
||
1123 | |||
1124 | SampleContext &getContext() const { return Context; } |
||
1125 | |||
1126 | void setContext(const SampleContext &FContext) { Context = FContext; } |
||
1127 | |||
1128 | /// Whether the profile uses MD5 to represent string. |
||
1129 | static bool UseMD5; |
||
1130 | |||
1131 | /// Whether the profile contains any ".__uniq." suffix in a name. |
||
1132 | static bool HasUniqSuffix; |
||
1133 | |||
1134 | /// If this profile uses flow sensitive discriminators. |
||
1135 | static bool ProfileIsFS; |
||
1136 | |||
1137 | /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for |
||
1138 | /// all the function symbols defined or declared in current module. |
||
1139 | DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr; |
||
1140 | |||
1141 | // Assume the input \p Name is a name coming from FunctionSamples itself. |
||
1142 | // If UseMD5 is true, the name is already a GUID and we |
||
1143 | // don't want to return the GUID of GUID. |
||
1144 | static uint64_t getGUID(StringRef Name) { |
||
1145 | return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); |
||
1146 | } |
||
1147 | |||
1148 | // Find all the names in the current FunctionSamples including names in |
||
1149 | // all the inline instances and names of call targets. |
||
1150 | void findAllNames(DenseSet<StringRef> &NameSet) const; |
||
1151 | |||
1152 | private: |
||
1153 | /// CFG hash value for the function. |
||
1154 | uint64_t FunctionHash = 0; |
||
1155 | |||
1156 | /// Calling context for function profile |
||
1157 | mutable SampleContext Context; |
||
1158 | |||
1159 | /// Total number of samples collected inside this function. |
||
1160 | /// |
||
1161 | /// Samples are cumulative, they include all the samples collected |
||
1162 | /// inside this function and all its inlined callees. |
||
1163 | uint64_t TotalSamples = 0; |
||
1164 | |||
1165 | /// Total number of samples collected at the head of the function. |
||
1166 | /// This is an approximation of the number of calls made to this function |
||
1167 | /// at runtime. |
||
1168 | uint64_t TotalHeadSamples = 0; |
||
1169 | |||
1170 | /// Map instruction locations to collected samples. |
||
1171 | /// |
||
1172 | /// Each entry in this map contains the number of samples |
||
1173 | /// collected at the corresponding line offset. All line locations |
||
1174 | /// are an offset from the start of the function. |
||
1175 | BodySampleMap BodySamples; |
||
1176 | |||
1177 | /// Map call sites to collected samples for the called function. |
||
1178 | /// |
||
1179 | /// Each entry in this map corresponds to all the samples |
||
1180 | /// collected for the inlined function call at the given |
||
1181 | /// location. For example, given: |
||
1182 | /// |
||
1183 | /// void foo() { |
||
1184 | /// 1 bar(); |
||
1185 | /// ... |
||
1186 | /// 8 baz(); |
||
1187 | /// } |
||
1188 | /// |
||
1189 | /// If the bar() and baz() calls were inlined inside foo(), this |
||
1190 | /// map will contain two entries. One for all the samples collected |
||
1191 | /// in the call to bar() at line offset 1, the other for all the samples |
||
1192 | /// collected in the call to baz() at line offset 8. |
||
1193 | CallsiteSampleMap CallsiteSamples; |
||
1194 | }; |
||
1195 | |||
1196 | raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); |
||
1197 | |||
1198 | using SampleProfileMap = |
||
1199 | std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>; |
||
1200 | |||
1201 | using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>; |
||
1202 | |||
1203 | void sortFuncProfiles(const SampleProfileMap &ProfileMap, |
||
1204 | std::vector<NameFunctionSamples> &SortedProfiles); |
||
1205 | |||
1206 | /// Sort a LocationT->SampleT map by LocationT. |
||
1207 | /// |
||
1208 | /// It produces a sorted list of <LocationT, SampleT> records by ascending |
||
1209 | /// order of LocationT. |
||
1210 | template <class LocationT, class SampleT> class SampleSorter { |
||
1211 | public: |
||
1212 | using SamplesWithLoc = std::pair<const LocationT, SampleT>; |
||
1213 | using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>; |
||
1214 | |||
1215 | SampleSorter(const std::map<LocationT, SampleT> &Samples) { |
||
1216 | for (const auto &I : Samples) |
||
1217 | V.push_back(&I); |
||
1218 | llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) { |
||
1219 | return A->first < B->first; |
||
1220 | }); |
||
1221 | } |
||
1222 | |||
1223 | const SamplesWithLocList &get() const { return V; } |
||
1224 | |||
1225 | private: |
||
1226 | SamplesWithLocList V; |
||
1227 | }; |
||
1228 | |||
1229 | /// SampleContextTrimmer impelements helper functions to trim, merge cold |
||
1230 | /// context profiles. It also supports context profile canonicalization to make |
||
1231 | /// sure ProfileMap's key is consistent with FunctionSample's name/context. |
||
1232 | class SampleContextTrimmer { |
||
1233 | public: |
||
1234 | SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; |
||
1235 | // Trim and merge cold context profile when requested. TrimBaseProfileOnly |
||
1236 | // should only be effective when TrimColdContext is true. On top of |
||
1237 | // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all |
||
1238 | // cold profiles or only cold base profiles. Trimming base profiles only is |
||
1239 | // mainly to honor the preinliner decsion. Note that when MergeColdContext is |
||
1240 | // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will |
||
1241 | // be ignored. |
||
1242 | void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, |
||
1243 | bool TrimColdContext, |
||
1244 | bool MergeColdContext, |
||
1245 | uint32_t ColdContextFrameLength, |
||
1246 | bool TrimBaseProfileOnly); |
||
1247 | // Canonicalize context profile name and attributes. |
||
1248 | void canonicalizeContextProfiles(); |
||
1249 | |||
1250 | private: |
||
1251 | SampleProfileMap &ProfileMap; |
||
1252 | }; |
||
1253 | |||
1254 | // CSProfileConverter converts a full context-sensitive flat sample profile into |
||
1255 | // a nested context-sensitive sample profile. |
||
1256 | class CSProfileConverter { |
||
1257 | public: |
||
1258 | CSProfileConverter(SampleProfileMap &Profiles); |
||
1259 | void convertProfiles(); |
||
1260 | struct FrameNode { |
||
1261 | FrameNode(StringRef FName = StringRef(), |
||
1262 | FunctionSamples *FSamples = nullptr, |
||
1263 | LineLocation CallLoc = {0, 0}) |
||
1264 | : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; |
||
1265 | |||
1266 | // Map line+discriminator location to child frame |
||
1267 | std::map<uint64_t, FrameNode> AllChildFrames; |
||
1268 | // Function name for current frame |
||
1269 | StringRef FuncName; |
||
1270 | // Function Samples for current frame |
||
1271 | FunctionSamples *FuncSamples; |
||
1272 | // Callsite location in parent context |
||
1273 | LineLocation CallSiteLoc; |
||
1274 | |||
1275 | FrameNode *getOrCreateChildFrame(const LineLocation &CallSite, |
||
1276 | StringRef CalleeName); |
||
1277 | }; |
||
1278 | |||
1279 | private: |
||
1280 | // Nest all children profiles into the profile of Node. |
||
1281 | void convertProfiles(FrameNode &Node); |
||
1282 | FrameNode *getOrCreateContextPath(const SampleContext &Context); |
||
1283 | |||
1284 | SampleProfileMap &ProfileMap; |
||
1285 | FrameNode RootFrame; |
||
1286 | }; |
||
1287 | |||
1288 | /// ProfileSymbolList records the list of function symbols shown up |
||
1289 | /// in the binary used to generate the profile. It is useful to |
||
1290 | /// to discriminate a function being so cold as not to shown up |
||
1291 | /// in the profile and a function newly added. |
||
1292 | class ProfileSymbolList { |
||
1293 | public: |
||
1294 | /// copy indicates whether we need to copy the underlying memory |
||
1295 | /// for the input Name. |
||
1296 | void add(StringRef Name, bool copy = false) { |
||
1297 | if (!copy) { |
||
1298 | Syms.insert(Name); |
||
1299 | return; |
||
1300 | } |
||
1301 | Syms.insert(Name.copy(Allocator)); |
||
1302 | } |
||
1303 | |||
1304 | bool contains(StringRef Name) { return Syms.count(Name); } |
||
1305 | |||
1306 | void merge(const ProfileSymbolList &List) { |
||
1307 | for (auto Sym : List.Syms) |
||
1308 | add(Sym, true); |
||
1309 | } |
||
1310 | |||
1311 | unsigned size() { return Syms.size(); } |
||
1312 | |||
1313 | void setToCompress(bool TC) { ToCompress = TC; } |
||
1314 | bool toCompress() { return ToCompress; } |
||
1315 | |||
1316 | std::error_code read(const uint8_t *Data, uint64_t ListSize); |
||
1317 | std::error_code write(raw_ostream &OS); |
||
1318 | void dump(raw_ostream &OS = dbgs()) const; |
||
1319 | |||
1320 | private: |
||
1321 | // Determine whether or not to compress the symbol list when |
||
1322 | // writing it into profile. The variable is unused when the symbol |
||
1323 | // list is read from an existing profile. |
||
1324 | bool ToCompress = false; |
||
1325 | DenseSet<StringRef> Syms; |
||
1326 | BumpPtrAllocator Allocator; |
||
1327 | }; |
||
1328 | |||
1329 | } // end namespace sampleprof |
||
1330 | |||
1331 | using namespace sampleprof; |
||
1332 | // Provide DenseMapInfo for SampleContext. |
||
1333 | template <> struct DenseMapInfo<SampleContext> { |
||
1334 | static inline SampleContext getEmptyKey() { return SampleContext(); } |
||
1335 | |||
1336 | static inline SampleContext getTombstoneKey() { return SampleContext("@"); } |
||
1337 | |||
1338 | static unsigned getHashValue(const SampleContext &Val) { |
||
1339 | return Val.getHashCode(); |
||
1340 | } |
||
1341 | |||
1342 | static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { |
||
1343 | return LHS == RHS; |
||
1344 | } |
||
1345 | }; |
||
1346 | |||
1347 | // Prepend "__uniq" before the hash for tools like profilers to understand |
||
1348 | // that this symbol is of internal linkage type. The "__uniq" is the |
||
1349 | // pre-determined prefix that is used to tell tools that this symbol was |
||
1350 | // created with -funique-internal-linakge-symbols and the tools can strip or |
||
1351 | // keep the prefix as needed. |
||
1352 | inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) { |
||
1353 | llvm::MD5 Md5; |
||
1354 | Md5.update(FName); |
||
1355 | llvm::MD5::MD5Result R; |
||
1356 | Md5.final(R); |
||
1357 | SmallString<32> Str; |
||
1358 | llvm::MD5::stringifyResult(R, Str); |
||
1359 | // Convert MD5hash to Decimal. Demangler suffixes can either contain |
||
1360 | // numbers or characters but not both. |
||
1361 | llvm::APInt IntHash(128, Str.str(), 16); |
||
1362 | return toString(IntHash, /* Radix = */ 10, /* Signed = */ false) |
||
1363 | .insert(0, FunctionSamples::UniqSuffix); |
||
1364 | } |
||
1365 | |||
1366 | } // end namespace llvm |
||
1367 | |||
1368 | #endif // LLVM_PROFILEDATA_SAMPLEPROF_H |