//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
#include <optional>
#include <utility>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class Function;
class GlobalValue;
class InstCombiner;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class IntrinsicInst;
class LoadInst;
class Loop;
class LoopInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
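
// Illustrative sketch (not part of the TTI interface itself): a pass such as
// EarlyCSE can treat a target intrinsic like an ordinary load once the target
// has filled in a MemIntrinsicInfo for it. The getTgtMemIntrinsic() hook used
// here is assumed (it belongs to TargetTransformInfo but is not shown in this
// portion of the header); `TTI` and `II` come from the caller.
//
//   static bool behavesLikeUnorderedLoad(const TargetTransformInfo &TTI,
//                                        IntrinsicInst *II) {
//     MemIntrinsicInfo Info;
//     if (!TTI.getTgtMemIntrinsic(II, Info))
//       return false;
//     return Info.PtrVal && Info.ReadMem && !Info.WriteMem && Info.isUnordered();
//   }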
| 94 | |||
| 95 | /// Attributes of a target dependent hardware loop. | ||
| 96 | struct HardwareLoopInfo { | ||
| 97 | HardwareLoopInfo() = delete; | ||
| 98 | HardwareLoopInfo(Loop *L) : L(L) {} | ||
| 99 | Loop *L = nullptr; | ||
| 100 | BasicBlock *ExitBlock = nullptr; | ||
| 101 | BranchInst *ExitBranch = nullptr; | ||
| 102 | const SCEV *ExitCount = nullptr; | ||
| 103 | IntegerType *CountType = nullptr; | ||
| 104 | Value *LoopDecrement = nullptr; // Decrement the loop counter by this | ||
| 105 |                                   // value in every iteration. | ||
| 106 | bool IsNestingLegal = false; // Can a hardware loop be a parent to | ||
| 107 |                                   // another hardware loop? | ||
| 108 | bool CounterInReg = false; // Should loop counter be updated in | ||
| 109 |                                   // the loop via a phi? | ||
| 110 | bool PerformEntryTest = false; // Generate the intrinsic which also performs | ||
| 111 |                                   // icmp ne zero on the loop counter value and | ||
| 112 |                                   // produces an i1 to guard the loop entry. | ||
| 113 | bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, | ||
| 114 | DominatorTree &DT, bool ForceNestedLoop = false, | ||
| 115 | bool ForceHardwareLoopPHI = false); | ||
| 116 | bool canAnalyze(LoopInfo &LI); | ||
| 117 | }; | ||
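
// Illustrative sketch (a minimal outline, not the authoritative flow): how a
// pass in the spirit of HardwareLoops might drive this struct together with
// the isHardwareLoopProfitable() hook declared in TargetTransformInfo below.
// Names other than the HardwareLoopInfo members (TTI, L, LI, SE, AC, DT,
// LibInfo) are assumptions about the caller's context.
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (!HWLoopInfo.canAnalyze(LI))
//     return false;
//   if (!TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo))
//     return false;
//   if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     return false;
//   // At this point ExitBlock, ExitBranch, ExitCount and CountType have been
//   // filled in and can be used when emitting the hardware-loop intrinsics.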
| 118 | |||
| 119 | class IntrinsicCostAttributes { | ||
| 120 | const IntrinsicInst *II = nullptr; | ||
| 121 | Type *RetTy = nullptr; | ||
| 122 | Intrinsic::ID IID; | ||
| 123 | SmallVector<Type *, 4> ParamTys; | ||
| 124 | SmallVector<const Value *, 4> Arguments; | ||
| 125 |   FastMathFlags FMF; | ||
| 126 |   // If ScalarizationCost is UINT_MAX, the cost of scalarizing the | ||
| 127 |   // arguments and the return value will be computed based on types. | ||
| 128 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); | ||
| 129 | |||
| 130 | public: | ||
| 131 |   IntrinsicCostAttributes( | ||
| 132 | Intrinsic::ID Id, const CallBase &CI, | ||
| 133 | InstructionCost ScalarCost = InstructionCost::getInvalid(), | ||
| 134 | bool TypeBasedOnly = false); | ||
| 135 | |||
| 136 |   IntrinsicCostAttributes( | ||
| 137 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, | ||
| 138 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, | ||
| 139 | InstructionCost ScalarCost = InstructionCost::getInvalid()); | ||
| 140 | |||
| 141 | IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, | ||
| 142 | ArrayRef<const Value *> Args); | ||
| 143 | |||
| 144 |   IntrinsicCostAttributes( | ||
| 145 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, | ||
| 146 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), | ||
| 147 | const IntrinsicInst *I = nullptr, | ||
| 148 | InstructionCost ScalarCost = InstructionCost::getInvalid()); | ||
| 149 | |||
| 150 | Intrinsic::ID getID() const { return IID; } | ||
| 151 | const IntrinsicInst *getInst() const { return II; } | ||
| 152 | Type *getReturnType() const { return RetTy; } | ||
| 153 | FastMathFlags getFlags() const { return FMF; } | ||
| 154 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } | ||
| 155 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } | ||
| 156 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } | ||
| 157 | |||
| 158 | bool isTypeBasedOnly() const { | ||
| 159 | return Arguments.empty(); | ||
| 160 |   } | ||
| 161 | |||
| 162 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } | ||
| 163 | }; | ||
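
// Illustrative sketch: building a type-only cost query for an intrinsic. The
// getIntrinsicInstrCost() hook that consumes the attributes is assumed here
// (it is part of TargetTransformInfo but outside the lines shown above);
// `TTI` and `VecTy` come from the caller.
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, VecTy,
//                                 {VecTy, VecTy, VecTy});
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(
//       Attrs, TargetTransformInfo::TCK_RecipThroughput);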
| 164 | |||
| 165 | enum class PredicationStyle { None, Data, DataAndControlFlow }; | ||
| 166 | |||
| 167 | class TargetTransformInfo; | ||
| 168 | typedef TargetTransformInfo TTI; | ||
| 169 | |||
| 170 | /// This pass provides access to the codegen interfaces that are needed | ||
| 171 | /// for IR-level transformations. | ||
| 172 | class TargetTransformInfo { | ||
| 173 | public: | ||
| 174 |   /// Construct a TTI object using a type implementing the \c Concept | ||
| 175 |   /// API below. | ||
| 176 |   /// | ||
| 177 |   /// This is used by targets to construct a TTI wrapping their target-specific | ||
| 178 |   /// implementation that encodes appropriate costs for their target. | ||
| 179 | template <typename T> TargetTransformInfo(T Impl); | ||
| 180 | |||
| 181 |   /// Construct a baseline TTI object using a minimal implementation of | ||
| 182 |   /// the \c Concept API below. | ||
| 183 |   /// | ||
| 184 |   /// The TTI implementation will reflect the information in the DataLayout | ||
| 185 |   /// provided if non-null. | ||
| 186 | explicit TargetTransformInfo(const DataLayout &DL); | ||
| 187 | |||
| 188 |   // Provide move semantics. | ||
| 189 | TargetTransformInfo(TargetTransformInfo &&Arg); | ||
| 190 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); | ||
| 191 | |||
| 192 |   // We need to define the destructor out-of-line to define our sub-classes | ||
| 193 |   // out-of-line. | ||
| 194 | ~TargetTransformInfo(); | ||
| 195 | |||
| 196 |   /// Handle the invalidation of this information. | ||
| 197 |   /// | ||
| 198 |   /// When used as a result of \c TargetIRAnalysis this method will be called | ||
| 199 |   /// when the function this was computed for changes. When it returns false, | ||
| 200 |   /// the information is preserved across those changes. | ||
| 201 | bool invalidate(Function &, const PreservedAnalyses &, | ||
| 202 | FunctionAnalysisManager::Invalidator &) { | ||
| 203 |     // FIXME: We should probably in some way ensure that the subtarget | ||
| 204 |     // information for a function hasn't changed. | ||
| 205 | return false; | ||
| 206 |   } | ||
| 207 | |||
| 208 |   /// \name Generic Target Information | ||
| 209 |   /// @{ | ||
| 210 | |||
| 211 |   /// The kind of cost model. | ||
| 212 |   /// | ||
| 213 |   /// There are several different cost models that can be customized by the | ||
| 214 |   /// target. The normalization of each cost model may be target specific. | ||
| 215 |   /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as | ||
| 216 |   /// those derived from MCSchedModel::LoopMicroOpBufferSize etc. | ||
| 217 | enum TargetCostKind { | ||
| 218 |     TCK_RecipThroughput, ///< Reciprocal throughput. | ||
| 219 |     TCK_Latency,         ///< The latency of instruction. | ||
| 220 |     TCK_CodeSize,        ///< Instruction code size. | ||
| 221 |     TCK_SizeAndLatency   ///< The weighted sum of size and latency. | ||
| 222 | }; | ||
| 223 | |||
| 224 |   /// Underlying constants for 'cost' values in this interface. | ||
| 225 |   /// | ||
| 226 |   /// Many APIs in this interface return a cost. This enum defines the | ||
| 227 |   /// fundamental values that should be used to interpret (and produce) those | ||
| 228 |   /// costs. The costs are returned as an int rather than a member of this | ||
| 229 |   /// enumeration because it is expected that the cost of one IR instruction | ||
| 230 |   /// may have a multiplicative factor to it or otherwise won't fit directly | ||
| 231 |   /// into the enum. Moreover, it is common to sum or average costs which works | ||
| 232 |   /// better as simple integral values. Thus this enum only provides constants. | ||
| 233 |   /// Also note that the returned costs are signed integers to make it natural | ||
| 234 |   /// to add, subtract, and test with zero (a common boundary condition). It is | ||
| 235 |   /// not expected that 2^32 is a realistic cost to be modeling at any point. | ||
| 236 |   /// | ||
| 237 |   /// Note that these costs should usually reflect the intersection of code-size | ||
| 238 |   /// cost and execution cost. A free instruction is typically one that folds | ||
| 239 |   /// into another instruction. For example, reg-to-reg moves can often be | ||
| 240 |   /// skipped by renaming the registers in the CPU, but they still are encoded | ||
| 241 |   /// and thus wouldn't be considered 'free' here. | ||
| 242 | enum TargetCostConstants { | ||
| 243 | TCC_Free = 0, ///< Expected to fold away in lowering. | ||
| 244 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. | ||
| 245 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. | ||
| 246 | }; | ||
| 247 | |||
| 248 |   /// Estimate the cost of a GEP operation when lowered. | ||
| 249 | InstructionCost | ||
| 250 | getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 251 | ArrayRef<const Value *> Operands, | ||
| 252 | TargetCostKind CostKind = TCK_SizeAndLatency) const; | ||
| 253 | |||
| 254 |   /// \returns A value by which our inlining threshold should be multiplied. | ||
| 255 |   /// This is primarily used to bump up the inlining threshold wholesale on | ||
| 256 |   /// targets where calls are unusually expensive. | ||
| 257 |   /// | ||
| 258 |   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of | ||
| 259 |   /// individual classes of instructions would be better. | ||
| 260 | unsigned getInliningThresholdMultiplier() const; | ||
| 261 | |||
| 262 |   /// \returns A value to be added to the inlining threshold. | ||
| 263 | unsigned adjustInliningThreshold(const CallBase *CB) const; | ||
| 264 | |||
| 265 |   /// \returns Vector bonus in percent. | ||
| 266 |   /// | ||
| 267 |   /// Vector bonuses: We want to more aggressively inline vector-dense kernels | ||
| 268 |   /// and apply this bonus based on the percentage of vector instructions. A | ||
| 269 |   /// bonus is applied if the vector instructions exceed 50% and half that | ||
| 270 |   /// amount is applied if it exceeds 10%. Note that these bonuses are some what | ||
| 271 |   /// arbitrary and evolved over time by accident as much as because they are | ||
| 272 |   /// principled bonuses. | ||
| 273 |   /// FIXME: It would be nice to base the bonus values on something more | ||
| 274 |   /// scientific. A target may has no bonus on vector instructions. | ||
| 275 | int getInlinerVectorBonusPercent() const; | ||
| 276 | |||
| 277 |   /// \return the expected cost of a memcpy, which could e.g. depend on the | ||
| 278 |   /// source/destination type and alignment and the number of bytes copied. | ||
| 279 | InstructionCost getMemcpyCost(const Instruction *I) const; | ||
| 280 | |||
| 281 |   /// \return The estimated number of case clusters when lowering \p 'SI'. | ||
| 282 |   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump | ||
| 283 |   /// table. | ||
| 284 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||
| 285 | unsigned &JTSize, | ||
| 286 |                                             ProfileSummaryInfo *PSI, | ||
| 287 | BlockFrequencyInfo *BFI) const; | ||
| 288 | |||
| 289 |   /// Estimate the cost of a given IR user when lowered. | ||
| 290 |   /// | ||
| 291 |   /// This can estimate the cost of either a ConstantExpr or Instruction when | ||
| 292 |   /// lowered. | ||
| 293 |   /// | ||
| 294 |   /// \p Operands is a list of operands which can be a result of transformations | ||
| 295 |   /// of the current operands. The number of the operands on the list must equal | ||
| 296 |   /// to the number of the current operands the IR user has. Their order on the | ||
| 297 |   /// list must be the same as the order of the current operands the IR user | ||
| 298 |   /// has. | ||
| 299 |   /// | ||
| 300 |   /// The returned cost is defined in terms of \c TargetCostConstants, see its | ||
| 301 |   /// comments for a detailed explanation of the cost values. | ||
| 302 | InstructionCost getInstructionCost(const User *U, | ||
| 303 | ArrayRef<const Value *> Operands, | ||
| 304 | TargetCostKind CostKind) const; | ||
| 305 | |||
| 306 |   /// This is a helper function which calls the three-argument | ||
| 307 |   /// getInstructionCost with \p Operands which are the current operands U has. | ||
| 308 | InstructionCost getInstructionCost(const User *U, | ||
| 309 | TargetCostKind CostKind) const { | ||
| 310 | SmallVector<const Value *, 4> Operands(U->operand_values()); | ||
| 311 | return getInstructionCost(U, Operands, CostKind); | ||
| 312 |   } | ||
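
  // Illustrative sketch: accumulating per-instruction costs for a basic block
  // under a particular cost kind, as a client pass might do. `TTI`, `BB` and
  // `Budget` are assumptions about the caller's context.
  //
  //   InstructionCost Cost = 0;
  //   for (const Instruction &I : *BB)
  //     Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  //   bool FitsBudget = Cost.isValid() && Cost <= Budget;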
| 313 | |||
| 314 |   /// If a branch or a select condition is skewed in one direction by more than | ||
| 315 |   /// this factor, it is very likely to be predicted correctly. | ||
| 316 | BranchProbability getPredictableBranchThreshold() const; | ||
| 317 | |||
| 318 |   /// Return true if branch divergence exists. | ||
| 319 |   /// | ||
| 320 |   /// Branch divergence has a significantly negative impact on GPU performance | ||
| 321 |   /// when threads in the same wavefront take different paths due to conditional | ||
| 322 |   /// branches. | ||
| 323 | bool hasBranchDivergence() const; | ||
| 324 | |||
| 325 |   /// Return true if the target prefers to use GPU divergence analysis to | ||
| 326 |   /// replace the legacy version. | ||
| 327 | bool useGPUDivergenceAnalysis() const; | ||
| 328 | |||
| 329 |   /// Returns whether V is a source of divergence. | ||
| 330 |   /// | ||
| 331 |   /// This function provides the target-dependent information for | ||
| 332 |   /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis | ||
| 333 |   /// first builds the dependency graph, and then runs the reachability | ||
| 334 |   /// algorithm starting with the sources of divergence. | ||
| 335 | bool isSourceOfDivergence(const Value *V) const; | ||
| 336 | |||
| 337 |   // Returns true for the target specific | ||
| 338 |   // set of operations which produce uniform result | ||
| 339 |   // even taking non-uniform arguments | ||
| 340 | bool isAlwaysUniform(const Value *V) const; | ||
| 341 | |||
| 342 |   /// Returns the address space ID for a target's 'flat' address space. Note | ||
| 343 |   /// this is not necessarily the same as addrspace(0), which LLVM sometimes | ||
| 344 |   /// refers to as the generic address space. The flat address space is a | ||
| 345 |   /// generic address space that can be used access multiple segments of memory | ||
| 346 |   /// with different address spaces. Access of a memory location through a | ||
| 347 |   /// pointer with this address space is expected to be legal but slower | ||
| 348 |   /// compared to the same memory location accessed through a pointer with a | ||
| 349 |   /// different address space. | ||
| 350 |   // | ||
| 351 |   /// This is for targets with different pointer representations which can | ||
| 352 |   /// be converted with the addrspacecast instruction. If a pointer is converted | ||
| 353 |   /// to this address space, optimizations should attempt to replace the access | ||
| 354 |   /// with the source address space. | ||
| 355 |   /// | ||
| 356 |   /// \returns ~0u if the target does not have such a flat address space to | ||
| 357 |   /// optimize away. | ||
| 358 | unsigned getFlatAddressSpace() const; | ||
| 359 | |||
| 360 |   /// Return any intrinsic address operand indexes which may be rewritten if | ||
| 361 |   /// they use a flat address space pointer. | ||
| 362 |   /// | ||
| 363 |   /// \returns true if the intrinsic was handled. | ||
| 364 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 365 | Intrinsic::ID IID) const; | ||
| 366 | |||
| 367 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; | ||
| 368 | |||
| 369 |   /// Return true if globals in this address space can have initializers other | ||
| 370 |   /// than `undef`. | ||
| 371 | bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; | ||
| 372 | |||
| 373 | unsigned getAssumedAddrSpace(const Value *V) const; | ||
| 374 | |||
| 375 | bool isSingleThreaded() const; | ||
| 376 | |||
| 377 | std::pair<const Value *, unsigned> | ||
| 378 | getPredicatedAddrSpace(const Value *V) const; | ||
| 379 | |||
| 380 |   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p | ||
| 381 |   /// NewV, which has a different address space. This should happen for every | ||
| 382 |   /// operand index that collectFlatAddressOperands returned for the intrinsic. | ||
| 383 |   /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the | ||
| 384 |   /// new value (which may be the original \p II with modified operands). | ||
| 385 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, | ||
| 386 | Value *NewV) const; | ||
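
  // Illustrative sketch: the calling protocol between
  // collectFlatAddressOperands() and rewriteIntrinsicWithAddressSpace(), as an
  // InferAddressSpaces-style pass might use it. `TTI`, `II`, `FlatPtr` and
  // `SpecificPtr` are assumptions about the caller's context.
  //
  //   SmallVector<int, 4> OpIndexes;
  //   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID())) {
  //     // For each returned index whose operand is FlatPtr, try to rewrite it
  //     // with SpecificPtr, which points into a concrete address space.
  //     if (Value *NewV =
  //             TTI.rewriteIntrinsicWithAddressSpace(II, FlatPtr, SpecificPtr))
  //       if (NewV != II)
  //         II->replaceAllUsesWith(NewV);
  //   }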
| 387 | |||
| 388 |   /// Test whether calls to a function lower to actual program function | ||
| 389 |   /// calls. | ||
| 390 |   /// | ||
| 391 |   /// The idea is to test whether the program is likely to require a 'call' | ||
| 392 |   /// instruction or equivalent in order to call the given function. | ||
| 393 |   /// | ||
| 394 |   /// FIXME: It's not clear that this is a good or useful query API. Client's | ||
| 395 |   /// should probably move to simpler cost metrics using the above. | ||
| 396 |   /// Alternatively, we could split the cost interface into distinct code-size | ||
| 397 |   /// and execution-speed costs. This would allow modelling the core of this | ||
| 398 |   /// query more accurately as a call is a single small instruction, but | ||
| 399 |   /// incurs significant execution cost. | ||
| 400 | bool isLoweredToCall(const Function *F) const; | ||
| 401 | |||
| 402 | struct LSRCost { | ||
| 403 |     /// TODO: Some of these could be merged. Also, a lexical ordering | ||
| 404 |     /// isn't always optimal. | ||
| 405 | unsigned Insns; | ||
| 406 | unsigned NumRegs; | ||
| 407 | unsigned AddRecCost; | ||
| 408 | unsigned NumIVMuls; | ||
| 409 | unsigned NumBaseAdds; | ||
| 410 | unsigned ImmCost; | ||
| 411 | unsigned SetupCost; | ||
| 412 | unsigned ScaleCost; | ||
| 413 | }; | ||
| 414 | |||
| 415 |   /// Parameters that control the generic loop unrolling transformation. | ||
| 416 | struct UnrollingPreferences { | ||
| 417 |     /// The cost threshold for the unrolled loop. Should be relative to the | ||
| 418 |     /// getInstructionCost values returned by this API, and the expectation is | ||
| 419 |     /// that the unrolled loop's instructions when run through that interface | ||
| 420 |     /// should not exceed this cost. However, this is only an estimate. Also, | ||
| 421 |     /// specific loops may be unrolled even with a cost above this threshold if | ||
| 422 |     /// deemed profitable. Set this to UINT_MAX to disable the loop body cost | ||
| 423 |     /// restriction. | ||
| 424 | unsigned Threshold; | ||
| 425 |     /// If complete unrolling will reduce the cost of the loop, we will boost | ||
| 426 |     /// the Threshold by a certain percent to allow more aggressive complete | ||
| 427 |     /// unrolling. This value provides the maximum boost percentage that we | ||
| 428 |     /// can apply to Threshold (The value should be no less than 100). | ||
| 429 |     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, | ||
| 430 |     ///                                    MaxPercentThresholdBoost / 100) | ||
| 431 |     /// E.g. if complete unrolling reduces the loop execution time by 50% | ||
| 432 |     /// then we boost the threshold by the factor of 2x. If unrolling is not | ||
| 433 |     /// expected to reduce the running time, then we do not increase the | ||
| 434 |     /// threshold. | ||
| 435 | unsigned MaxPercentThresholdBoost; | ||
| 436 |     /// The cost threshold for the unrolled loop when optimizing for size (set | ||
| 437 |     /// to UINT_MAX to disable). | ||
| 438 | unsigned OptSizeThreshold; | ||
| 439 |     /// The cost threshold for the unrolled loop, like Threshold, but used | ||
| 440 |     /// for partial/runtime unrolling (set to UINT_MAX to disable). | ||
| 441 | unsigned PartialThreshold; | ||
| 442 |     /// The cost threshold for the unrolled loop when optimizing for size, like | ||
| 443 |     /// OptSizeThreshold, but used for partial/runtime unrolling (set to | ||
| 444 |     /// UINT_MAX to disable). | ||
| 445 | unsigned PartialOptSizeThreshold; | ||
| 446 |     /// A forced unrolling factor (the number of concatenated bodies of the | ||
| 447 |     /// original loop in the unrolled loop body). When set to 0, the unrolling | ||
| 448 |     /// transformation will select an unrolling factor based on the current cost | ||
| 449 |     /// threshold and other factors. | ||
| 450 | unsigned Count; | ||
| 451 |     /// Default unroll count for loops with run-time trip count. | ||
| 452 | unsigned DefaultUnrollRuntimeCount; | ||
| 453 |     // Set the maximum unrolling factor. The unrolling factor may be selected | ||
| 454 |     // using the appropriate cost threshold, but may not exceed this number | ||
| 455 |     // (set to UINT_MAX to disable). This does not apply in cases where the | ||
| 456 |     // loop is being fully unrolled. | ||
| 457 | unsigned MaxCount; | ||
| 458 |     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but | ||
| 459 |     /// applies even if full unrolling is selected. This allows a target to fall | ||
| 460 |     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. | ||
| 461 | unsigned FullUnrollMaxCount; | ||
| 462 |     // Represents number of instructions optimized when "back edge" | ||
| 463 |     // becomes "fall through" in unrolled loop. | ||
| 464 |     // For now we count a conditional branch on a backedge and a comparison | ||
| 465 |     // feeding it. | ||
| 466 | unsigned BEInsns; | ||
| 467 |     /// Allow partial unrolling (unrolling of loops to expand the size of the | ||
| 468 |     /// loop body, not only to eliminate small constant-trip-count loops). | ||
| 469 | bool Partial; | ||
| 470 |     /// Allow runtime unrolling (unrolling of loops to expand the size of the | ||
| 471 |     /// loop body even when the number of loop iterations is not known at | ||
| 472 |     /// compile time). | ||
| 473 | bool Runtime; | ||
| 474 |     /// Allow generation of a loop remainder (extra iterations after unroll). | ||
| 475 | bool AllowRemainder; | ||
| 476 |     /// Allow emitting expensive instructions (such as divisions) when computing | ||
| 477 |     /// the trip count of a loop for runtime unrolling. | ||
| 478 | bool AllowExpensiveTripCount; | ||
| 479 |     /// Apply loop unroll on any kind of loop | ||
| 480 |     /// (mainly to loops that fail runtime unrolling). | ||
| 481 | bool Force; | ||
| 482 |     /// Allow using trip count upper bound to unroll loops. | ||
| 483 | bool UpperBound; | ||
| 484 |     /// Allow unrolling of all the iterations of the runtime loop remainder. | ||
| 485 | bool UnrollRemainder; | ||
| 486 |     /// Allow unroll and jam. Used to enable unroll and jam for the target. | ||
| 487 | bool UnrollAndJam; | ||
| 488 |     /// Threshold for unroll and jam, for inner loop size. The 'Threshold' | ||
| 489 |     /// value above is used during unroll and jam for the outer loop size. | ||
| 490 |     /// This value is used in the same manner to limit the size of the inner | ||
| 491 |     /// loop. | ||
| 492 | unsigned UnrollAndJamInnerLoopThreshold; | ||
| 493 |     /// Don't allow loop unrolling to simulate more than this number of | ||
| 494 |     /// iterations when checking full unroll profitability | ||
| 495 | unsigned MaxIterationsCountToAnalyze; | ||
| 496 | }; | ||
| 497 | |||
| 498 |   /// Get target-customized preferences for the generic loop unrolling | ||
| 499 |   /// transformation. The caller will initialize UP with the current | ||
| 500 |   /// target-independent defaults. | ||
| 501 | void getUnrollingPreferences(Loop *L, ScalarEvolution &, | ||
| 502 |                                UnrollingPreferences &UP, | ||
| 503 | OptimizationRemarkEmitter *ORE) const; | ||
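
  // Illustrative sketch: how a hypothetical target implementation (the name
  // `MyTTIImpl` is a placeholder, not a real class) might adjust the defaults
  // it receives in \p UP.
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP,
  //                                           OptimizationRemarkEmitter *ORE) {
  //     UP.Partial = true;   // allow partial unrolling
  //     UP.Runtime = true;   // allow runtime unrolling
  //     UP.MaxCount = 4;     // but never unroll by more than 4
  //     UP.Threshold += 50;  // and be a bit more generous with the size budget
  //   }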
| 504 | |||
| 505 |   /// Query the target whether it would be profitable to convert the given loop | ||
| 506 |   /// into a hardware loop. | ||
| 507 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 508 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, | ||
| 509 | HardwareLoopInfo &HWLoopInfo) const; | ||
| 510 | |||
| 511 |   /// Query the target whether it would be prefered to create a predicated | ||
| 512 |   /// vector loop, which can avoid the need to emit a scalar epilogue loop. | ||
| 513 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 514 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 515 |                                    DominatorTree *DT, | ||
| 516 |                                    LoopVectorizationLegality *LVL, | ||
| 517 | InterleavedAccessInfo *IAI) const; | ||
| 518 | |||
| 519 |   /// Query the target whether lowering of the llvm.get.active.lane.mask | ||
| 520 |   /// intrinsic is supported and how the mask should be used. A return value | ||
| 521 |   /// of PredicationStyle::Data indicates the mask is used as data only, | ||
| 522 |   /// whereas PredicationStyle::DataAndControlFlow indicates we should also use | ||
| 523 |   /// the mask for control flow in the loop. If unsupported the return value is | ||
| 524 |   /// PredicationStyle::None. | ||
| 525 | PredicationStyle emitGetActiveLaneMask() const; | ||
| 526 | |||
| 527 |   // Parameters that control the loop peeling transformation | ||
| 528 | struct PeelingPreferences { | ||
| 529 |     /// A forced peeling factor (the number of bodied of the original loop | ||
| 530 |     /// that should be peeled off before the loop body). When set to 0, the | ||
| 531 |     /// a peeling factor based on profile information and other factors. | ||
| 532 | unsigned PeelCount; | ||
| 533 |     /// Allow peeling off loop iterations. | ||
| 534 | bool AllowPeeling; | ||
| 535 |     /// Allow peeling off loop iterations for loop nests. | ||
| 536 | bool AllowLoopNestsPeeling; | ||
| 537 |     /// Allow peeling basing on profile. Uses to enable peeling off all | ||
| 538 |     /// iterations basing on provided profile. | ||
| 539 |     /// If the value is true the peeling cost model can decide to peel only | ||
| 540 |     /// some iterations and in this case it will set this to false. | ||
| 541 | bool PeelProfiledIterations; | ||
| 542 | }; | ||
| 543 | |||
| 544 |   /// Get target-customized preferences for the generic loop peeling | ||
| 545 |   /// transformation. The caller will initialize \p PP with the current | ||
| 546 |   /// target-independent defaults with information from \p L and \p SE. | ||
| 547 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 548 | PeelingPreferences &PP) const; | ||
| 549 | |||
| 550 |   /// Targets can implement their own combinations for target-specific | ||
| 551 |   /// intrinsics. This function will be called from the InstCombine pass every | ||
| 552 |   /// time a target-specific intrinsic is encountered. | ||
| 553 |   /// | ||
| 554 |   /// \returns std::nullopt to not do anything target specific or a value that | ||
| 555 |   /// will be returned from the InstCombiner. It is possible to return null and | ||
| 556 |   /// stop further processing of the intrinsic by returning nullptr. | ||
| 557 | std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC, | ||
| 558 | IntrinsicInst & II) const; | ||
| 559 |   /// Can be used to implement target-specific instruction combining. | ||
| 560 |   /// \see instCombineIntrinsic | ||
| 561 | std::optional<Value *> simplifyDemandedUseBitsIntrinsic( | ||
| 562 | InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask, | ||
| 563 | KnownBits & Known, bool &KnownBitsComputed) const; | ||
| 564 |   /// Can be used to implement target-specific instruction combining. | ||
| 565 |   /// \see instCombineIntrinsic | ||
| 566 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 567 | InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts, | ||
| 568 | APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3, | ||
| 569 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 570 | SimplifyAndSetOp) const; | ||
| 571 |   /// @} | ||
| 572 | |||
| 573 |   /// \name Scalar Target Information | ||
| 574 |   /// @{ | ||
| 575 | |||
| 576 |   /// Flags indicating the kind of support for population count. | ||
| 577 |   /// | ||
| 578 |   /// Compared to the SW implementation, HW support is supposed to | ||
| 579 |   /// significantly boost the performance when the population is dense, and it | ||
| 580 |   /// may or may not degrade performance if the population is sparse. A HW | ||
| 581 |   /// support is considered as "Fast" if it can outperform, or is on a par | ||
| 582 |   /// with, SW implementation when the population is sparse; otherwise, it is | ||
| 583 |   /// considered as "Slow". | ||
| 584 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; | ||
| 585 | |||
| 586 |   /// Return true if the specified immediate is legal add immediate, that | ||
| 587 |   /// is the target has add instructions which can add a register with the | ||
| 588 |   /// immediate without having to materialize the immediate into a register. | ||
| 589 | bool isLegalAddImmediate(int64_t Imm) const; | ||
| 590 | |||
| 591 |   /// Return true if the specified immediate is legal icmp immediate, | ||
| 592 |   /// that is the target has icmp instructions which can compare a register | ||
| 593 |   /// against the immediate without having to materialize the immediate into a | ||
| 594 |   /// register. | ||
| 595 | bool isLegalICmpImmediate(int64_t Imm) const; | ||
| 596 | |||
| 597 |   /// Return true if the addressing mode represented by AM is legal for | ||
| 598 |   /// this target, for a load/store of the specified type. | ||
| 599 |   /// The type may be VoidTy, in which case only return true if the addressing | ||
| 600 |   /// mode is legal for a load/store of any legal type. | ||
| 601 |   /// If target returns true in LSRWithInstrQueries(), I may be valid. | ||
| 602 |   /// TODO: Handle pre/postinc as well. | ||
| 603 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||
| 604 | bool HasBaseReg, int64_t Scale, | ||
| 605 | unsigned AddrSpace = 0, | ||
| 606 | Instruction *I = nullptr) const; | ||
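
  // Illustrative sketch: asking whether a [BaseReg + 4*Index + 16] style
  // address is directly encodable for an i32 access. The constants are
  // arbitrary and `TTI`/`Ctx` are assumptions about the caller's context.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);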
| 607 | |||
| 608 |   /// Return true if LSR cost of C1 is lower than C2. | ||
| 609 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 610 | const TargetTransformInfo::LSRCost &C2) const; | ||
| 611 | |||
| 612 |   /// Return true if LSR major cost is number of registers. Targets which | ||
| 613 |   /// implement their own isLSRCostLess and unset number of registers as major | ||
| 614 |   /// cost should return false, otherwise return true. | ||
| 615 | bool isNumRegsMajorCostOfLSR() const; | ||
| 616 | |||
| 617 |   /// \returns true if LSR should not optimize a chain that includes \p I. | ||
| 618 | bool isProfitableLSRChainElement(Instruction *I) const; | ||
| 619 | |||
| 620 |   /// Return true if the target can fuse a compare and branch. | ||
| 621 |   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost | ||
| 622 |   /// calculation for the instructions in a loop. | ||
| 623 | bool canMacroFuseCmp() const; | ||
| 624 | |||
| 625 |   /// Return true if the target can save a compare for loop count, for example | ||
| 626 |   /// hardware loop saves a compare. | ||
| 627 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||
| 628 | DominatorTree *DT, AssumptionCache *AC, | ||
| 629 | TargetLibraryInfo *LibInfo) const; | ||
| 630 | |||
| 631 | enum AddressingModeKind { | ||
| 632 | AMK_PreIndexed, | ||
| 633 | AMK_PostIndexed, | ||
| 634 | AMK_None | ||
| 635 | }; | ||
| 636 | |||
| 637 |   /// Return the preferred addressing mode LSR should make efforts to generate. | ||
| 638 | AddressingModeKind getPreferredAddressingMode(const Loop *L, | ||
| 639 | ScalarEvolution *SE) const; | ||
| 640 | |||
| 641 |   /// Return true if the target supports masked store. | ||
| 642 | bool isLegalMaskedStore(Type *DataType, Align Alignment) const; | ||
| 643 |   /// Return true if the target supports masked load. | ||
| 644 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) const; | ||
| 645 | |||
| 646 |   /// Return true if the target supports nontemporal store. | ||
| 647 | bool isLegalNTStore(Type *DataType, Align Alignment) const; | ||
| 648 |   /// Return true if the target supports nontemporal load. | ||
| 649 | bool isLegalNTLoad(Type *DataType, Align Alignment) const; | ||
| 650 | |||
| 651 |   /// \Returns true if the target supports broadcasting a load to a vector of | ||
| 652 |   /// type <NumElements x ElementTy>. | ||
| 653 | bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; | ||
| 654 | |||
| 655 |   /// Return true if the target supports masked scatter. | ||
| 656 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; | ||
| 657 |   /// Return true if the target supports masked gather. | ||
| 658 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const; | ||
| 659 |   /// Return true if the target forces scalarizing of llvm.masked.gather | ||
| 660 |   /// intrinsics. | ||
| 661 | bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const; | ||
| 662 |   /// Return true if the target forces scalarizing of llvm.masked.scatter | ||
| 663 |   /// intrinsics. | ||
| 664 | bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const; | ||
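
  // Illustrative sketch: a vectorizer-style legality check combining the hooks
  // above. `TTI` and `Ctx` are assumptions about the caller's context; the
  // element type and widths are arbitrary.
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
  //   bool CanMaskLoad = TTI.isLegalMaskedLoad(VecTy, Align(16));
  //   bool UseGather = TTI.isLegalMaskedGather(VecTy, Align(4)) &&
  //                    !TTI.forceScalarizeMaskedGather(VecTy, Align(4));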
| 665 | |||
| 666 |   /// Return true if the target supports masked compress store. | ||
| 667 | bool isLegalMaskedCompressStore(Type *DataType) const; | ||
| 668 |   /// Return true if the target supports masked expand load. | ||
| 669 | bool isLegalMaskedExpandLoad(Type *DataType) const; | ||
| 670 | |||
| 671 |   /// Return true if this is an alternating opcode pattern that can be lowered | ||
| 672 |   /// to a single instruction on the target. In X86 this is for the addsub | ||
| 673 |   /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR. | ||
| 674 |   /// This function expectes two opcodes: \p Opcode1 and \p Opcode2 being | ||
| 675 |   /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0` | ||
| 676 |   /// when \p Opcode0 is selected and `1` when Opcode1 is selected. | ||
| 677 |   /// \p VecTy is the vector type of the instruction to be generated. | ||
| 678 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, | ||
| 679 | const SmallBitVector &OpcodeMask) const; | ||
| 680 | |||
| 681 |   /// Return true if we should be enabling ordered reductions for the target. | ||
| 682 | bool enableOrderedReductions() const; | ||
| 683 | |||
| 684 |   /// Return true if the target has a unified operation to calculate division | ||
| 685 |   /// and remainder. If so, the additional implicit multiplication and | ||
| 686 |   /// subtraction required to calculate a remainder from division are free. This | ||
| 687 |   /// can enable more aggressive transformations for division and remainder than | ||
| 688 |   /// would typically be allowed using throughput or size cost models. | ||
| 689 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; | ||
| 690 | |||
| 691 |   /// Return true if the given instruction (assumed to be a memory access | ||
| 692 |   /// instruction) has a volatile variant. If that's the case then we can avoid | ||
| 693 |   /// addrspacecast to generic AS for volatile loads/stores. Default | ||
| 694 |   /// implementation returns false, which prevents address space inference for | ||
| 695 |   /// volatile loads/stores. | ||
| 696 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; | ||
| 697 | |||
| 698 |   /// Return true if target doesn't mind addresses in vectors. | ||
| 699 | bool prefersVectorizedAddressing() const; | ||
| 700 | |||
| 701 |   /// Return the cost of the scaling factor used in the addressing | ||
| 702 |   /// mode represented by AM for this target, for a load/store | ||
| 703 |   /// of the specified type. | ||
| 704 |   /// If the AM is supported, the return value must be >= 0. | ||
| 705 |   /// If the AM is not supported, it returns a negative value. | ||
| 706 |   /// TODO: Handle pre/postinc as well. | ||
| 707 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 708 | int64_t BaseOffset, bool HasBaseReg, | ||
| 709 |                                        int64_t Scale, | ||
| 710 | unsigned AddrSpace = 0) const; | ||
| 711 | |||
| 712 |   /// Return true if the loop strength reduce pass should make | ||
| 713 |   /// Instruction* based TTI queries to isLegalAddressingMode(). This is | ||
| 714 |   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned | ||
| 715 |   /// immediate offset and no index register. | ||
| 716 | bool LSRWithInstrQueries() const; | ||
| 717 | |||
| 718 |   /// Return true if it's free to truncate a value of type Ty1 to type | ||
| 719 |   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 | ||
| 720 |   /// by referencing its sub-register AX. | ||
| 721 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; | ||
| 722 | |||
| 723 |   /// Return true if it is profitable to hoist instruction in the | ||
| 724 |   /// then/else to before if. | ||
| 725 | bool isProfitableToHoist(Instruction *I) const; | ||
| 726 | |||
| 727 | bool useAA() const; | ||
| 728 | |||
| 729 |   /// Return true if this type is legal. | ||
| 730 | bool isTypeLegal(Type *Ty) const; | ||
| 731 | |||
| 732 |   /// Returns the estimated number of registers required to represent \p Ty. | ||
| 733 | unsigned getRegUsageForType(Type *Ty) const; | ||
| 734 | |||
| 735 |   /// Return true if switches should be turned into lookup tables for the | ||
| 736 |   /// target. | ||
| 737 | bool shouldBuildLookupTables() const; | ||
| 738 | |||
| 739 |   /// Return true if switches should be turned into lookup tables | ||
| 740 |   /// containing this constant value for the target. | ||
| 741 | bool shouldBuildLookupTablesForConstant(Constant *C) const; | ||
| 742 | |||
| 743 |   /// Return true if lookup tables should be turned into relative lookup tables. | ||
| 744 | bool shouldBuildRelLookupTables() const; | ||
| 745 | |||
| 746 |   /// Return true if the input function which is cold at all call sites, | ||
| 747 |   ///  should use coldcc calling convention. | ||
| 748 | bool useColdCCForColdCall(Function &F) const; | ||
| 749 | |||
| 750 |   /// Estimate the overhead of scalarizing an instruction. Insert and Extract | ||
| 751 |   /// are set if the demanded result elements need to be inserted and/or | ||
| 752 |   /// extracted from vectors. | ||
| 753 | InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 754 | const APInt &DemandedElts, | ||
| 755 | bool Insert, bool Extract, | ||
| 756 | TTI::TargetCostKind CostKind) const; | ||
| 757 | |||
| 758 |   /// Estimate the overhead of scalarizing an instructions unique | ||
| 759 |   /// non-constant operands. The (potentially vector) types to use for each of | ||
| 760 |   /// argument are passes via Tys. | ||
| 761 | InstructionCost | ||
| 762 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 763 | ArrayRef<Type *> Tys, | ||
| 764 | TTI::TargetCostKind CostKind) const; | ||
| 765 | |||
| 766 |   /// If target has efficient vector element load/store instructions, it can | ||
| 767 |   /// return true here so that insertion/extraction costs are not added to | ||
| 768 |   /// the scalarization cost of a load/store. | ||
| 769 | bool supportsEfficientVectorElementLoadStore() const; | ||
| 770 | |||
| 771 |   /// If the target supports tail calls. | ||
| 772 | bool supportsTailCalls() const; | ||
| 773 | |||
| 774 |   /// If target supports tail call on \p CB | ||
| 775 | bool supportsTailCallFor(const CallBase *CB) const; | ||
| 776 | |||
| 777 |   /// Don't restrict interleaved unrolling to small loops. | ||
| 778 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; | ||
| 779 | |||
| 780 |   /// Returns options for expansion of memcmp. IsZeroCmp is | ||
| 781 |   // true if this is the expansion of memcmp(p1, p2, s) == 0. | ||
| 782 | struct MemCmpExpansionOptions { | ||
| 783 |     // Return true if memcmp expansion is enabled. | ||
| 784 | operator bool() const { return MaxNumLoads > 0; } | ||
| 785 | |||
| 786 |     // Maximum number of load operations. | ||
| 787 | unsigned MaxNumLoads = 0; | ||
| 788 | |||
| 789 |     // The list of available load sizes (in bytes), sorted in decreasing order. | ||
| 790 | SmallVector<unsigned, 8> LoadSizes; | ||
| 791 | |||
| 792 |     // For memcmp expansion when the memcmp result is only compared equal or | ||
| 793 |     // not-equal to 0, allow up to this number of load pairs per block. As an | ||
| 794 |     // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: | ||
| 795 |     //   a0 = load2bytes &a[0] | ||
| 796 |     //   b0 = load2bytes &b[0] | ||
| 797 |     //   a2 = load1byte  &a[2] | ||
| 798 |     //   b2 = load1byte  &b[2] | ||
| 799 |     //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0 | ||
| 800 | unsigned NumLoadsPerBlock = 1; | ||
| 801 | |||
| 802 |     // Set to true to allow overlapping loads. For example, 7-byte compares can | ||
| 803 |     // be done with two 4-byte compares instead of 4+2+1-byte compares. This | ||
| 804 |     // requires all loads in LoadSizes to be doable in an unaligned way. | ||
| 805 | bool AllowOverlappingLoads = false; | ||
| 806 | }; | ||
| 807 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||
| 808 | bool IsZeroCmp) const; | ||
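
  // Illustrative sketch: how a hypothetical target implementation (the name
  // `MyTTIImpl` is a placeholder) might fill in these options.
  //
  //   TTI::MemCmpExpansionOptions
  //   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  //     TTI::MemCmpExpansionOptions Options;
  //     Options.MaxNumLoads = OptSize ? 2 : 8;
  //     Options.LoadSizes = {8, 4, 2, 1}; // bytes, in decreasing order
  //     Options.AllowOverlappingLoads = true;
  //     if (IsZeroCmp)
  //       Options.NumLoadsPerBlock = 2;
  //     return Options;
  //   }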
| 809 | |||
| 810 |   /// Should the Select Optimization pass be enabled and ran. | ||
| 811 | bool enableSelectOptimize() const; | ||
| 812 | |||
| 813 |   /// Enable matching of interleaved access groups. | ||
| 814 | bool enableInterleavedAccessVectorization() const; | ||
| 815 | |||
| 816 |   /// Enable matching of interleaved access groups that contain predicated | ||
| 817 |   /// accesses or gaps and therefore vectorized using masked | ||
| 818 |   /// vector loads/stores. | ||
| 819 | bool enableMaskedInterleavedAccessVectorization() const; | ||
| 820 | |||
| 821 |   /// Indicate that it is potentially unsafe to automatically vectorize | ||
| 822 |   /// floating-point operations because the semantics of vector and scalar | ||
| 823 |   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math | ||
| 824 |   /// does not support IEEE-754 denormal numbers, while depending on the | ||
| 825 |   /// platform, scalar floating-point math does. | ||
| 826 |   /// This applies to floating-point math operations and calls, not memory | ||
| 827 |   /// operations, shuffles, or casts. | ||
| 828 | bool isFPVectorizationPotentiallyUnsafe() const; | ||
| 829 | |||
| 830 |   /// Determine if the target supports unaligned memory accesses. | ||
| 831 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, | ||
| 832 | unsigned AddressSpace = 0, | ||
| 833 | Align Alignment = Align(1), | ||
| 834 | unsigned *Fast = nullptr) const; | ||
| 835 | |||
| 836 |   /// Return hardware support for population count. | ||
| 837 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; | ||
| 838 | |||
| 839 |   /// Return true if the hardware has a fast square-root instruction. | ||
| 840 | bool haveFastSqrt(Type *Ty) const; | ||
| 841 | |||
| 842 |   /// Return true if the cost of the instruction is too high to speculatively | ||
| 843 |   /// execute and should be kept behind a branch. | ||
| 844 |   /// This normally just wraps around a getInstructionCost() call, but some | ||
| 845 |   /// targets might report a low TCK_SizeAndLatency value that is incompatible | ||
| 846 |   /// with the fixed TCC_Expensive value. | ||
| 847 |   /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute(). | ||
| 848 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) const; | ||
| 849 | |||
| 850 |   /// Return true if it is faster to check if a floating-point value is NaN | ||
| 851 |   /// (or not-NaN) versus a comparison against a constant FP zero value. | ||
| 852 |   /// Targets should override this if materializing a 0.0 for comparison is | ||
| 853 |   /// generally as cheap as checking for ordered/unordered. | ||
| 854 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; | ||
| 855 | |||
| 856 |   /// Return the expected cost of supporting the floating point operation | ||
| 857 |   /// of the specified type. | ||
| 858 | InstructionCost getFPOpCost(Type *Ty) const; | ||
| 859 | |||
| 860 |   /// Return the expected cost of materializing for the given integer | ||
| 861 |   /// immediate of the specified type. | ||
| 862 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 863 | TargetCostKind CostKind) const; | ||
| 864 | |||
| 865 |   /// Return the expected cost of materialization for the given integer | ||
| 866 |   /// immediate of the specified type for a given instruction. The cost can be | ||
| 867 |   /// zero if the immediate can be folded into the specified instruction. | ||
| 868 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 869 | const APInt &Imm, Type *Ty, | ||
| 870 | TargetCostKind CostKind, | ||
| 871 | Instruction *Inst = nullptr) const; | ||
| 872 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 873 | const APInt &Imm, Type *Ty, | ||
| 874 | TargetCostKind CostKind) const; | ||
| 875 | |||
| 876 |   /// Return the expected cost for the given integer when optimising | ||
| 877 |   /// for size. This is different than the other integer immediate cost | ||
| 878 |   /// functions in that it is subtarget agnostic. This is useful when you e.g. | ||
| 879 |   /// target one ISA such as Aarch32 but smaller encodings could be possible | ||
| 880 |   /// with another such as Thumb. This return value is used as a penalty when | ||
| 881 |   /// the total costs for a constant is calculated (the bigger the cost, the | ||
| 882 |   /// more beneficial constant hoisting is). | ||
| 883 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 884 | const APInt &Imm, Type *Ty) const; | ||
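
  // Illustrative sketch: a constant-hoisting style query asking what it costs
  // to materialize a 64-bit immediate as the second operand of an add. The
  // constant is arbitrary and `TTI`/`Ctx` are assumptions about the caller.
  //
  //   InstructionCost ImmCost = TTI.getIntImmCostInst(
  //       Instruction::Add, /*Idx=*/1, APInt(64, 0x12345678),
  //       Type::getInt64Ty(Ctx), TargetTransformInfo::TCK_SizeAndLatency);
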
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc, ///< Shuffle elements of a single source vector with
                         ///< any shuffle mask.
    SK_Splice            ///< Concatenates elements from the first input vector
                         ///< with elements of the second input vector, returning
                         ///< a vector of the same type as the input vectors.
                         ///< Index indicates start offset in the first input
                         ///< vector.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is a uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties {
    OP_None = 0,
    OP_PowerOf2 = 1,
    OP_NegatedPowerOf2 = 2,
  };

  // Describe the values an operand can take.  We're in the process
  // of migrating uses of OperandValueKind and OperandValueProperties
  // to use this class, and then will change the internal representation.
  struct OperandValueInfo {
    OperandValueKind Kind = OK_AnyValue;
    OperandValueProperties Properties = OP_None;

    bool isConstant() const {
      return Kind == OK_UniformConstantValue ||
             Kind == OK_NonUniformConstantValue;
    }
    bool isUniform() const {
      return Kind == OK_UniformConstantValue || Kind == OK_UniformValue;
    }
    bool isPowerOf2() const {
      return Properties == OP_PowerOf2;
    }
    bool isNegatedPowerOf2() const {
      return Properties == OP_NegatedPowerOf2;
    }

    OperandValueInfo getNoProps() const {
      return {Kind, OP_None};
    }
  };
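
  // Illustrative sketch: classifying an operand before a cost query. The
  // static TTI::getOperandInfo() helper used here is assumed (it is declared
  // elsewhere in this class, outside the lines shown above); `I` comes from
  // the caller.
  //
  //   TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(1));
  //   if (OpInfo.isConstant() && OpInfo.isPowerOf2()) {
  //     // e.g. a udiv by a (splatted) power-of-two constant, which many
  //     // targets can lower to a shift.
  //   }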
| 949 | |||
| 950 |   /// \return the number of registers in the target-provided register class. | ||
| 951 | unsigned getNumberOfRegisters(unsigned ClassID) const; | ||
| 952 | |||
| 953 |   /// \return the target-provided register class ID for the provided type, | ||
| 954 |   /// accounting for type promotion and other type-legalization techniques that | ||
| 955 |   /// the target might apply. However, it specifically does not account for the | ||
| 956 |   /// scalarization or splitting of vector types. Should a vector type require | ||
| 957 |   /// scalarization or splitting into multiple underlying vector registers, that | ||
| 958 |   /// type should be mapped to a register class containing no registers. | ||
| 959 |   /// Specifically, this is designed to provide a simple, high-level view of the | ||
| 960 |   /// register allocation later performed by the backend. These register classes | ||
| 961 |   /// don't necessarily map onto the register classes used by the backend. | ||
| 962 |   /// FIXME: It's not currently possible to determine how many registers | ||
| 963 |   /// are used by the provided type. | ||
| 964 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; | ||
| 965 | |||
| 966 |   /// \return the target-provided register class name | ||
| 967 | const char *getRegisterClassName(unsigned ClassID) const; | ||
| 968 | |||
| 969 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; | ||
| 970 | |||
| 971 |   /// \return The width of the largest scalar or vector register type. | ||
| 972 | TypeSize getRegisterBitWidth(RegisterKind K) const; | ||
| 973 | |||
| 974 |   /// \return The width of the smallest vector register type. | ||
| 975 | unsigned getMinVectorRegisterBitWidth() const; | ||
| 976 | |||
| 977 |   /// \return The maximum value of vscale if the target specifies an | ||
| 978 |   ///  architectural maximum vector length, and std::nullopt otherwise. | ||
| 979 | std::optional<unsigned> getMaxVScale() const; | ||
| 980 | |||
| 981 |   /// \return the value of vscale to tune the cost model for. | ||
| 982 | std::optional<unsigned> getVScaleForTuning() const; | ||
| 983 | |||
| 984 |   /// \return True if the vectorization factor should be chosen to | ||
| 985 |   /// make the vector of the smallest element type match the size of a | ||
| 986 |   /// vector register. For wider element types, this could result in | ||
| 987 |   /// creating vectors that span multiple vector registers. | ||
| 988 |   /// If false, the vectorization factor will be chosen based on the | ||
| 989 |   /// size of the widest element type. | ||
| 990 |   /// \p K Register Kind for vectorization. | ||
| 991 | bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; | ||
| 992 | |||
| 993 |   /// \return The minimum vectorization factor for types of given element | ||
| 994 |   /// bit width, or 0 if there is no minimum VF. The returned value only | ||
| 995 |   /// applies when shouldMaximizeVectorBandwidth returns true. | ||
| 996 |   /// If IsScalable is true, the returned ElementCount must be a scalable VF. | ||
| 997 | ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; | ||
| 998 | |||
| 999 |   /// \return The maximum vectorization factor for types of given element | ||
| 1000 |   /// bit width and opcode, or 0 if there is no maximum VF. | ||
| 1001 |   /// Currently only used by the SLP vectorizer. | ||
| 1002 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; | ||
| 1003 | |||
| 1004 |   /// \return The minimum vectorization factor for the store instruction. Given | ||
| 1005 |   /// the initial estimation of the minimum vector factor and store value type, | ||
| 1006 |   /// it tries to find possible lowest VF, which still might be profitable for | ||
| 1007 |   /// the vectorization. | ||
| 1008 |   /// \param VF Initial estimation of the minimum vector factor. | ||
| 1009 |   /// \param ScalarMemTy Scalar memory type of the store operation. | ||
| 1010 |   /// \param ScalarValTy Scalar type of the stored value. | ||
| 1011 |   /// Currently only used by the SLP vectorizer. | ||
| 1012 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 1013 | Type *ScalarValTy) const; | ||
| 1014 | |||
| 1015 |   /// \return True if \p I should be considered for address type promotion. | ||
| 1016 |   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is | ||
| 1017 |   /// profitable without finding other extensions fed by the same input. | ||
| 1018 | bool shouldConsiderAddressTypePromotion( | ||
| 1019 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; | ||
| 1020 | |||
| 1021 |   /// \return The size of a cache line in bytes. | ||
| 1022 | unsigned getCacheLineSize() const; | ||
| 1023 | |||
| 1024 |   /// The possible cache levels | ||
| 1025 | enum class CacheLevel { | ||
| 1026 |     L1D, // The L1 data cache | ||
| 1027 |     L2D, // The L2 data cache | ||
| 1028 | |||
| 1029 |     // We currently do not model L3 caches, as their sizes differ widely between | ||
| 1030 |     // microarchitectures. Also, we currently do not have a use for L3 cache | ||
| 1031 |     // size modeling yet. | ||
| 1032 | }; | ||
| 1033 | |||
| 1034 |   /// \return The size of the cache level in bytes, if available. | ||
| 1035 | std::optional<unsigned> getCacheSize(CacheLevel Level) const; | ||
| 1036 | |||
| 1037 |   /// \return The associativity of the cache level, if available. | ||
| 1038 | std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const; | ||
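|   // Illustrative sketch only: using the L1 data cache size, when the target | ||
|   // reports one, to bound a hypothetical working-set estimate | ||
|   // `FootprintBytes` computed by the caller. | ||
|   //   if (std::optional<unsigned> L1Bytes = | ||
|   //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D)) | ||
|   //     FootprintBytes = std::min(FootprintBytes, *L1Bytes); | ||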
| 1039 | |||
| 1040 |   /// \return How much before a load we should place the prefetch | ||
| 1041 |   /// instruction.  This is currently measured in number of | ||
| 1042 |   /// instructions. | ||
| 1043 | unsigned getPrefetchDistance() const; | ||
| 1044 | |||
| 1045 |   /// Some HW prefetchers can handle accesses up to a certain constant stride. | ||
| 1046 |   /// Sometimes prefetching is beneficial even below the HW prefetcher limit, | ||
| 1047 |   /// and the arguments provided are meant to serve as a basis for deciding this | ||
| 1048 |   /// for a particular loop. | ||
| 1049 |   /// | ||
| 1050 |   /// \param NumMemAccesses        Number of memory accesses in the loop. | ||
| 1051 |   /// \param NumStridedMemAccesses Number of the memory accesses that | ||
| 1052 |   ///                              ScalarEvolution could find a known stride | ||
| 1053 |   ///                              for. | ||
| 1054 |   /// \param NumPrefetches         Number of software prefetches that will be | ||
| 1055 |   ///                              emitted as determined by the addresses | ||
| 1056 |   ///                              involved and the cache line size. | ||
| 1057 |   /// \param HasCall               True if the loop contains a call. | ||
| 1058 |   /// | ||
| 1059 |   /// \return This is the minimum stride in bytes where it makes sense to start | ||
| 1060 |   ///         adding SW prefetches. The default is 1, i.e. prefetch with any | ||
| 1061 |   ///         stride. | ||
| 1062 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 1063 |                                 unsigned NumStridedMemAccesses, | ||
| 1064 | unsigned NumPrefetches, bool HasCall) const; | ||
| 1065 | |||
| 1066 |   /// \return The maximum number of iterations to prefetch ahead.  If | ||
| 1067 |   /// the required number of iterations is more than this number, no | ||
| 1068 |   /// prefetching is performed. | ||
| 1069 | unsigned getMaxPrefetchIterationsAhead() const; | ||
| 1070 | |||
| 1071 |   /// \return True if prefetching should also be done for writes. | ||
| 1072 | bool enableWritePrefetching() const; | ||
| 1073 | |||
| 1074 |   /// \return True if the target wants to issue a prefetch in address space \p AS. | ||
| 1075 | bool shouldPrefetchAddressSpace(unsigned AS) const; | ||
| 1076 | |||
| 1077 |   /// \return The maximum interleave factor that any transform should try to | ||
| 1078 |   /// perform for this target. This number depends on the level of parallelism | ||
| 1079 |   /// and the number of execution units in the CPU. | ||
| 1080 | unsigned getMaxInterleaveFactor(unsigned VF) const; | ||
| 1081 | |||
| 1082 |   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. | ||
| 1083 | static OperandValueInfo getOperandInfo(const Value *V); | ||
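|   // Illustrative sketch: deriving operand properties for a later cost query | ||
|   // from an existing binary instruction `I` (assumed to be in scope). | ||
|   //   TargetTransformInfo::OperandValueInfo RHSInfo = | ||
|   //       TargetTransformInfo::getOperandInfo(I.getOperand(1)); | ||
|   //   bool RHSIsPow2Const = RHSInfo.isConstant() && RHSInfo.isPowerOf2(); | ||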
| 1084 | |||
| 1085 |   /// This is an approximation of reciprocal throughput of a math/logic op. | ||
| 1086 |   /// A higher cost indicates less expected throughput. | ||
| 1087 |   /// From Agner Fog's guides, reciprocal throughput is "the average number of | ||
| 1088 |   /// clock cycles per instruction when the instructions are not part of a | ||
| 1089 |   /// limiting dependency chain." | ||
| 1090 |   /// Therefore, costs should be scaled to account for multiple execution units | ||
| 1091 |   /// on the target that can process this type of instruction. For example, if | ||
| 1092 |   /// there are 5 scalar integer units and 2 vector integer units that can | ||
| 1093 |   /// calculate an 'add' in a single cycle, this model should indicate that the | ||
| 1094 |   /// cost of the vector add instruction is 2.5 times the cost of the scalar | ||
| 1095 |   /// add instruction. | ||
| 1096 |   /// \p Args is an optional argument which holds the instruction operands | ||
| 1097 |   /// values so the TTI can analyze those values searching for special | ||
| 1098 |   /// cases or optimizations based on those values. | ||
| 1099 |   /// \p CxtI is the optional original context instruction, if one exists, to | ||
| 1100 |   /// provide even more information. | ||
| 1101 |   InstructionCost getArithmeticInstrCost( | ||
| 1102 | unsigned Opcode, Type *Ty, | ||
| 1103 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1104 | TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None}, | ||
| 1105 | TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None}, | ||
| 1106 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), | ||
| 1107 | const Instruction *CxtI = nullptr) const; | ||
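|   // Minimal sketch, assuming `TTI` is an initialized TargetTransformInfo and | ||
|   // `VecTy` is a VectorType* chosen by the caller: estimate the | ||
|   // reciprocal-throughput cost of a vector multiply whose second operand is | ||
|   // known to be a uniform constant power of two. | ||
|   //   InstructionCost MulCost = TTI.getArithmeticInstrCost( | ||
|   //       Instruction::Mul, VecTy, TargetTransformInfo::TCK_RecipThroughput, | ||
|   //       {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, | ||
|   //       {TargetTransformInfo::OK_UniformConstantValue, | ||
|   //        TargetTransformInfo::OP_PowerOf2}); | ||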
| 1108 | |||
| 1109 |   /// \return The cost of a shuffle instruction of kind Kind and of type Tp. | ||
| 1110 |   /// The exact mask may be passed as Mask, or else the array will be empty. | ||
| 1111 |   /// The index and subtype parameters are used by the subvector insertion and | ||
| 1112 |   /// extraction shuffle kinds to show the insert/extract point and the type of | ||
| 1113 |   /// the subvector being inserted/extracted. The operands of the shuffle can be | ||
| 1114 |   /// passed through \p Args, which helps improve the cost estimation in some | ||
| 1115 |   /// cases, like in broadcast loads. | ||
| 1116 |   /// NOTE: For subvector extractions Tp represents the source type. | ||
| 1117 | InstructionCost | ||
| 1118 | getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 1119 | ArrayRef<int> Mask = std::nullopt, | ||
| 1120 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1121 | int Index = 0, VectorType *SubTp = nullptr, | ||
| 1122 | ArrayRef<const Value *> Args = std::nullopt) const; | ||
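|   // Illustrative sketch: the cost of broadcasting lane 0 across `VecTy` | ||
|   // (`TTI` and `VecTy` are assumed to be provided by the caller). | ||
|   //   InstructionCost SplatCost = | ||
|   //       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); | ||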
| 1123 | |||
| 1124 |   /// Represents a hint about the context in which a cast is used. | ||
| 1125 |   /// | ||
| 1126 |   /// For zext/sext, the context of the cast is the operand, which must be a | ||
| 1127 |   /// load of some kind. For trunc, the context of the cast is the single | ||
| 1128 |   /// user of the instruction, which must be a store of some kind. | ||
| 1129 |   /// | ||
| 1130 |   /// This enum allows the vectorizer to give getCastInstrCost an idea of the | ||
| 1131 |   /// type of cast it's dealing with, as not every cast is equal. For instance, | ||
| 1132 |   /// the zext of a load may be free, but the zext of an interleaving load can | ||
| 1133 |   /// be (very) expensive! | ||
| 1134 |   /// | ||
| 1135 |   /// See \c getCastContextHint to compute a CastContextHint from a cast | ||
| 1136 |   /// Instruction*. Callers can use it if they don't need to override the | ||
| 1137 |   /// context and just want it to be calculated from the instruction. | ||
| 1138 |   /// | ||
| 1139 |   /// FIXME: This handles the types of load/store that the vectorizer can | ||
| 1140 |   /// produce, which are the cases where the context instruction is most | ||
| 1141 |   /// likely to be incorrect. There are other situations where that can happen | ||
| 1142 |   /// too, which might be handled here but in the long run a more general | ||
| 1143 |   /// solution of costing multiple instructions at the same time may be better. | ||
| 1144 | enum class CastContextHint : uint8_t { | ||
| 1145 |     None,          ///< The cast is not used with a load/store of any kind. | ||
| 1146 |     Normal,        ///< The cast is used with a normal load/store. | ||
| 1147 |     Masked,        ///< The cast is used with a masked load/store. | ||
| 1148 |     GatherScatter, ///< The cast is used with a gather/scatter. | ||
| 1149 |     Interleave,    ///< The cast is used with an interleaved load/store. | ||
| 1150 |     Reversed,      ///< The cast is used with a reversed load/store. | ||
| 1151 | }; | ||
| 1152 | |||
| 1153 |   /// Calculates a CastContextHint from \p I. | ||
| 1154 |   /// This should be used by callers of getCastInstrCost if they wish to | ||
| 1155 |   /// determine the context from some instruction. | ||
| 1156 |   /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, | ||
| 1157 |   /// or if it's another type of cast. | ||
| 1158 | static CastContextHint getCastContextHint(const Instruction *I); | ||
| 1159 | |||
| 1160 |   /// \return The expected cost of cast instructions, such as bitcast, trunc, | ||
| 1161 |   /// zext, etc. If there is an existing instruction that holds Opcode, it | ||
| 1162 |   /// may be passed in the 'I' parameter. | ||
| 1163 | InstructionCost | ||
| 1164 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||
| 1165 | TTI::CastContextHint CCH, | ||
| 1166 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, | ||
| 1167 | const Instruction *I = nullptr) const; | ||
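|   // Minimal sketch combining the two hooks above; `ZExt` is assumed to be an | ||
|   // existing zext instruction and `TTI` a valid TargetTransformInfo. | ||
|   //   TargetTransformInfo::CastContextHint CCH = | ||
|   //       TargetTransformInfo::getCastContextHint(&ZExt); | ||
|   //   InstructionCost C = TTI.getCastInstrCost( | ||
|   //       Instruction::ZExt, ZExt.getType(), ZExt.getOperand(0)->getType(), | ||
|   //       CCH, TargetTransformInfo::TCK_RecipThroughput, &ZExt); | ||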
| 1168 | |||
| 1169 |   /// \return The expected cost of a sign- or zero-extended vector extract. Use | ||
| 1170 |   /// Index = -1 to indicate that there is no information about the index value. | ||
| 1171 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 1172 |                                            VectorType *VecTy, | ||
| 1173 | unsigned Index) const; | ||
| 1174 | |||
| 1175 |   /// \return The expected cost of control-flow related instructions such as | ||
| 1176 |   /// Phi, Ret, Br, Switch. | ||
| 1177 | InstructionCost | ||
| 1178 | getCFInstrCost(unsigned Opcode, | ||
| 1179 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, | ||
| 1180 | const Instruction *I = nullptr) const; | ||
| 1181 | |||
| 1182 |   /// \returns The expected cost of compare and select instructions. If there | ||
| 1183 |   /// is an existing instruction that holds Opcode, it may be passed in the | ||
| 1184 |   /// 'I' parameter. The \p VecPred parameter can be used to indicate the select | ||
| 1185 |   /// is using a compare with the specified predicate as condition. When vector | ||
| 1186 |   /// types are passed, \p VecPred must be used for all lanes. | ||
| 1187 | InstructionCost | ||
| 1188 | getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||
| 1189 | CmpInst::Predicate VecPred, | ||
| 1190 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1191 | const Instruction *I = nullptr) const; | ||
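|   // Illustrative sketch: costing a vector select whose condition comes from a | ||
|   // signed less-than compare. `VecTy` and `MaskTy` are hypothetical types | ||
|   // chosen by the caller. | ||
|   //   InstructionCost SelCost = TTI.getCmpSelInstrCost( | ||
|   //       Instruction::Select, VecTy, MaskTy, CmpInst::ICMP_SLT); | ||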
| 1192 | |||
| 1193 |   /// \return The expected cost of vector Insert and Extract. | ||
| 1194 |   /// Use -1 to indicate that there is no information on the index value. | ||
| 1195 |   /// This is used when the instruction is not available; a typical use | ||
| 1196 |   /// case is to provision the cost of vectorization/scalarization in | ||
| 1197 |   /// vectorizer passes. | ||
| 1198 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 1199 | TTI::TargetCostKind CostKind, | ||
| 1200 | unsigned Index = -1, Value *Op0 = nullptr, | ||
| 1201 | Value *Op1 = nullptr) const; | ||
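|   // Minimal sketch: the cost of extracting lane 0 when no concrete | ||
|   // extractelement instruction exists yet (`TTI` and `VecTy` assumed). | ||
|   //   InstructionCost ExtractCost = TTI.getVectorInstrCost( | ||
|   //       Instruction::ExtractElement, VecTy, | ||
|   //       TargetTransformInfo::TCK_RecipThroughput, /*Index=*/0); | ||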
| 1202 | |||
| 1203 |   /// \return The expected cost of vector Insert and Extract. | ||
| 1204 |   /// This is used when the instruction is available, and the implementation | ||
| 1205 |   /// asserts that 'I' is not nullptr. | ||
| 1206 |   /// | ||
| 1207 |   /// A typical suitable use case is cost estimation when vector instruction | ||
| 1208 |   /// exists (e.g., from basic blocks during transformation). | ||
| 1209 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 1210 | TTI::TargetCostKind CostKind, | ||
| 1211 | unsigned Index = -1) const; | ||
| 1212 | |||
| 1213 |   /// \return The cost of replication shuffle of \p VF elements typed \p EltTy | ||
| 1214 |   /// \p ReplicationFactor times. | ||
| 1215 |   /// | ||
| 1216 |   /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: | ||
| 1217 |   ///   <0,0,0,1,1,1,2,2,2,3,3,3> | ||
| 1218 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, | ||
| 1219 |                                             int VF, | ||
| 1220 | const APInt &DemandedDstElts, | ||
| 1221 | TTI::TargetCostKind CostKind); | ||
| 1222 | |||
| 1223 |   /// \return The cost of Load and Store instructions. | ||
| 1224 | InstructionCost | ||
| 1225 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1226 |                   unsigned AddressSpace, | ||
| 1227 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1228 | OperandValueInfo OpdInfo = {OK_AnyValue, OP_None}, | ||
| 1229 | const Instruction *I = nullptr) const; | ||
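|   // Illustrative sketch: cost of a 16-byte-aligned vector load from the | ||
|   // default address space (`TTI` and `VecTy` assumed to be in scope). | ||
|   //   InstructionCost LoadCost = TTI.getMemoryOpCost( | ||
|   //       Instruction::Load, VecTy, Align(16), /*AddressSpace=*/0); | ||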
| 1230 | |||
| 1231 |   /// \return The cost of VP Load and Store instructions. | ||
| 1232 | InstructionCost | ||
| 1233 | getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1234 |                     unsigned AddressSpace, | ||
| 1235 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1236 | const Instruction *I = nullptr) const; | ||
| 1237 | |||
| 1238 |   /// \return The cost of masked Load and Store instructions. | ||
| 1239 |   InstructionCost getMaskedMemoryOpCost( | ||
| 1240 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, | ||
| 1241 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1242 | |||
| 1243 |   /// \return The cost of Gather or Scatter operation | ||
| 1244 |   /// \p Opcode - is a type of memory access Load or Store | ||
| 1245 |   /// \p DataTy - a vector type of the data to be loaded or stored | ||
| 1246 |   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory | ||
| 1247 |   /// \p VariableMask - true when the memory access is predicated with a mask | ||
| 1248 |   ///                   that is not a compile-time constant | ||
| 1249 |   /// \p Alignment - alignment of single element | ||
| 1250 |   /// \p I - the optional original context instruction, if one exists, e.g. the | ||
| 1251 |   ///        load/store to transform or the call to the gather/scatter intrinsic | ||
| 1252 |   InstructionCost getGatherScatterOpCost( | ||
| 1253 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, | ||
| 1254 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1255 | const Instruction *I = nullptr) const; | ||
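|   // Minimal sketch, assuming `DataTy` is the vector type being loaded and | ||
|   // `PtrVec` the vector-of-pointers operand: | ||
|   //   InstructionCost GatherCost = TTI.getGatherScatterOpCost( | ||
|   //       Instruction::Load, DataTy, PtrVec, /*VariableMask=*/true, Align(4)); | ||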
| 1256 | |||
| 1257 |   /// \return The cost of the interleaved memory operation. | ||
| 1258 |   /// \p Opcode is the memory operation code | ||
| 1259 |   /// \p VecTy is the vector type of the interleaved access. | ||
| 1260 |   /// \p Factor is the interleave factor | ||
| 1261 |   /// \p Indices is the indices for interleaved load members (as interleaved | ||
| 1262 |   ///    load allows gaps) | ||
| 1263 |   /// \p Alignment is the alignment of the memory operation | ||
| 1264 |   /// \p AddressSpace is address space of the pointer. | ||
| 1265 |   /// \p UseMaskForCond indicates if the memory access is predicated. | ||
| 1266 |   /// \p UseMaskForGaps indicates if gaps should be masked. | ||
| 1267 |   InstructionCost getInterleavedMemoryOpCost( | ||
| 1268 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 1269 |       Align Alignment, unsigned AddressSpace, | ||
| 1270 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1271 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; | ||
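|   // Illustrative sketch: costing a factor-2 interleaved load where both | ||
|   // members are used (`WideVecTy` is the assumed concatenated vector type). | ||
|   //   unsigned Indices[] = {0, 1}; | ||
|   //   InstructionCost ILCost = TTI.getInterleavedMemoryOpCost( | ||
|   //       Instruction::Load, WideVecTy, /*Factor=*/2, Indices, Align(16), | ||
|   //       /*AddressSpace=*/0); | ||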
| 1272 | |||
| 1273 |   /// A helper function to determine the type of reduction algorithm used | ||
| 1274 |   /// for a given \p Opcode and set of FastMathFlags \p FMF. | ||
| 1275 | static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) { | ||
| 1276 | return FMF && !(*FMF).allowReassoc(); | ||
| 1277 |   } | ||
| 1278 | |||
| 1279 |   /// Calculate the cost of vector reduction intrinsics. | ||
| 1280 |   /// | ||
| 1281 |   /// This is the cost of reducing the vector value of type \p Ty to a scalar | ||
| 1282 |   /// value using the operation denoted by \p Opcode. The FastMathFlags | ||
| 1283 |   /// parameter \p FMF indicates what type of reduction we are performing: | ||
| 1284 |   ///   1. Tree-wise. This is the typical 'fast' reduction performed that | ||
| 1285 |   ///   involves successively splitting a vector in half and doing the | ||
| 1286 |   ///   operation on the pair of halves until you have a scalar value. For | ||
| 1287 |   ///   example: | ||
| 1288 |   ///     (v0, v1, v2, v3) | ||
| 1289 |   ///     ((v0+v2), (v1+v3), undef, undef) | ||
| 1290 |   ///     ((v0+v2+v1+v3), undef, undef, undef) | ||
| 1291 |   ///   This is the default behaviour for integer operations, whereas for | ||
| 1292 |   ///   floating point we only do this if \p FMF indicates that | ||
| 1293 |   ///   reassociation is allowed. | ||
| 1294 |   ///   2. Ordered. For a vector with N elements this involves performing N | ||
| 1295 |   ///   operations in lane order, starting with an initial scalar value, i.e. | ||
| 1296 |   ///     result = InitVal + v0 | ||
| 1297 |   ///     result = result + v1 | ||
| 1298 |   ///     result = result + v2 | ||
| 1299 |   ///     result = result + v3 | ||
| 1300 |   ///   This is only the case for FP operations and when reassociation is not | ||
| 1301 |   ///   allowed. | ||
| 1302 |   /// | ||
| 1303 |   InstructionCost getArithmeticReductionCost( | ||
| 1304 | unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, | ||
| 1305 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
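|   // Minimal sketch: an fadd reduction costed as tree-wise by allowing | ||
|   // reassociation (without it, requiresOrderedReduction() above would hold | ||
|   // and an ordered reduction is costed instead). `TTI` and `VecTy` assumed. | ||
|   //   FastMathFlags FMF; | ||
|   //   FMF.setAllowReassoc(); | ||
|   //   InstructionCost RdxCost = | ||
|   //       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, FMF); | ||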
| 1306 | |||
| 1307 |   InstructionCost getMinMaxReductionCost( | ||
| 1308 | VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 1309 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1310 | |||
| 1311 |   /// Calculate the cost of an extended reduction pattern, similar to | ||
| 1312 |   /// getArithmeticReductionCost of an Add reduction with multiply and optional | ||
| 1313 |   /// extensions. This is the cost of either: | ||
| 1314 |   ///   ResTy vecreduce.add(mul(A, B)), or | ||
| 1315 |   ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). | ||
| 1316 |   InstructionCost getMulAccReductionCost( | ||
| 1317 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1318 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1319 | |||
| 1320 |   /// Calculate the cost of an extended reduction pattern, similar to | ||
| 1321 |   /// getArithmeticReductionCost of a reduction with an extension. | ||
| 1322 |   /// This is the cost of: | ||
| 1323 |   ///   ResTy vecreduce.opcode(ext(Ty A)). | ||
| 1324 |   InstructionCost getExtendedReductionCost( | ||
| 1325 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1326 | std::optional<FastMathFlags> FMF, | ||
| 1327 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1328 | |||
| 1329 |   /// \returns The cost of Intrinsic instructions. Analyses the real arguments. | ||
| 1330 |   /// Three cases are handled: 1. scalar instruction 2. vector instruction | ||
| 1331 |   /// 3. scalar instruction which is to be vectorized. | ||
| 1332 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 1333 | TTI::TargetCostKind CostKind) const; | ||
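|   // Illustrative sketch: costing a vector llvm.fmuladd call via its cost | ||
|   // attributes (`VecTy` is an assumed vector floating-point type). | ||
|   //   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, VecTy, | ||
|   //                                 {VecTy, VecTy, VecTy}); | ||
|   //   InstructionCost FMACost = TTI.getIntrinsicInstrCost( | ||
|   //       Attrs, TargetTransformInfo::TCK_RecipThroughput); | ||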
| 1334 | |||
| 1335 |   /// \returns The cost of Call instructions. | ||
| 1336 |   InstructionCost getCallInstrCost( | ||
| 1337 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, | ||
| 1338 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; | ||
| 1339 | |||
| 1340 |   /// \returns The number of pieces into which the provided type must be | ||
| 1341 |   /// split during legalization. Zero is returned when the answer is unknown. | ||
| 1342 | unsigned getNumberOfParts(Type *Tp) const; | ||
| 1343 | |||
| 1344 |   /// \returns The cost of the address computation. For most targets this can be | ||
| 1345 |   /// merged into the instruction indexing mode. Some targets might want to | ||
| 1346 |   /// distinguish between address computation for memory operations on vector | ||
| 1347 |   /// types and scalar types. Such targets should override this function. | ||
| 1348 |   /// The 'SE' parameter holds a pointer to the scalar evolution object, which | ||
| 1349 |   /// is used to get the step value of 'Ptr' in the case of a constant stride. | ||
| 1350 |   /// The 'Ptr' parameter holds the SCEV of the access pointer. | ||
| 1351 | InstructionCost getAddressComputationCost(Type *Ty, | ||
| 1352 | ScalarEvolution *SE = nullptr, | ||
| 1353 | const SCEV *Ptr = nullptr) const; | ||
| 1354 | |||
| 1355 |   /// \returns The cost, if any, of keeping values of the given types alive | ||
| 1356 |   /// over a callsite. | ||
| 1357 |   /// | ||
| 1358 |   /// Some types may require the use of register classes that do not have | ||
| 1359 |   /// any callee-saved registers, so would require a spill and fill. | ||
| 1360 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; | ||
| 1361 | |||
| 1362 |   /// \returns True if the intrinsic is a supported memory intrinsic.  Info | ||
| 1363 |   /// will contain additional information - whether the intrinsic may read | ||
| 1364 |   /// or write memory, its volatility and the pointer.  Info is undefined | ||
| 1365 |   /// if false is returned. | ||
| 1366 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; | ||
| 1367 | |||
| 1368 |   /// \returns The maximum element size, in bytes, for an element | ||
| 1369 |   /// unordered-atomic memory intrinsic. | ||
| 1370 | unsigned getAtomicMemIntrinsicMaxElementSize() const; | ||
| 1371 | |||
| 1372 |   /// \returns A value which is the result of the given memory intrinsic.  New | ||
| 1373 |   /// instructions may be created to extract the result from the given intrinsic | ||
| 1374 |   /// memory operation.  Returns nullptr if the target cannot create a result | ||
| 1375 |   /// from the given intrinsic. | ||
| 1376 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 1377 | Type *ExpectedType) const; | ||
| 1378 | |||
| 1379 |   /// \returns The type to use in a loop expansion of a memcpy call. | ||
| 1380 | Type *getMemcpyLoopLoweringType( | ||
| 1381 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 1382 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 1383 | std::optional<uint32_t> AtomicElementSize = std::nullopt) const; | ||
| 1384 | |||
| 1385 |   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. | ||
| 1386 |   /// \param RemainingBytes The number of bytes to copy. | ||
| 1387 |   /// | ||
| 1388 |   /// Calculates the operand types to use when copying \p RemainingBytes of | ||
| 1389 |   /// memory, where source and destination alignments are \p SrcAlign and | ||
| 1390 |   /// \p DestAlign respectively. | ||
| 1391 | void getMemcpyLoopResidualLoweringType( | ||
| 1392 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 1393 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 1394 | unsigned SrcAlign, unsigned DestAlign, | ||
| 1395 | std::optional<uint32_t> AtomicCpySize = std::nullopt) const; | ||
| 1396 | |||
| 1397 |   /// \returns True if the two functions have compatible attributes for inlining | ||
| 1398 |   /// purposes. | ||
| 1399 | bool areInlineCompatible(const Function *Caller, | ||
| 1400 | const Function *Callee) const; | ||
| 1401 | |||
| 1402 |   /// \returns True if the caller and callee agree on how \p Types will be | ||
| 1403 |   /// passed to or returned from the callee. | ||
| 1405 |   /// \param Types List of types to check. | ||
| 1406 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, | ||
| 1407 | const ArrayRef<Type *> &Types) const; | ||
| 1408 | |||
| 1409 |   /// The type of load/store indexing. | ||
| 1410 | enum MemIndexedMode { | ||
| 1411 |     MIM_Unindexed, ///< No indexing. | ||
| 1412 |     MIM_PreInc,    ///< Pre-incrementing. | ||
| 1413 |     MIM_PreDec,    ///< Pre-decrementing. | ||
| 1414 |     MIM_PostInc,   ///< Post-incrementing. | ||
| 1415 |     MIM_PostDec    ///< Post-decrementing. | ||
| 1416 | }; | ||
| 1417 | |||
| 1418 |   /// \returns True if the specified indexed load for the given type is legal. | ||
| 1419 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; | ||
| 1420 | |||
| 1421 |   /// \returns True if the specified indexed store for the given type is legal. | ||
| 1422 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; | ||
| 1423 | |||
| 1424 |   /// \returns The bitwidth of the largest vector type that should be used to | ||
| 1425 |   /// load/store in the given address space. | ||
| 1426 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; | ||
| 1427 | |||
| 1428 |   /// \returns True if the load instruction is legal to vectorize. | ||
| 1429 | bool isLegalToVectorizeLoad(LoadInst *LI) const; | ||
| 1430 | |||
| 1431 |   /// \returns True if the store instruction is legal to vectorize. | ||
| 1432 | bool isLegalToVectorizeStore(StoreInst *SI) const; | ||
| 1433 | |||
| 1434 |   /// \returns True if it is legal to vectorize the given load chain. | ||
| 1435 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 1436 | unsigned AddrSpace) const; | ||
| 1437 | |||
| 1438 |   /// \returns True if it is legal to vectorize the given store chain. | ||
| 1439 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 1440 | unsigned AddrSpace) const; | ||
| 1441 | |||
| 1442 |   /// \returns True if it is legal to vectorize the given reduction kind. | ||
| 1443 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 1444 | ElementCount VF) const; | ||
| 1445 | |||
| 1446 |   /// \returns True if the given type is supported for scalable vectors | ||
| 1447 | bool isElementTypeLegalForScalableVector(Type *Ty) const; | ||
| 1448 | |||
| 1449 |   /// \returns The new vector factor value if the target doesn't support \p | ||
| 1450 |   /// SizeInBytes loads or has a better vector factor. | ||
| 1451 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 1452 |                                unsigned ChainSizeInBytes, | ||
| 1453 | VectorType *VecTy) const; | ||
| 1454 | |||
| 1455 |   /// \returns The new vector factor value if the target doesn't support \p | ||
| 1456 |   /// SizeInBytes stores or has a better vector factor. | ||
| 1457 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 1458 |                                 unsigned ChainSizeInBytes, | ||
| 1459 | VectorType *VecTy) const; | ||
| 1460 | |||
| 1461 |   /// Flags describing the kind of vector reduction. | ||
| 1462 | struct ReductionFlags { | ||
| 1463 | ReductionFlags() = default; | ||
| 1464 | bool IsMaxOp = | ||
| 1465 | false; ///< If the op is a min/max kind, true if it's a max operation. | ||
| 1466 | bool IsSigned = false; ///< Whether the operation is a signed int reduction. | ||
| 1467 | bool NoNaN = | ||
| 1468 | false; ///< If op is an fp min/max, whether NaNs may be present. | ||
| 1469 | }; | ||
| 1470 | |||
| 1471 |   /// \returns True if the target prefers in-loop reductions. | ||
| 1472 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 1473 | ReductionFlags Flags) const; | ||
| 1474 | |||
| 1475 |   /// \returns True if the target prefers the reduction select kept in the loop | ||
| 1476 |   /// when tail folding, i.e. | ||
| 1477 |   /// loop: | ||
| 1478 |   ///   p = phi (0, s) | ||
| 1479 |   ///   a = add (p, x) | ||
| 1480 |   ///   s = select (mask, a, p) | ||
| 1481 |   /// vecreduce.add(s) | ||
| 1482 |   /// | ||
| 1483 |   /// As opposed to the normal scheme of p = phi (0, a) which allows the select | ||
| 1484 |   /// to be pulled out of the loop. If the select(.., add, ..) can be predicated | ||
| 1485 |   /// by the target, this can lead to cleaner code generation. | ||
| 1486 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 1487 | ReductionFlags Flags) const; | ||
| 1488 | |||
| 1489 |   /// Return true if the loop vectorizer should consider vectorizing an | ||
| 1490 |   /// otherwise scalar epilogue loop. | ||
| 1491 | bool preferEpilogueVectorization() const; | ||
| 1492 | |||
| 1493 |   /// \returns True if the target wants to expand the given reduction intrinsic | ||
| 1494 |   /// into a shuffle sequence. | ||
| 1495 | bool shouldExpandReduction(const IntrinsicInst *II) const; | ||
| 1496 | |||
| 1497 |   /// \returns the size cost of rematerializing a GlobalValue address relative | ||
| 1498 |   /// to a stack reload. | ||
| 1499 | unsigned getGISelRematGlobalCost() const; | ||
| 1500 | |||
| 1501 |   /// \returns the lower bound of a trip count to decide on vectorization | ||
| 1502 |   /// while tail-folding. | ||
| 1503 | unsigned getMinTripCountTailFoldingThreshold() const; | ||
| 1504 | |||
| 1505 |   /// \returns True if the target supports scalable vectors. | ||
| 1506 | bool supportsScalableVectors() const; | ||
| 1507 | |||
| 1508 |   /// \return true when scalable vectorization is preferred. | ||
| 1509 | bool enableScalableVectorization() const; | ||
| 1510 | |||
| 1511 |   /// \name Vector Predication Information | ||
| 1512 |   /// @{ | ||
| 1513 |   /// Whether the target supports the %evl parameter of VP intrinsics efficiently | ||
| 1514 |   /// in hardware, for the given opcode and type/alignment. (see LLVM Language | ||
| 1515 |   /// Reference - "Vector Predication Intrinsics"). | ||
| 1516 |   /// Use of %evl is discouraged when that is not the case. | ||
| 1517 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 1518 | Align Alignment) const; | ||
| 1519 | |||
| 1520 | struct VPLegalization { | ||
| 1521 | enum VPTransform { | ||
| 1522 |       // keep the predicating parameter | ||
| 1523 | Legal = 0, | ||
| 1524 |       // where legal, discard the predicate parameter | ||
| 1525 | Discard = 1, | ||
| 1526 |       // transform into something else that is also predicating | ||
| 1527 | Convert = 2 | ||
| 1528 | }; | ||
| 1529 | |||
| 1530 |     // How to transform the EVL parameter. | ||
| 1531 |     // Legal:   keep the EVL parameter as it is. | ||
| 1532 |     // Discard: Ignore the EVL parameter where it is safe to do so. | ||
| 1533 |     // Convert: Fold the EVL into the mask parameter. | ||
| 1534 |     VPTransform EVLParamStrategy; | ||
| 1535 | |||
| 1536 |     // How to transform the operator. | ||
| 1537 |     // Legal:   The target supports this operator. | ||
| 1538 |     // Convert: Convert this to a non-VP operation. | ||
| 1539 |     // The 'Discard' strategy is invalid. | ||
| 1540 |     VPTransform OpStrategy; | ||
| 1541 | |||
| 1542 | bool shouldDoNothing() const { | ||
| 1543 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); | ||
| 1544 |     } | ||
| 1545 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) | ||
| 1546 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} | ||
| 1547 | }; | ||
| 1548 | |||
| 1549 |   /// \returns How the target needs this vector-predicated operation to be | ||
| 1550 |   /// transformed. | ||
| 1551 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; | ||
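|   // Minimal sketch: deciding whether a vector-predicated intrinsic `VPI` | ||
|   // (a VPIntrinsic reference assumed to be in scope) can be kept as-is. | ||
|   //   TargetTransformInfo::VPLegalization VPL = | ||
|   //       TTI.getVPLegalizationStrategy(VPI); | ||
|   //   bool KeepAsIs = VPL.shouldDoNothing(); | ||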
| 1552 |   /// @} | ||
| 1553 | |||
| 1554 |   /// @} | ||
| 1555 | |||
| 1556 | private: | ||
| 1557 |   /// The abstract base class used to type erase specific TTI | ||
| 1558 |   /// implementations. | ||
| 1559 | class Concept; | ||
| 1560 | |||
| 1561 |   /// The template model for the base class which wraps a concrete | ||
| 1562 |   /// implementation in a type erased interface. | ||
| 1563 | template <typename T> class Model; | ||
| 1564 | |||
| 1565 | std::unique_ptr<Concept> TTIImpl; | ||
| 1566 | }; | ||
| 1567 | |||
| 1568 | class TargetTransformInfo::Concept { | ||
| 1569 | public: | ||
| 1570 | virtual ~Concept() = 0; | ||
| 1571 | virtual const DataLayout &getDataLayout() const = 0; | ||
| 1572 | virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 1573 | ArrayRef<const Value *> Operands, | ||
| 1574 | TTI::TargetCostKind CostKind) = 0; | ||
| 1575 | virtual unsigned getInliningThresholdMultiplier() = 0; | ||
| 1576 | virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; | ||
| 1577 | virtual int getInlinerVectorBonusPercent() = 0; | ||
| 1578 | virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; | ||
| 1579 | virtual unsigned | ||
| 1580 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, | ||
| 1581 |                                    ProfileSummaryInfo *PSI, | ||
| 1582 | BlockFrequencyInfo *BFI) = 0; | ||
| 1583 | virtual InstructionCost getInstructionCost(const User *U, | ||
| 1584 | ArrayRef<const Value *> Operands, | ||
| 1585 | TargetCostKind CostKind) = 0; | ||
| 1586 | virtual BranchProbability getPredictableBranchThreshold() = 0; | ||
| 1587 | virtual bool hasBranchDivergence() = 0; | ||
| 1588 | virtual bool useGPUDivergenceAnalysis() = 0; | ||
| 1589 | virtual bool isSourceOfDivergence(const Value *V) = 0; | ||
| 1590 | virtual bool isAlwaysUniform(const Value *V) = 0; | ||
| 1591 | virtual unsigned getFlatAddressSpace() = 0; | ||
| 1592 | virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 1593 | Intrinsic::ID IID) const = 0; | ||
| 1594 | virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; | ||
| 1595 | virtual bool | ||
| 1596 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; | ||
| 1597 | virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; | ||
| 1598 | virtual bool isSingleThreaded() const = 0; | ||
| 1599 | virtual std::pair<const Value *, unsigned> | ||
| 1600 | getPredicatedAddrSpace(const Value *V) const = 0; | ||
| 1601 | virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, | ||
| 1602 |                                                   Value *OldV, | ||
| 1603 | Value *NewV) const = 0; | ||
| 1604 | virtual bool isLoweredToCall(const Function *F) = 0; | ||
| 1605 | virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, | ||
| 1606 |                                        UnrollingPreferences &UP, | ||
| 1607 | OptimizationRemarkEmitter *ORE) = 0; | ||
| 1608 | virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 1609 | PeelingPreferences &PP) = 0; | ||
| 1610 | virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 1611 |                                         AssumptionCache &AC, | ||
| 1612 |                                         TargetLibraryInfo *LibInfo, | ||
| 1613 | HardwareLoopInfo &HWLoopInfo) = 0; | ||
| 1614 | virtual bool | ||
| 1615 | preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 1616 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 1617 | DominatorTree *DT, LoopVectorizationLegality *LVL, | ||
| 1618 | InterleavedAccessInfo *IAI) = 0; | ||
| 1619 | virtual PredicationStyle emitGetActiveLaneMask() = 0; | ||
| 1620 | virtual std::optional<Instruction *> instCombineIntrinsic( | ||
| 1621 | InstCombiner &IC, IntrinsicInst &II) = 0; | ||
| 1622 | virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic( | ||
| 1623 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, | ||
| 1624 | KnownBits & Known, bool &KnownBitsComputed) = 0; | ||
| 1625 | virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 1626 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, | ||
| 1627 | APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, | ||
| 1628 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 1629 | SimplifyAndSetOp) = 0; | ||
| 1630 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; | ||
| 1631 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; | ||
| 1632 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, | ||
| 1633 | int64_t BaseOffset, bool HasBaseReg, | ||
| 1634 | int64_t Scale, unsigned AddrSpace, | ||
| 1635 | Instruction *I) = 0; | ||
| 1636 | virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 1637 | const TargetTransformInfo::LSRCost &C2) = 0; | ||
| 1638 | virtual bool isNumRegsMajorCostOfLSR() = 0; | ||
| 1639 | virtual bool isProfitableLSRChainElement(Instruction *I) = 0; | ||
| 1640 | virtual bool canMacroFuseCmp() = 0; | ||
| 1641 | virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, | ||
| 1642 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, | ||
| 1643 | TargetLibraryInfo *LibInfo) = 0; | ||
| 1644 |   virtual AddressingModeKind | ||
| 1645 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; | ||
| 1646 | virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; | ||
| 1647 | virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; | ||
| 1648 | virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; | ||
| 1649 | virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; | ||
| 1650 | virtual bool isLegalBroadcastLoad(Type *ElementTy, | ||
| 1651 | ElementCount NumElements) const = 0; | ||
| 1652 | virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; | ||
| 1653 | virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; | ||
| 1654 | virtual bool forceScalarizeMaskedGather(VectorType *DataType, | ||
| 1655 | Align Alignment) = 0; | ||
| 1656 | virtual bool forceScalarizeMaskedScatter(VectorType *DataType, | ||
| 1657 | Align Alignment) = 0; | ||
| 1658 | virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; | ||
| 1659 | virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; | ||
| 1660 | virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, | ||
| 1661 |                                unsigned Opcode1, | ||
| 1662 | const SmallBitVector &OpcodeMask) const = 0; | ||
| 1663 | virtual bool enableOrderedReductions() = 0; | ||
| 1664 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; | ||
| 1665 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; | ||
| 1666 | virtual bool prefersVectorizedAddressing() = 0; | ||
| 1667 | virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 1668 |                                                int64_t BaseOffset, | ||
| 1669 | bool HasBaseReg, int64_t Scale, | ||
| 1670 | unsigned AddrSpace) = 0; | ||
| 1671 | virtual bool LSRWithInstrQueries() = 0; | ||
| 1672 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; | ||
| 1673 | virtual bool isProfitableToHoist(Instruction *I) = 0; | ||
| 1674 | virtual bool useAA() = 0; | ||
| 1675 | virtual bool isTypeLegal(Type *Ty) = 0; | ||
| 1676 | virtual unsigned getRegUsageForType(Type *Ty) = 0; | ||
| 1677 | virtual bool shouldBuildLookupTables() = 0; | ||
| 1678 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; | ||
| 1679 | virtual bool shouldBuildRelLookupTables() = 0; | ||
| 1680 | virtual bool useColdCCForColdCall(Function &F) = 0; | ||
| 1681 | virtual InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 1682 | const APInt &DemandedElts, | ||
| 1683 | bool Insert, bool Extract, | ||
| 1684 | TargetCostKind CostKind) = 0; | ||
| 1685 |   virtual InstructionCost | ||
| 1686 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 1687 | ArrayRef<Type *> Tys, | ||
| 1688 | TargetCostKind CostKind) = 0; | ||
| 1689 | virtual bool supportsEfficientVectorElementLoadStore() = 0; | ||
| 1690 | virtual bool supportsTailCalls() = 0; | ||
| 1691 | virtual bool supportsTailCallFor(const CallBase *CB) = 0; | ||
| 1692 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; | ||
| 1693 |   virtual MemCmpExpansionOptions | ||
| 1694 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; | ||
| 1695 | virtual bool enableSelectOptimize() = 0; | ||
| 1696 | virtual bool enableInterleavedAccessVectorization() = 0; | ||
| 1697 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; | ||
| 1698 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; | ||
| 1699 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, | ||
| 1700 |                                               unsigned BitWidth, | ||
| 1701 |                                               unsigned AddressSpace, | ||
| 1702 | Align Alignment, | ||
| 1703 | unsigned *Fast) = 0; | ||
| 1704 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; | ||
| 1705 | virtual bool haveFastSqrt(Type *Ty) = 0; | ||
| 1706 | virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0; | ||
| 1707 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; | ||
| 1708 | virtual InstructionCost getFPOpCost(Type *Ty) = 0; | ||
| 1709 | virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 1710 | const APInt &Imm, Type *Ty) = 0; | ||
| 1711 | virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 1712 | TargetCostKind CostKind) = 0; | ||
| 1713 | virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 1714 | const APInt &Imm, Type *Ty, | ||
| 1715 | TargetCostKind CostKind, | ||
| 1716 | Instruction *Inst = nullptr) = 0; | ||
| 1717 | virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 1718 | const APInt &Imm, Type *Ty, | ||
| 1719 | TargetCostKind CostKind) = 0; | ||
| 1720 | virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; | ||
| 1721 | virtual unsigned getRegisterClassForType(bool Vector, | ||
| 1722 | Type *Ty = nullptr) const = 0; | ||
| 1723 | virtual const char *getRegisterClassName(unsigned ClassID) const = 0; | ||
| 1724 | virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; | ||
| 1725 | virtual unsigned getMinVectorRegisterBitWidth() const = 0; | ||
| 1726 | virtual std::optional<unsigned> getMaxVScale() const = 0; | ||
| 1727 | virtual std::optional<unsigned> getVScaleForTuning() const = 0; | ||
| 1728 | virtual bool | ||
| 1729 | shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; | ||
| 1730 | virtual ElementCount getMinimumVF(unsigned ElemWidth, | ||
| 1731 | bool IsScalable) const = 0; | ||
| 1732 | virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; | ||
| 1733 | virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 1734 | Type *ScalarValTy) const = 0; | ||
| 1735 | virtual bool shouldConsiderAddressTypePromotion( | ||
| 1736 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; | ||
| 1737 | virtual unsigned getCacheLineSize() const = 0; | ||
| 1738 | virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0; | ||
| 1739 | virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level) | ||
| 1740 | const = 0; | ||
| 1741 | |||
| 1742 |   /// \return How much before a load we should place the prefetch | ||
| 1743 |   /// instruction.  This is currently measured in number of | ||
| 1744 |   /// instructions. | ||
| 1745 | virtual unsigned getPrefetchDistance() const = 0; | ||
| 1746 | |||
| 1747 |   /// \return Some HW prefetchers can handle accesses up to a certain | ||
| 1748 |   /// constant stride.  This is the minimum stride in bytes where it | ||
| 1749 |   /// makes sense to start adding SW prefetches.  The default is 1, | ||
| 1750 |   /// i.e. prefetch with any stride.  Sometimes prefetching is beneficial | ||
| 1751 |   /// even below the HW prefetcher limit, and the arguments provided are | ||
| 1752 |   /// meant to serve as a basis for deciding this for a particular loop. | ||
| 1753 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 1754 |                                         unsigned NumStridedMemAccesses, | ||
| 1755 |                                         unsigned NumPrefetches, | ||
| 1756 | bool HasCall) const = 0; | ||
| 1757 | |||
| 1758 |   /// \return The maximum number of iterations to prefetch ahead.  If | ||
| 1759 |   /// the required number of iterations is more than this number, no | ||
| 1760 |   /// prefetching is performed. | ||
| 1761 | virtual unsigned getMaxPrefetchIterationsAhead() const = 0; | ||
| 1762 | |||
| 1763 |   /// \return True if prefetching should also be done for writes. | ||
| 1764 | virtual bool enableWritePrefetching() const = 0; | ||
| 1765 | |||
| 1766 |   /// \return True if the target wants to issue a prefetch in address space \p AS. | ||
| 1767 | virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0; | ||
| 1768 | |||
| 1769 | virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; | ||
| 1770 | virtual InstructionCost getArithmeticInstrCost( | ||
| 1771 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | ||
| 1772 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, | ||
| 1773 | ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; | ||
| 1774 | |||
| 1775 | virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 1776 | ArrayRef<int> Mask, | ||
| 1777 | TTI::TargetCostKind CostKind, | ||
| 1778 | int Index, VectorType *SubTp, | ||
| 1779 | ArrayRef<const Value *> Args) = 0; | ||
| 1780 | virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, | ||
| 1781 |                                            Type *Src, CastContextHint CCH, | ||
| 1782 | TTI::TargetCostKind CostKind, | ||
| 1783 | const Instruction *I) = 0; | ||
| 1784 | virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 1785 |                                                    VectorType *VecTy, | ||
| 1786 | unsigned Index) = 0; | ||
| 1787 | virtual InstructionCost getCFInstrCost(unsigned Opcode, | ||
| 1788 | TTI::TargetCostKind CostKind, | ||
| 1789 | const Instruction *I = nullptr) = 0; | ||
| 1790 | virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, | ||
| 1791 |                                              Type *CondTy, | ||
| 1792 | CmpInst::Predicate VecPred, | ||
| 1793 | TTI::TargetCostKind CostKind, | ||
| 1794 | const Instruction *I) = 0; | ||
| 1795 | virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 1796 | TTI::TargetCostKind CostKind, | ||
| 1797 | unsigned Index, Value *Op0, | ||
| 1798 | Value *Op1) = 0; | ||
| 1799 | virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 1800 | TTI::TargetCostKind CostKind, | ||
| 1801 | unsigned Index) = 0; | ||
| 1802 | |||
| 1803 |   virtual InstructionCost | ||
| 1804 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, | ||
| 1805 | const APInt &DemandedDstElts, | ||
| 1806 | TTI::TargetCostKind CostKind) = 0; | ||
| 1807 | |||
| 1808 |   virtual InstructionCost | ||
| 1809 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1810 | unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 1811 | OperandValueInfo OpInfo, const Instruction *I) = 0; | ||
| 1812 | virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, | ||
| 1813 | Align Alignment, | ||
| 1814 |                                             unsigned AddressSpace, | ||
| 1815 | TTI::TargetCostKind CostKind, | ||
| 1816 | const Instruction *I) = 0; | ||
| 1817 |   virtual InstructionCost | ||
| 1818 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1819 |                         unsigned AddressSpace, | ||
| 1820 | TTI::TargetCostKind CostKind) = 0; | ||
| 1821 |   virtual InstructionCost | ||
| 1822 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, | ||
| 1823 |                          bool VariableMask, Align Alignment, | ||
| 1824 | TTI::TargetCostKind CostKind, | ||
| 1825 | const Instruction *I = nullptr) = 0; | ||
| 1826 | |||
| 1827 | virtual InstructionCost getInterleavedMemoryOpCost( | ||
| 1828 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 1829 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 1830 | bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; | ||
| 1831 |   virtual InstructionCost | ||
| 1832 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||
| 1833 | std::optional<FastMathFlags> FMF, | ||
| 1834 | TTI::TargetCostKind CostKind) = 0; | ||
| 1835 |   virtual InstructionCost | ||
| 1836 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 1837 | TTI::TargetCostKind CostKind) = 0; | ||
| 1838 | virtual InstructionCost getExtendedReductionCost( | ||
| 1839 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1840 | std::optional<FastMathFlags> FMF, | ||
| 1841 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; | ||
| 1842 | virtual InstructionCost getMulAccReductionCost( | ||
| 1843 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1844 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; | ||
| 1845 |   virtual InstructionCost | ||
| 1846 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 1847 | TTI::TargetCostKind CostKind) = 0; | ||
| 1848 | virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, | ||
| 1849 | ArrayRef<Type *> Tys, | ||
| 1850 | TTI::TargetCostKind CostKind) = 0; | ||
| 1851 | virtual unsigned getNumberOfParts(Type *Tp) = 0; | ||
| 1852 |   virtual InstructionCost | ||
| 1853 | getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; | ||
| 1854 |   virtual InstructionCost | ||
| 1855 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; | ||
| 1856 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, | ||
| 1857 | MemIntrinsicInfo &Info) = 0; | ||
| 1858 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; | ||
| 1859 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 1860 | Type *ExpectedType) = 0; | ||
| 1861 | virtual Type *getMemcpyLoopLoweringType( | ||
| 1862 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 1863 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 1864 | std::optional<uint32_t> AtomicElementSize) const = 0; | ||
| 1865 | |||
| 1866 | virtual void getMemcpyLoopResidualLoweringType( | ||
| 1867 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 1868 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 1869 | unsigned SrcAlign, unsigned DestAlign, | ||
| 1870 | std::optional<uint32_t> AtomicCpySize) const = 0; | ||
| 1871 | virtual bool areInlineCompatible(const Function *Caller, | ||
| 1872 | const Function *Callee) const = 0; | ||
| 1873 | virtual bool areTypesABICompatible(const Function *Caller, | ||
| 1874 | const Function *Callee, | ||
| 1875 | const ArrayRef<Type *> &Types) const = 0; | ||
| 1876 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; | ||
| 1877 | virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; | ||
| 1878 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; | ||
| 1879 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; | ||
| 1880 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; | ||
| 1881 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||
| 1882 | Align Alignment, | ||
| 1883 | unsigned AddrSpace) const = 0; | ||
| 1884 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||
| 1885 | Align Alignment, | ||
| 1886 | unsigned AddrSpace) const = 0; | ||
| 1887 | virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 1888 | ElementCount VF) const = 0; | ||
| 1889 | virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; | ||
| 1890 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 1891 |                                        unsigned ChainSizeInBytes, | ||
| 1892 | VectorType *VecTy) const = 0; | ||
| 1893 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 1894 |                                         unsigned ChainSizeInBytes, | ||
| 1895 | VectorType *VecTy) const = 0; | ||
| 1896 | virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 1897 | ReductionFlags) const = 0; | ||
| 1898 | virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 1899 | ReductionFlags) const = 0; | ||
| 1900 | virtual bool preferEpilogueVectorization() const = 0; | ||
| 1901 | |||
| 1902 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; | ||
| 1903 | virtual unsigned getGISelRematGlobalCost() const = 0; | ||
| 1904 | virtual unsigned getMinTripCountTailFoldingThreshold() const = 0; | ||
| 1905 | virtual bool enableScalableVectorization() const = 0; | ||
| 1906 | virtual bool supportsScalableVectors() const = 0; | ||
| 1907 | virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 1908 | Align Alignment) const = 0; | ||
| 1909 |   virtual VPLegalization | ||
| 1910 | getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; | ||
| 1911 | }; | ||
| 1912 | |||
| 1913 | template <typename T> | ||
| 1914 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { | ||
| 1915 |   T Impl; | ||
| 1916 | |||
| 1917 | public: | ||
| 1918 | Model(T Impl) : Impl(std::move(Impl)) {} | ||
| 1919 | ~Model() override = default; | ||
| 1920 | |||
| 1921 | const DataLayout &getDataLayout() const override { | ||
| 1922 | return Impl.getDataLayout(); | ||
| 1923 |   } | ||
| 1924 | |||
| 1925 | InstructionCost | ||
| 1926 | getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 1927 | ArrayRef<const Value *> Operands, | ||
| 1928 | TargetTransformInfo::TargetCostKind CostKind) override { | ||
| 1929 | return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind); | ||
| 1930 |   } | ||
| 1931 | unsigned getInliningThresholdMultiplier() override { | ||
| 1932 | return Impl.getInliningThresholdMultiplier(); | ||
| 1933 |   } | ||
| 1934 | unsigned adjustInliningThreshold(const CallBase *CB) override { | ||
| 1935 | return Impl.adjustInliningThreshold(CB); | ||
| 1936 |   } | ||
| 1937 | int getInlinerVectorBonusPercent() override { | ||
| 1938 | return Impl.getInlinerVectorBonusPercent(); | ||
| 1939 |   } | ||
| 1940 | InstructionCost getMemcpyCost(const Instruction *I) override { | ||
| 1941 | return Impl.getMemcpyCost(I); | ||
| 1942 |   } | ||
| 1943 | InstructionCost getInstructionCost(const User *U, | ||
| 1944 | ArrayRef<const Value *> Operands, | ||
| 1945 | TargetCostKind CostKind) override { | ||
| 1946 | return Impl.getInstructionCost(U, Operands, CostKind); | ||
| 1947 |   } | ||
| 1948 | BranchProbability getPredictableBranchThreshold() override { | ||
| 1949 | return Impl.getPredictableBranchThreshold(); | ||
| 1950 |   } | ||
| 1951 | bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } | ||
| 1952 | bool useGPUDivergenceAnalysis() override { | ||
| 1953 | return Impl.useGPUDivergenceAnalysis(); | ||
| 1954 |   } | ||
| 1955 | bool isSourceOfDivergence(const Value *V) override { | ||
| 1956 | return Impl.isSourceOfDivergence(V); | ||
| 1957 |   } | ||
| 1958 | |||
| 1959 | bool isAlwaysUniform(const Value *V) override { | ||
| 1960 | return Impl.isAlwaysUniform(V); | ||
| 1961 |   } | ||
| 1962 | |||
| 1963 | unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } | ||
| 1964 | |||
| 1965 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 1966 | Intrinsic::ID IID) const override { | ||
| 1967 | return Impl.collectFlatAddressOperands(OpIndexes, IID); | ||
| 1968 |   } | ||
| 1969 | |||
| 1970 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { | ||
| 1971 | return Impl.isNoopAddrSpaceCast(FromAS, ToAS); | ||
| 1972 |   } | ||
| 1973 | |||
| 1974 |   bool | ||
| 1975 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { | ||
| 1976 | return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); | ||
| 1977 |   } | ||
| 1978 | |||
| 1979 | unsigned getAssumedAddrSpace(const Value *V) const override { | ||
| 1980 | return Impl.getAssumedAddrSpace(V); | ||
| 1981 |   } | ||
| 1982 | |||
| 1983 | bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } | ||
| 1984 | |||
| 1985 | std::pair<const Value *, unsigned> | ||
| 1986 | getPredicatedAddrSpace(const Value *V) const override { | ||
| 1987 | return Impl.getPredicatedAddrSpace(V); | ||
| 1988 |   } | ||
| 1989 | |||
| 1990 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, | ||
| 1991 | Value *NewV) const override { | ||
| 1992 | return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); | ||
| 1993 |   } | ||
| 1994 | |||
| 1995 | bool isLoweredToCall(const Function *F) override { | ||
| 1996 | return Impl.isLoweredToCall(F); | ||
| 1997 |   } | ||
| 1998 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 1999 |                                UnrollingPreferences &UP, | ||
| 2000 | OptimizationRemarkEmitter *ORE) override { | ||
| 2001 | return Impl.getUnrollingPreferences(L, SE, UP, ORE); | ||
| 2002 |   } | ||
| 2003 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 2004 | PeelingPreferences &PP) override { | ||
| 2005 | return Impl.getPeelingPreferences(L, SE, PP); | ||
| 2006 |   } | ||
| 2007 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 2008 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, | ||
| 2009 | HardwareLoopInfo &HWLoopInfo) override { | ||
| 2010 | return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); | ||
| 2011 |   } | ||
| 2012 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 2013 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 2014 |                                    DominatorTree *DT, | ||
| 2015 |                                    LoopVectorizationLegality *LVL, | ||
| 2016 | InterleavedAccessInfo *IAI) override { | ||
| 2017 | return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); | ||
| 2018 |   } | ||
| 2019 | PredicationStyle emitGetActiveLaneMask() override { | ||
| 2020 | return Impl.emitGetActiveLaneMask(); | ||
| 2021 |   } | ||
| 2022 | std::optional<Instruction *> | ||
| 2023 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { | ||
| 2024 | return Impl.instCombineIntrinsic(IC, II); | ||
| 2025 |   } | ||
| 2026 | std::optional<Value *> | ||
| 2027 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, | ||
| 2028 |                                    APInt DemandedMask, KnownBits &Known, | ||
| 2029 | bool &KnownBitsComputed) override { | ||
| 2030 | return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, | ||
| 2031 | KnownBitsComputed); | ||
| 2032 |   } | ||
| 2033 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 2034 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, | ||
| 2035 | APInt &UndefElts2, APInt &UndefElts3, | ||
| 2036 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 2037 | SimplifyAndSetOp) override { | ||
| 2038 | return Impl.simplifyDemandedVectorEltsIntrinsic( | ||
| 2039 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, | ||
| 2040 | SimplifyAndSetOp); | ||
| 2041 |   } | ||
| 2042 | bool isLegalAddImmediate(int64_t Imm) override { | ||
| 2043 | return Impl.isLegalAddImmediate(Imm); | ||
| 2044 |   } | ||
| 2045 | bool isLegalICmpImmediate(int64_t Imm) override { | ||
| 2046 | return Impl.isLegalICmpImmediate(Imm); | ||
| 2047 |   } | ||
| 2048 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||
| 2049 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace, | ||
| 2050 | Instruction *I) override { | ||
| 2051 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, | ||
| 2052 | AddrSpace, I); | ||
| 2053 |   } | ||
| 2054 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 2055 | const TargetTransformInfo::LSRCost &C2) override { | ||
| 2056 | return Impl.isLSRCostLess(C1, C2); | ||
| 2057 |   } | ||
| 2058 | bool isNumRegsMajorCostOfLSR() override { | ||
| 2059 | return Impl.isNumRegsMajorCostOfLSR(); | ||
| 2060 |   } | ||
| 2061 | bool isProfitableLSRChainElement(Instruction *I) override { | ||
| 2062 | return Impl.isProfitableLSRChainElement(I); | ||
| 2063 |   } | ||
| 2064 | bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } | ||
| 2065 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||
| 2066 | DominatorTree *DT, AssumptionCache *AC, | ||
| 2067 | TargetLibraryInfo *LibInfo) override { | ||
| 2068 | return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); | ||
| 2069 |   } | ||
| 2070 | AddressingModeKind | ||
| 2071 | getPreferredAddressingMode(const Loop *L, | ||
| 2072 | ScalarEvolution *SE) const override { | ||
| 2073 | return Impl.getPreferredAddressingMode(L, SE); | ||
| 2074 |   } | ||
| 2075 | bool isLegalMaskedStore(Type *DataType, Align Alignment) override { | ||
| 2076 | return Impl.isLegalMaskedStore(DataType, Alignment); | ||
| 2077 |   } | ||
| 2078 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { | ||
| 2079 | return Impl.isLegalMaskedLoad(DataType, Alignment); | ||
| 2080 |   } | ||
| 2081 | bool isLegalNTStore(Type *DataType, Align Alignment) override { | ||
| 2082 | return Impl.isLegalNTStore(DataType, Alignment); | ||
| 2083 |   } | ||
| 2084 | bool isLegalNTLoad(Type *DataType, Align Alignment) override { | ||
| 2085 | return Impl.isLegalNTLoad(DataType, Alignment); | ||
| 2086 |   } | ||
| 2087 | bool isLegalBroadcastLoad(Type *ElementTy, | ||
| 2088 | ElementCount NumElements) const override { | ||
| 2089 | return Impl.isLegalBroadcastLoad(ElementTy, NumElements); | ||
| 2090 |   } | ||
| 2091 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { | ||
| 2092 | return Impl.isLegalMaskedScatter(DataType, Alignment); | ||
| 2093 |   } | ||
| 2094 | bool isLegalMaskedGather(Type *DataType, Align Alignment) override { | ||
| 2095 | return Impl.isLegalMaskedGather(DataType, Alignment); | ||
| 2096 |   } | ||
| 2097 | bool forceScalarizeMaskedGather(VectorType *DataType, | ||
| 2098 | Align Alignment) override { | ||
| 2099 | return Impl.forceScalarizeMaskedGather(DataType, Alignment); | ||
| 2100 |   } | ||
| 2101 | bool forceScalarizeMaskedScatter(VectorType *DataType, | ||
| 2102 | Align Alignment) override { | ||
| 2103 | return Impl.forceScalarizeMaskedScatter(DataType, Alignment); | ||
| 2104 |   } | ||
| 2105 | bool isLegalMaskedCompressStore(Type *DataType) override { | ||
| 2106 | return Impl.isLegalMaskedCompressStore(DataType); | ||
| 2107 |   } | ||
| 2108 | bool isLegalMaskedExpandLoad(Type *DataType) override { | ||
| 2109 | return Impl.isLegalMaskedExpandLoad(DataType); | ||
| 2110 |   } | ||
| 2111 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, | ||
| 2112 | const SmallBitVector &OpcodeMask) const override { | ||
| 2113 | return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); | ||
| 2114 |   } | ||
| 2115 | bool enableOrderedReductions() override { | ||
| 2116 | return Impl.enableOrderedReductions(); | ||
| 2117 |   } | ||
| 2118 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { | ||
| 2119 | return Impl.hasDivRemOp(DataType, IsSigned); | ||
| 2120 |   } | ||
| 2121 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { | ||
| 2122 | return Impl.hasVolatileVariant(I, AddrSpace); | ||
| 2123 |   } | ||
| 2124 | bool prefersVectorizedAddressing() override { | ||
| 2125 | return Impl.prefersVectorizedAddressing(); | ||
| 2126 |   } | ||
| 2127 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 2128 | int64_t BaseOffset, bool HasBaseReg, | ||
| 2129 |                                        int64_t Scale, | ||
| 2130 | unsigned AddrSpace) override { | ||
| 2131 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, | ||
| 2132 | AddrSpace); | ||
| 2133 |   } | ||
| 2134 | bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); } | ||
| 2135 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { | ||
| 2136 | return Impl.isTruncateFree(Ty1, Ty2); | ||
| 2137 |   } | ||
| 2138 | bool isProfitableToHoist(Instruction *I) override { | ||
| 2139 | return Impl.isProfitableToHoist(I); | ||
| 2140 |   } | ||
| 2141 | bool useAA() override { return Impl.useAA(); } | ||
| 2142 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } | ||
| 2143 | unsigned getRegUsageForType(Type *Ty) override { | ||
| 2144 | return Impl.getRegUsageForType(Ty); | ||
| 2145 |   } | ||
| 2146 | bool shouldBuildLookupTables() override { | ||
| 2147 | return Impl.shouldBuildLookupTables(); | ||
| 2148 |   } | ||
| 2149 | bool shouldBuildLookupTablesForConstant(Constant *C) override { | ||
| 2150 | return Impl.shouldBuildLookupTablesForConstant(C); | ||
| 2151 |   } | ||
| 2152 | bool shouldBuildRelLookupTables() override { | ||
| 2153 | return Impl.shouldBuildRelLookupTables(); | ||
| 2154 |   } | ||
| 2155 | bool useColdCCForColdCall(Function &F) override { | ||
| 2156 | return Impl.useColdCCForColdCall(F); | ||
| 2157 |   } | ||
| 2158 | |||
| 2159 | InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 2160 | const APInt &DemandedElts, | ||
| 2161 | bool Insert, bool Extract, | ||
| 2162 | TargetCostKind CostKind) override { | ||
| 2163 | return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, | ||
| 2164 | CostKind); | ||
| 2165 |   } | ||
| 2166 | InstructionCost | ||
| 2167 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 2168 | ArrayRef<Type *> Tys, | ||
| 2169 | TargetCostKind CostKind) override { | ||
| 2170 | return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind); | ||
| 2171 |   } | ||
| 2172 | |||
| 2173 | bool supportsEfficientVectorElementLoadStore() override { | ||
| 2174 | return Impl.supportsEfficientVectorElementLoadStore(); | ||
| 2175 |   } | ||
| 2176 | |||
| 2177 | bool supportsTailCalls() override { return Impl.supportsTailCalls(); } | ||
| 2178 | bool supportsTailCallFor(const CallBase *CB) override { | ||
| 2179 | return Impl.supportsTailCallFor(CB); | ||
| 2180 |   } | ||
| 2181 | |||
| 2182 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { | ||
| 2183 | return Impl.enableAggressiveInterleaving(LoopHasReductions); | ||
| 2184 |   } | ||
| 2185 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||
| 2186 | bool IsZeroCmp) const override { | ||
| 2187 | return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); | ||
| 2188 |   } | ||
| 2189 | bool enableInterleavedAccessVectorization() override { | ||
| 2190 | return Impl.enableInterleavedAccessVectorization(); | ||
| 2191 |   } | ||
| 2192 | bool enableSelectOptimize() override { | ||
| 2193 | return Impl.enableSelectOptimize(); | ||
| 2194 |   } | ||
| 2195 | bool enableMaskedInterleavedAccessVectorization() override { | ||
| 2196 | return Impl.enableMaskedInterleavedAccessVectorization(); | ||
| 2197 |   } | ||
| 2198 | bool isFPVectorizationPotentiallyUnsafe() override { | ||
| 2199 | return Impl.isFPVectorizationPotentiallyUnsafe(); | ||
| 2200 |   } | ||
| 2201 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, | ||
| 2202 |                                       unsigned AddressSpace, Align Alignment, | ||
| 2203 | unsigned *Fast) override { | ||
| 2204 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, | ||
| 2205 | Alignment, Fast); | ||
| 2206 |   } | ||
| 2207 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { | ||
| 2208 | return Impl.getPopcntSupport(IntTyWidthInBit); | ||
| 2209 |   } | ||
| 2210 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } | ||
| 2211 | |||
| 2212 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) override { | ||
| 2213 | return Impl.isExpensiveToSpeculativelyExecute(I); | ||
| 2214 |   } | ||
| 2215 | |||
| 2216 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { | ||
| 2217 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); | ||
| 2218 |   } | ||
| 2219 | |||
| 2220 | InstructionCost getFPOpCost(Type *Ty) override { | ||
| 2221 | return Impl.getFPOpCost(Ty); | ||
| 2222 |   } | ||
| 2223 | |||
| 2224 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 2225 | const APInt &Imm, Type *Ty) override { | ||
| 2226 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); | ||
| 2227 |   } | ||
| 2228 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 2229 | TargetCostKind CostKind) override { | ||
| 2230 | return Impl.getIntImmCost(Imm, Ty, CostKind); | ||
| 2231 |   } | ||
| 2232 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 2233 | const APInt &Imm, Type *Ty, | ||
| 2234 | TargetCostKind CostKind, | ||
| 2235 | Instruction *Inst = nullptr) override { | ||
| 2236 | return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); | ||
| 2237 |   } | ||
| 2238 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 2239 | const APInt &Imm, Type *Ty, | ||
| 2240 | TargetCostKind CostKind) override { | ||
| 2241 | return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); | ||
| 2242 |   } | ||
| 2243 | unsigned getNumberOfRegisters(unsigned ClassID) const override { | ||
| 2244 | return Impl.getNumberOfRegisters(ClassID); | ||
| 2245 |   } | ||
| 2246 | unsigned getRegisterClassForType(bool Vector, | ||
| 2247 | Type *Ty = nullptr) const override { | ||
| 2248 | return Impl.getRegisterClassForType(Vector, Ty); | ||
| 2249 |   } | ||
| 2250 | const char *getRegisterClassName(unsigned ClassID) const override { | ||
| 2251 | return Impl.getRegisterClassName(ClassID); | ||
| 2252 |   } | ||
| 2253 | TypeSize getRegisterBitWidth(RegisterKind K) const override { | ||
| 2254 | return Impl.getRegisterBitWidth(K); | ||
| 2255 |   } | ||
| 2256 | unsigned getMinVectorRegisterBitWidth() const override { | ||
| 2257 | return Impl.getMinVectorRegisterBitWidth(); | ||
| 2258 |   } | ||
| 2259 | std::optional<unsigned> getMaxVScale() const override { | ||
| 2260 | return Impl.getMaxVScale(); | ||
| 2261 |   } | ||
| 2262 | std::optional<unsigned> getVScaleForTuning() const override { | ||
| 2263 | return Impl.getVScaleForTuning(); | ||
| 2264 |   } | ||
| 2265 | bool shouldMaximizeVectorBandwidth( | ||
| 2266 | TargetTransformInfo::RegisterKind K) const override { | ||
| 2267 | return Impl.shouldMaximizeVectorBandwidth(K); | ||
| 2268 |   } | ||
| 2269 | ElementCount getMinimumVF(unsigned ElemWidth, | ||
| 2270 | bool IsScalable) const override { | ||
| 2271 | return Impl.getMinimumVF(ElemWidth, IsScalable); | ||
| 2272 |   } | ||
| 2273 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { | ||
| 2274 | return Impl.getMaximumVF(ElemWidth, Opcode); | ||
| 2275 |   } | ||
| 2276 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 2277 | Type *ScalarValTy) const override { | ||
| 2278 | return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); | ||
| 2279 |   } | ||
| 2280 | bool shouldConsiderAddressTypePromotion( | ||
| 2281 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { | ||
| 2282 | return Impl.shouldConsiderAddressTypePromotion( | ||
| 2283 | I, AllowPromotionWithoutCommonHeader); | ||
| 2284 |   } | ||
| 2285 | unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } | ||
| 2286 | std::optional<unsigned> getCacheSize(CacheLevel Level) const override { | ||
| 2287 | return Impl.getCacheSize(Level); | ||
| 2288 |   } | ||
| 2289 | std::optional<unsigned> | ||
| 2290 | getCacheAssociativity(CacheLevel Level) const override { | ||
| 2291 | return Impl.getCacheAssociativity(Level); | ||
| 2292 |   } | ||
| 2293 | |||
| 2294 |   /// Return the preferred prefetch distance in terms of instructions. | ||
| 2295 |   /// | ||
| 2296 | unsigned getPrefetchDistance() const override { | ||
| 2297 | return Impl.getPrefetchDistance(); | ||
| 2298 |   } | ||
| 2299 | |||
| 2300 |   /// Return the minimum stride necessary to trigger software | ||
| 2301 |   /// prefetching. | ||
| 2302 |   /// | ||
| 2303 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 2304 |                                 unsigned NumStridedMemAccesses, | ||
| 2305 |                                 unsigned NumPrefetches, | ||
| 2306 | bool HasCall) const override { | ||
| 2307 | return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, | ||
| 2308 | NumPrefetches, HasCall); | ||
| 2309 |   } | ||
| 2310 | |||
| 2311 |   /// Return the maximum prefetch distance in terms of loop | ||
| 2312 |   /// iterations. | ||
| 2313 |   /// | ||
| 2314 | unsigned getMaxPrefetchIterationsAhead() const override { | ||
| 2315 | return Impl.getMaxPrefetchIterationsAhead(); | ||
| 2316 |   } | ||
| 2317 | |||
| 2318 |   /// \return True if prefetching should also be done for writes. | ||
| 2319 | bool enableWritePrefetching() const override { | ||
| 2320 | return Impl.enableWritePrefetching(); | ||
| 2321 |   } | ||
| 2322 | |||
| 2323 |   /// \return true if the target wants to issue a prefetch in address space \p AS. | ||
| 2324 | bool shouldPrefetchAddressSpace(unsigned AS) const override { | ||
| 2325 | return Impl.shouldPrefetchAddressSpace(AS); | ||
| 2326 |   } | ||
| 2327 | |||
| 2328 | unsigned getMaxInterleaveFactor(unsigned VF) override { | ||
| 2329 | return Impl.getMaxInterleaveFactor(VF); | ||
| 2330 |   } | ||
| 2331 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||
| 2332 | unsigned &JTSize, | ||
| 2333 |                                             ProfileSummaryInfo *PSI, | ||
| 2334 | BlockFrequencyInfo *BFI) override { | ||
| 2335 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); | ||
| 2336 |   } | ||
| 2337 |   InstructionCost getArithmeticInstrCost( | ||
| 2338 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | ||
| 2339 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, | ||
| 2340 | ArrayRef<const Value *> Args, | ||
| 2341 | const Instruction *CxtI = nullptr) override { | ||
| 2342 | return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, | ||
| 2343 | Args, CxtI); | ||
| 2344 |   } | ||
| 2345 | |||
| 2346 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 2347 | ArrayRef<int> Mask, | ||
| 2348 | TTI::TargetCostKind CostKind, int Index, | ||
| 2349 |                                  VectorType *SubTp, | ||
| 2350 | ArrayRef<const Value *> Args) override { | ||
| 2351 | return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); | ||
| 2352 |   } | ||
| 2353 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||
| 2354 | CastContextHint CCH, | ||
| 2355 | TTI::TargetCostKind CostKind, | ||
| 2356 | const Instruction *I) override { | ||
| 2357 | return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); | ||
| 2358 |   } | ||
| 2359 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 2360 |                                            VectorType *VecTy, | ||
| 2361 | unsigned Index) override { | ||
| 2362 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); | ||
| 2363 |   } | ||
| 2364 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, | ||
| 2365 | const Instruction *I = nullptr) override { | ||
| 2366 | return Impl.getCFInstrCost(Opcode, CostKind, I); | ||
| 2367 |   } | ||
| 2368 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||
| 2369 | CmpInst::Predicate VecPred, | ||
| 2370 | TTI::TargetCostKind CostKind, | ||
| 2371 | const Instruction *I) override { | ||
| 2372 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); | ||
| 2373 |   } | ||
| 2374 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 2375 | TTI::TargetCostKind CostKind, | ||
| 2376 | unsigned Index, Value *Op0, | ||
| 2377 | Value *Op1) override { | ||
| 2378 | return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1); | ||
| 2379 |   } | ||
| 2380 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 2381 | TTI::TargetCostKind CostKind, | ||
| 2382 | unsigned Index) override { | ||
| 2383 | return Impl.getVectorInstrCost(I, Val, CostKind, Index); | ||
| 2384 |   } | ||
| 2385 | InstructionCost | ||
| 2386 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, | ||
| 2387 | const APInt &DemandedDstElts, | ||
| 2388 | TTI::TargetCostKind CostKind) override { | ||
| 2389 | return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, | ||
| 2390 | DemandedDstElts, CostKind); | ||
| 2391 |   } | ||
| 2392 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 2393 |                                   unsigned AddressSpace, | ||
| 2394 | TTI::TargetCostKind CostKind, | ||
| 2395 | OperandValueInfo OpInfo, | ||
| 2396 | const Instruction *I) override { | ||
| 2397 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, | ||
| 2398 | OpInfo, I); | ||
| 2399 |   } | ||
| 2400 | InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 2401 |                                     unsigned AddressSpace, | ||
| 2402 | TTI::TargetCostKind CostKind, | ||
| 2403 | const Instruction *I) override { | ||
| 2404 | return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace, | ||
| 2405 | CostKind, I); | ||
| 2406 |   } | ||
| 2407 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, | ||
| 2408 |                                         Align Alignment, unsigned AddressSpace, | ||
| 2409 | TTI::TargetCostKind CostKind) override { | ||
| 2410 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, | ||
| 2411 | CostKind); | ||
| 2412 |   } | ||
| 2413 | InstructionCost | ||
| 2414 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, | ||
| 2415 |                          bool VariableMask, Align Alignment, | ||
| 2416 | TTI::TargetCostKind CostKind, | ||
| 2417 | const Instruction *I = nullptr) override { | ||
| 2418 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, | ||
| 2419 | Alignment, CostKind, I); | ||
| 2420 |   } | ||
| 2421 |   InstructionCost getInterleavedMemoryOpCost( | ||
| 2422 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 2423 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 2424 | bool UseMaskForCond, bool UseMaskForGaps) override { | ||
| 2425 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | ||
| 2426 | Alignment, AddressSpace, CostKind, | ||
| 2427 | UseMaskForCond, UseMaskForGaps); | ||
| 2428 |   } | ||
| 2429 | InstructionCost | ||
| 2430 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||
| 2431 | std::optional<FastMathFlags> FMF, | ||
| 2432 | TTI::TargetCostKind CostKind) override { | ||
| 2433 | return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); | ||
| 2434 |   } | ||
| 2435 | InstructionCost | ||
| 2436 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 2437 | TTI::TargetCostKind CostKind) override { | ||
| 2438 | return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); | ||
| 2439 |   } | ||
| 2440 |   InstructionCost getExtendedReductionCost( | ||
| 2441 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 2442 | std::optional<FastMathFlags> FMF, | ||
| 2443 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { | ||
| 2444 | return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF, | ||
| 2445 | CostKind); | ||
| 2446 |   } | ||
| 2447 |   InstructionCost getMulAccReductionCost( | ||
| 2448 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 2449 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { | ||
| 2450 | return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind); | ||
| 2451 |   } | ||
| 2452 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 2453 | TTI::TargetCostKind CostKind) override { | ||
| 2454 | return Impl.getIntrinsicInstrCost(ICA, CostKind); | ||
| 2455 |   } | ||
| 2456 | InstructionCost getCallInstrCost(Function *F, Type *RetTy, | ||
| 2457 | ArrayRef<Type *> Tys, | ||
| 2458 | TTI::TargetCostKind CostKind) override { | ||
| 2459 | return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); | ||
| 2460 |   } | ||
| 2461 | unsigned getNumberOfParts(Type *Tp) override { | ||
| 2462 | return Impl.getNumberOfParts(Tp); | ||
| 2463 |   } | ||
| 2464 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, | ||
| 2465 | const SCEV *Ptr) override { | ||
| 2466 | return Impl.getAddressComputationCost(Ty, SE, Ptr); | ||
| 2467 |   } | ||
| 2468 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { | ||
| 2469 | return Impl.getCostOfKeepingLiveOverCall(Tys); | ||
| 2470 |   } | ||
| 2471 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, | ||
| 2472 | MemIntrinsicInfo &Info) override { | ||
| 2473 | return Impl.getTgtMemIntrinsic(Inst, Info); | ||
| 2474 |   } | ||
| 2475 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { | ||
| 2476 | return Impl.getAtomicMemIntrinsicMaxElementSize(); | ||
| 2477 |   } | ||
| 2478 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 2479 | Type *ExpectedType) override { | ||
| 2480 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); | ||
| 2481 |   } | ||
| 2482 | Type *getMemcpyLoopLoweringType( | ||
| 2483 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 2484 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 2485 | std::optional<uint32_t> AtomicElementSize) const override { | ||
| 2486 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, | ||
| 2487 | DestAddrSpace, SrcAlign, DestAlign, | ||
| 2488 | AtomicElementSize); | ||
| 2489 |   } | ||
| 2490 | void getMemcpyLoopResidualLoweringType( | ||
| 2491 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 2492 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 2493 | unsigned SrcAlign, unsigned DestAlign, | ||
| 2494 | std::optional<uint32_t> AtomicCpySize) const override { | ||
| 2495 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, | ||
| 2496 | SrcAddrSpace, DestAddrSpace, | ||
| 2497 | SrcAlign, DestAlign, AtomicCpySize); | ||
| 2498 |   } | ||
| 2499 | bool areInlineCompatible(const Function *Caller, | ||
| 2500 | const Function *Callee) const override { | ||
| 2501 | return Impl.areInlineCompatible(Caller, Callee); | ||
| 2502 |   } | ||
| 2503 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, | ||
| 2504 | const ArrayRef<Type *> &Types) const override { | ||
| 2505 | return Impl.areTypesABICompatible(Caller, Callee, Types); | ||
| 2506 |   } | ||
| 2507 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { | ||
| 2508 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); | ||
| 2509 |   } | ||
| 2510 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { | ||
| 2511 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); | ||
| 2512 |   } | ||
| 2513 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { | ||
| 2514 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); | ||
| 2515 |   } | ||
| 2516 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { | ||
| 2517 | return Impl.isLegalToVectorizeLoad(LI); | ||
| 2518 |   } | ||
| 2519 | bool isLegalToVectorizeStore(StoreInst *SI) const override { | ||
| 2520 | return Impl.isLegalToVectorizeStore(SI); | ||
| 2521 |   } | ||
| 2522 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 2523 | unsigned AddrSpace) const override { | ||
| 2524 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, | ||
| 2525 | AddrSpace); | ||
| 2526 |   } | ||
| 2527 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 2528 | unsigned AddrSpace) const override { | ||
| 2529 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, | ||
| 2530 | AddrSpace); | ||
| 2531 |   } | ||
| 2532 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 2533 | ElementCount VF) const override { | ||
| 2534 | return Impl.isLegalToVectorizeReduction(RdxDesc, VF); | ||
| 2535 |   } | ||
| 2536 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { | ||
| 2537 | return Impl.isElementTypeLegalForScalableVector(Ty); | ||
| 2538 |   } | ||
| 2539 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 2540 |                                unsigned ChainSizeInBytes, | ||
| 2541 | VectorType *VecTy) const override { | ||
| 2542 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); | ||
| 2543 |   } | ||
| 2544 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 2545 |                                 unsigned ChainSizeInBytes, | ||
| 2546 | VectorType *VecTy) const override { | ||
| 2547 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); | ||
| 2548 |   } | ||
| 2549 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 2550 | ReductionFlags Flags) const override { | ||
| 2551 | return Impl.preferInLoopReduction(Opcode, Ty, Flags); | ||
| 2552 |   } | ||
| 2553 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 2554 | ReductionFlags Flags) const override { | ||
| 2555 | return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); | ||
| 2556 |   } | ||
| 2557 | bool preferEpilogueVectorization() const override { | ||
| 2558 | return Impl.preferEpilogueVectorization(); | ||
| 2559 |   } | ||
| 2560 | |||
| 2561 | bool shouldExpandReduction(const IntrinsicInst *II) const override { | ||
| 2562 | return Impl.shouldExpandReduction(II); | ||
| 2563 |   } | ||
| 2564 | |||
| 2565 | unsigned getGISelRematGlobalCost() const override { | ||
| 2566 | return Impl.getGISelRematGlobalCost(); | ||
| 2567 |   } | ||
| 2568 | |||
| 2569 | unsigned getMinTripCountTailFoldingThreshold() const override { | ||
| 2570 | return Impl.getMinTripCountTailFoldingThreshold(); | ||
| 2571 |   } | ||
| 2572 | |||
| 2573 | bool supportsScalableVectors() const override { | ||
| 2574 | return Impl.supportsScalableVectors(); | ||
| 2575 |   } | ||
| 2576 | |||
| 2577 | bool enableScalableVectorization() const override { | ||
| 2578 | return Impl.enableScalableVectorization(); | ||
| 2579 |   } | ||
| 2580 | |||
| 2581 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 2582 | Align Alignment) const override { | ||
| 2583 | return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); | ||
| 2584 |   } | ||
| 2585 | |||
| 2586 | VPLegalization | ||
| 2587 | getVPLegalizationStrategy(const VPIntrinsic &PI) const override { | ||
| 2588 | return Impl.getVPLegalizationStrategy(PI); | ||
| 2589 |   } | ||
| 2590 | }; | ||
| 2591 | |||
| 2592 | template <typename T> | ||
| 2593 | TargetTransformInfo::TargetTransformInfo(T Impl) | ||
| 2594 | : TTIImpl(new Model<T>(Impl)) {} | ||
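| | // The constructor template above performs the type erasure: the concrete | ||
| | // implementation object is stored in a heap-allocated Model<T>. A minimal | ||
| | // sketch, assuming a hypothetical MyTTIImpl type that provides the expected | ||
| | // member functions: | ||
| | // | ||
| | //   MyTTIImpl ImplForF(/*target-specific arguments*/); | ||
| | //   TargetTransformInfo TTI(std::move(ImplForF)); // held as Model<MyTTIImpl> | ||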
| 2595 | |||
| 2596 | /// Analysis pass providing the \c TargetTransformInfo. | ||
| 2597 | /// | ||
| 2598 | /// The core idea of the TargetIRAnalysis is to expose an interface through | ||
| 2599 | /// which LLVM targets can analyze and provide information about the middle | ||
| 2600 | /// end's target-independent IR. This supports use cases such as target-aware | ||
| 2601 | /// cost modeling of IR constructs. | ||
| 2602 | /// | ||
| 2603 | /// This is a function analysis because much of the cost modeling for targets | ||
| 2604 | /// is done in a subtarget-specific way, and LLVM supports compiling different | ||
| 2605 | /// functions for different subtargets in order to support runtime dispatch | ||
| 2606 | /// according to the observed subtarget. | ||
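| | /// | ||
| | /// A minimal usage sketch under the new pass manager, assuming \c FAM is a | ||
| | /// FunctionAnalysisManager with this analysis registered and \c F and \c I | ||
| | /// are a Function and one of its Instructions: | ||
| | /// \code | ||
| | ///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); | ||
| | ///   InstructionCost Cost = | ||
| | ///       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); | ||
| | /// \endcode | ||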
| 2607 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { | ||
| 2608 | public: | ||
| 2609 | typedef TargetTransformInfo Result; | ||
| 2610 | |||
| 2611 |   /// Default construct a target IR analysis. | ||
| 2612 |   /// | ||
| 2613 |   /// This will use the module's datalayout to construct a baseline | ||
| 2614 |   /// conservative TTI result. | ||
| 2615 | TargetIRAnalysis(); | ||
| 2616 | |||
| 2617 |   /// Construct an IR analysis pass around a target-provided callback. | ||
| 2618 |   /// | ||
| 2619 |   /// The callback will be called with a particular function for which the TTI | ||
| 2620 |   /// is needed and must return a TTI object for that function. | ||
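| |   /// | ||
| |   /// For illustration only, a sketch assuming \c TM points at a valid | ||
| |   /// TargetMachine: | ||
| |   /// \code | ||
| |   ///   TargetIRAnalysis TIRA([TM](const Function &F) { | ||
| |   ///     return TM->getTargetTransformInfo(F); | ||
| |   ///   }); | ||
| |   /// \endcode | ||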
| 2621 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); | ||
| 2622 | |||
| 2623 |   // Value semantics. We spell out the constructors for MSVC. | ||
| 2624 | TargetIRAnalysis(const TargetIRAnalysis &Arg) | ||
| 2625 | : TTICallback(Arg.TTICallback) {} | ||
| 2626 | TargetIRAnalysis(TargetIRAnalysis &&Arg) | ||
| 2627 | : TTICallback(std::move(Arg.TTICallback)) {} | ||
| 2628 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { | ||
| 2629 | TTICallback = RHS.TTICallback; | ||
| 2630 | return *this; | ||
| 2631 |   } | ||
| 2632 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { | ||
| 2633 | TTICallback = std::move(RHS.TTICallback); | ||
| 2634 | return *this; | ||
| 2635 |   } | ||
| 2636 | |||
| 2637 | Result run(const Function &F, FunctionAnalysisManager &); | ||
| 2638 | |||
| 2639 | private: | ||
| 2640 | friend AnalysisInfoMixin<TargetIRAnalysis>; | ||
| 2641 | static AnalysisKey Key; | ||
| 2642 | |||
| 2643 |   /// The callback used to produce a result. | ||
| 2644 |   /// | ||
| 2645 |   /// We use a completely opaque callback so that targets can provide whatever | ||
| 2646 |   /// mechanism they desire for constructing the TTI for a given function. | ||
| 2647 |   /// | ||
| 2648 |   /// FIXME: Should we really use std::function? It's relatively inefficient. | ||
| 2649 |   /// It might be possible to arrange for even stateful callbacks to outlive | ||
| 2650 |   /// the analysis and thus use a function_ref which would be lighter weight. | ||
| 2651 |   /// This may also be less error prone as the callback is likely to reference | ||
| 2652 |   /// the external TargetMachine, and that reference needs to never dangle. | ||
| 2653 | std::function<Result(const Function &)> TTICallback; | ||
| 2654 | |||
| 2655 |   /// Helper function used as the callback in the default constructor. | ||
| 2656 | static Result getDefaultTTI(const Function &F); | ||
| 2657 | }; | ||
| 2658 | |||
| 2659 | /// Wrapper pass for TargetTransformInfo. | ||
| 2660 | /// | ||
| 2661 | /// This pass can be constructed from a TTI object, which it stores internally | ||
| 2662 | /// and makes available to the passes that query it. | ||
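| | /// | ||
| | /// A sketch of the typical legacy-pass-manager query for a Function \c F, | ||
| | /// assuming the pass has declared its dependency on this wrapper via | ||
| | /// getAnalysisUsage: | ||
| | /// \code | ||
| | ///   TargetTransformInfo &TTI = | ||
| | ///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | ||
| | /// \endcode | ||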
| 2663 | class TargetTransformInfoWrapperPass : public ImmutablePass { | ||
| 2664 |   TargetIRAnalysis TIRA; | ||
| 2665 | std::optional<TargetTransformInfo> TTI; | ||
| 2666 | |||
| 2667 | virtual void anchor(); | ||
| 2668 | |||
| 2669 | public: | ||
| 2670 | static char ID; | ||
| 2671 | |||
| 2672 |   /// We must provide a default constructor for the pass, but it should | ||
| 2673 |   /// never be used. | ||
| 2674 |   /// | ||
| 2675 |   /// Use the constructor below or call one of the creation routines. | ||
| 2676 | TargetTransformInfoWrapperPass(); | ||
| 2677 | |||
| 2678 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); | ||
| 2679 | |||
| 2680 | TargetTransformInfo &getTTI(const Function &F); | ||
| 2681 | }; | ||
| 2682 | |||
| 2683 | /// Create an analysis pass wrapper around a TTI object. | ||
| 2684 | /// | ||
| 2685 | /// This analysis pass just holds the TTI instance and makes it available to | ||
| 2686 | /// clients. | ||
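| | /// | ||
| | /// A typical construction site, sketched assuming a legacy PassManager \c PM | ||
| | /// and a TargetMachine pointer \c TM: | ||
| | /// \code | ||
| | ///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); | ||
| | /// \endcode | ||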
| 2687 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); | ||
| 2688 | |||
| 2689 | } // namespace llvm | ||
| 2690 | |||
| 2691 | #endif |