  1. //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. /// This file provides helpers for the implementation of
  10. /// a TargetTransformInfo-conforming class.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13.  
  14. #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
  15. #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
  16.  
  17. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  18. #include "llvm/Analysis/TargetTransformInfo.h"
  19. #include "llvm/Analysis/VectorUtils.h"
  20. #include "llvm/IR/DataLayout.h"
  21. #include "llvm/IR/GetElementPtrTypeIterator.h"
  22. #include "llvm/IR/IntrinsicInst.h"
  23. #include "llvm/IR/Operator.h"
  24. #include "llvm/IR/PatternMatch.h"
  25. #include <optional>
  26. #include <utility>
  27.  
  28. namespace llvm {
  29.  
  30. class Function;
  31.  
  32. /// Base class for use as a mix-in that aids implementing
  33. /// a TargetTransformInfo-compatible class.
  34. class TargetTransformInfoImplBase {
  35. protected:
  36.   typedef TargetTransformInfo TTI;
  37.  
  38.   const DataLayout &DL;
  39.  
  40.   explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
  41.  
  42. public:
  43.   // Provide value semantics. MSVC requires that we spell all of these out.
  44.   TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  45.   TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
  46.  
  47.   const DataLayout &getDataLayout() const { return DL; }
  48.  
  49.   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
  50.                              ArrayRef<const Value *> Operands,
  51.                              TTI::TargetCostKind CostKind) const {
  52.     // In the basic model, we just assume that all-constant GEPs will be folded
  53.     // into their uses via addressing modes.
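          // For illustration (hypothetical IR, not taken from this file):
          //   %a = getelementptr i32, ptr %p, i64 4   ; all-constant -> TCC_Free
          //   %b = getelementptr i32, ptr %p, i64 %i  ; variable index -> TCC_Basic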
  54.     for (const Value *Operand : Operands)
  55.       if (!isa<Constant>(Operand))
  56.         return TTI::TCC_Basic;
  57.  
  58.     return TTI::TCC_Free;
  59.   }
  60.  
  61.   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
  62.                                             unsigned &JTSize,
  63.                                             ProfileSummaryInfo *PSI,
  64.                                             BlockFrequencyInfo *BFI) const {
  65.     (void)PSI;
  66.     (void)BFI;
  67.     JTSize = 0;
  68.     return SI.getNumCases();
  69.   }
  70.  
  71.   unsigned getInliningThresholdMultiplier() const { return 1; }
  72.   unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  73.  
  74.   int getInlinerVectorBonusPercent() const { return 150; }
  75.  
  76.   InstructionCost getMemcpyCost(const Instruction *I) const {
  77.     return TTI::TCC_Expensive;
  78.   }
  79.  
  80.   // Although this default value is arbitrary, it is not random. It is assumed
  81.   // that a condition that evaluates the same way by a higher percentage than
  82.   // this is best represented as control flow. Therefore, the default value N
  83.   // should be set such that the win from N% correct executions is greater than
  84.   // the loss from (100 - N)% mispredicted executions for the majority of
  85.   // intended targets.
  86.   BranchProbability getPredictableBranchThreshold() const {
  87.     return BranchProbability(99, 100);
  88.   }
  89.  
  90.   bool hasBranchDivergence() const { return false; }
  91.  
  92.   bool useGPUDivergenceAnalysis() const { return false; }
  93.  
  94.   bool isSourceOfDivergence(const Value *V) const { return false; }
  95.  
  96.   bool isAlwaysUniform(const Value *V) const { return false; }
  97.  
  98.   unsigned getFlatAddressSpace() const { return -1; }
  99.  
  100.   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  101.                                   Intrinsic::ID IID) const {
  102.     return false;
  103.   }
  104.  
  105.   bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  106.   bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
  107.     return AS == 0;
  108.   }
  109.  
  110.   unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
  111.  
  112.   bool isSingleThreaded() const { return false; }
  113.  
  114.   std::pair<const Value *, unsigned>
  115.   getPredicatedAddrSpace(const Value *V) const {
  116.     return std::make_pair(nullptr, -1);
  117.   }
  118.  
  119.   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
  120.                                           Value *NewV) const {
  121.     return nullptr;
  122.   }
  123.  
  124.   bool isLoweredToCall(const Function *F) const {
  125.     assert(F && "A concrete function must be provided to this routine.");
  126.  
  127.     // FIXME: These should almost certainly not be handled here, and instead
  128.     // handled with the help of TLI or the target itself. This was largely
  129.     // ported from existing analysis heuristics here so that such refactorings
  130.     // can take place in the future.
  131.  
  132.     if (F->isIntrinsic())
  133.       return false;
  134.  
  135.     if (F->hasLocalLinkage() || !F->hasName())
  136.       return true;
  137.  
  138.     StringRef Name = F->getName();
  139.  
  140.     // These will all likely lower to a single selection DAG node.
  141.     if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
  142.         Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
  143.         Name == "fmin" || Name == "fminf" || Name == "fminl" ||
  144.         Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
  145.         Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
  146.         Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
  147.       return false;
  148.  
  149.     // These are all likely to be optimized into something smaller.
  150.     if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
  151.         Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
  152.         Name == "floorf" || Name == "ceil" || Name == "round" ||
  153.         Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
  154.         Name == "llabs")
  155.       return false;
  156.  
  157.     return true;
  158.   }
  159.  
  160.   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
  161.                                 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
  162.                                 HardwareLoopInfo &HWLoopInfo) const {
  163.     return false;
  164.   }
  165.  
  166.   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
  167.                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
  168.                                    DominatorTree *DT,
  169.                                    LoopVectorizationLegality *LVL,
  170.                                    InterleavedAccessInfo *IAI) const {
  171.     return false;
  172.   }
  173.  
  174.   PredicationStyle emitGetActiveLaneMask() const {
  175.     return PredicationStyle::None;
  176.   }
  177.  
  178.   std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
  179.                                                     IntrinsicInst &II) const {
  180.     return std::nullopt;
  181.   }
  182.  
  183.   std::optional<Value *>
  184.   simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
  185.                                    APInt DemandedMask, KnownBits &Known,
  186.                                    bool &KnownBitsComputed) const {
  187.     return std::nullopt;
  188.   }
  189.  
  190.   std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
  191.       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
  192.       APInt &UndefElts2, APInt &UndefElts3,
  193.       std::function<void(Instruction *, unsigned, APInt, APInt &)>
  194.           SimplifyAndSetOp) const {
  195.     return std::nullopt;
  196.   }
  197.  
  198.   void getUnrollingPreferences(Loop *, ScalarEvolution &,
  199.                                TTI::UnrollingPreferences &,
  200.                                OptimizationRemarkEmitter *) const {}
  201.  
  202.   void getPeelingPreferences(Loop *, ScalarEvolution &,
  203.                              TTI::PeelingPreferences &) const {}
  204.  
  205.   bool isLegalAddImmediate(int64_t Imm) const { return false; }
  206.  
  207.   bool isLegalICmpImmediate(int64_t Imm) const { return false; }
  208.  
  209.   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  210.                              bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
  211.                              Instruction *I = nullptr) const {
  212.     // Guess that only reg and reg+reg addressing is allowed. This heuristic is
  213.     // taken from the implementation of LSR.
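           // Under that guess, for example, [reg] and [reg + reg] forms are
           // accepted, while [reg + 8] or [reg + 4*reg] (a constant offset, or a
           // scale other than 0 or 1) are rejected. This is only an illustration
           // of the conservative default, not a statement about any target.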
  214.     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  215.   }
  216.  
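         // The comparison below is lexicographic: a candidate needing fewer
         // registers always wins, and the remaining fields (AddRecCost,
         // NumIVMuls, ...) only act as tie-breakers, in that order.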
  217.   bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
  218.     return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
  219.                     C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
  220.            std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
  221.                     C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  222.   }
  223.  
  224.   bool isNumRegsMajorCostOfLSR() const { return true; }
  225.  
  226.   bool isProfitableLSRChainElement(Instruction *I) const { return false; }
  227.  
  228.   bool canMacroFuseCmp() const { return false; }
  229.  
  230.   bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
  231.                   DominatorTree *DT, AssumptionCache *AC,
  232.                   TargetLibraryInfo *LibInfo) const {
  233.     return false;
  234.   }
  235.  
  236.   TTI::AddressingModeKind
  237.     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
  238.     return TTI::AMK_None;
  239.   }
  240.  
  241.   bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
  242.     return false;
  243.   }
  244.  
  245.   bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
  246.     return false;
  247.   }
  248.  
  249.   bool isLegalNTStore(Type *DataType, Align Alignment) const {
  250.     // By default, assume nontemporal memory stores are available for stores
  251.     // that are aligned and have a size that is a power of 2.
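           // For example, a 16-byte <4 x float> store with 16-byte alignment
           // would be considered legal here, while the same store with 8-byte
           // alignment would not (an illustration of this default heuristic,
           // not a statement about any particular target).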
  252.     unsigned DataSize = DL.getTypeStoreSize(DataType);
  253.     return Alignment >= DataSize && isPowerOf2_32(DataSize);
  254.   }
  255.  
  256.   bool isLegalNTLoad(Type *DataType, Align Alignment) const {
  257.     // By default, assume nontemporal memory loads are available for loads that
  258.     // are aligned and have a size that is a power of 2.
  259.     unsigned DataSize = DL.getTypeStoreSize(DataType);
  260.     return Alignment >= DataSize && isPowerOf2_32(DataSize);
  261.   }
  262.  
  263.   bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
  264.     return false;
  265.   }
  266.  
  267.   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
  268.     return false;
  269.   }
  270.  
  271.   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
  272.     return false;
  273.   }
  274.  
  275.   bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
  276.     return false;
  277.   }
  278.  
  279.   bool forceScalarizeMaskedScatter(VectorType *DataType,
  280.                                    Align Alignment) const {
  281.     return false;
  282.   }
  283.  
  284.   bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
  285.  
  286.   bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  287.                        const SmallBitVector &OpcodeMask) const {
  288.     return false;
  289.   }
  290.  
  291.   bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
  292.  
  293.   bool enableOrderedReductions() const { return false; }
  294.  
  295.   bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
  296.  
  297.   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
  298.     return false;
  299.   }
  300.  
  301.   bool prefersVectorizedAddressing() const { return true; }
  302.  
  303.   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
  304.                                        int64_t BaseOffset, bool HasBaseReg,
  305.                                        int64_t Scale,
  306.                                        unsigned AddrSpace) const {
  307.     // Guess that all legal addressing modes are free.
  308.     if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  309.                               AddrSpace))
  310.       return 0;
  311.     return -1;
  312.   }
  313.  
  314.   bool LSRWithInstrQueries() const { return false; }
  315.  
  316.   bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
  317.  
  318.   bool isProfitableToHoist(Instruction *I) const { return true; }
  319.  
  320.   bool useAA() const { return false; }
  321.  
  322.   bool isTypeLegal(Type *Ty) const { return false; }
  323.  
  324.   unsigned getRegUsageForType(Type *Ty) const { return 1; }
  325.  
  326.   bool shouldBuildLookupTables() const { return true; }
  327.  
  328.   bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
  329.  
  330.   bool shouldBuildRelLookupTables() const { return false; }
  331.  
  332.   bool useColdCCForColdCall(Function &F) const { return false; }
  333.  
  334.   InstructionCost getScalarizationOverhead(VectorType *Ty,
  335.                                            const APInt &DemandedElts,
  336.                                            bool Insert, bool Extract,
  337.                                            TTI::TargetCostKind CostKind) const {
  338.     return 0;
  339.   }
  340.  
  341.   InstructionCost
  342.   getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
  343.                                    ArrayRef<Type *> Tys,
  344.                                    TTI::TargetCostKind CostKind) const {
  345.     return 0;
  346.   }
  347.  
  348.   bool supportsEfficientVectorElementLoadStore() const { return false; }
  349.  
  350.   bool supportsTailCalls() const { return true; }
  351.  
  352.   bool supportsTailCallFor(const CallBase *CB) const {
  353.     return supportsTailCalls();
  354.   }
  355.  
  356.   bool enableAggressiveInterleaving(bool LoopHasReductions) const {
  357.     return false;
  358.   }
  359.  
  360.   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
  361.                                                     bool IsZeroCmp) const {
  362.     return {};
  363.   }
  364.  
  365.   bool enableSelectOptimize() const { return true; }
  366.  
  367.   bool enableInterleavedAccessVectorization() const { return false; }
  368.  
  369.   bool enableMaskedInterleavedAccessVectorization() const { return false; }
  370.  
  371.   bool isFPVectorizationPotentiallyUnsafe() const { return false; }
  372.  
  373.   bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
  374.                                       unsigned AddressSpace, Align Alignment,
  375.                                       unsigned *Fast) const {
  376.     return false;
  377.   }
  378.  
  379.   TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
  380.     return TTI::PSK_Software;
  381.   }
  382.  
  383.   bool haveFastSqrt(Type *Ty) const { return false; }
  384.  
  385.   bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
  386.  
  387.   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
  388.  
  389.   InstructionCost getFPOpCost(Type *Ty) const {
  390.     return TargetTransformInfo::TCC_Basic;
  391.   }
  392.  
  393.   InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
  394.                                         const APInt &Imm, Type *Ty) const {
  395.     return 0;
  396.   }
  397.  
  398.   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  399.                                 TTI::TargetCostKind CostKind) const {
  400.     return TTI::TCC_Basic;
  401.   }
  402.  
  403.   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
  404.                                     const APInt &Imm, Type *Ty,
  405.                                     TTI::TargetCostKind CostKind,
  406.                                     Instruction *Inst = nullptr) const {
  407.     return TTI::TCC_Free;
  408.   }
  409.  
  410.   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
  411.                                       const APInt &Imm, Type *Ty,
  412.                                       TTI::TargetCostKind CostKind) const {
  413.     return TTI::TCC_Free;
  414.   }
  415.  
  416.   unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  417.  
  418.   unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
  419.     return Vector ? 1 : 0;
  420.   }
  421.  
  422.   const char *getRegisterClassName(unsigned ClassID) const {
  423.     switch (ClassID) {
  424.     default:
  425.       return "Generic::Unknown Register Class";
  426.     case 0:
  427.       return "Generic::ScalarRC";
  428.     case 1:
  429.       return "Generic::VectorRC";
  430.     }
  431.   }
  432.  
  433.   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  434.     return TypeSize::getFixed(32);
  435.   }
  436.  
  437.   unsigned getMinVectorRegisterBitWidth() const { return 128; }
  438.  
  439.   std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  440.   std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  441.  
  442.   bool
  443.   shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
  444.     return false;
  445.   }
  446.  
  447.   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
  448.     return ElementCount::get(0, IsScalable);
  449.   }
  450.  
  451.   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  452.   unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
  453.  
  454.   bool shouldConsiderAddressTypePromotion(
  455.       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  456.     AllowPromotionWithoutCommonHeader = false;
  457.     return false;
  458.   }
  459.  
  460.   unsigned getCacheLineSize() const { return 0; }
  461.   std::optional<unsigned>
  462.   getCacheSize(TargetTransformInfo::CacheLevel Level) const {
  463.     switch (Level) {
  464.     case TargetTransformInfo::CacheLevel::L1D:
  465.       [[fallthrough]];
  466.     case TargetTransformInfo::CacheLevel::L2D:
  467.       return std::nullopt;
  468.     }
  469.     llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  470.   }
  471.  
  472.   std::optional<unsigned>
  473.   getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
  474.     switch (Level) {
  475.     case TargetTransformInfo::CacheLevel::L1D:
  476.       [[fallthrough]];
  477.     case TargetTransformInfo::CacheLevel::L2D:
  478.       return std::nullopt;
  479.     }
  480.  
  481.     llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  482.   }
  483.  
  484.   unsigned getPrefetchDistance() const { return 0; }
  485.   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
  486.                                 unsigned NumStridedMemAccesses,
  487.                                 unsigned NumPrefetches, bool HasCall) const {
  488.     return 1;
  489.   }
  490.   unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  491.   bool enableWritePrefetching() const { return false; }
  492.   bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
  493.  
  494.   unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
  495.  
  496.   InstructionCost getArithmeticInstrCost(
  497.       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
  498.       TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
  499.       ArrayRef<const Value *> Args,
  500.       const Instruction *CxtI = nullptr) const {
  501.     // FIXME: A number of transformation tests seem to require these values,
  502.     // which seems a little odd given how arbitrary they are.
  503.     switch (Opcode) {
  504.     default:
  505.       break;
  506.     case Instruction::FDiv:
  507.     case Instruction::FRem:
  508.     case Instruction::SDiv:
  509.     case Instruction::SRem:
  510.     case Instruction::UDiv:
  511.     case Instruction::URem:
  512.       // FIXME: Unlikely to be true for CodeSize.
  513.       return TTI::TCC_Expensive;
  514.     }
  515.  
  516.     // Assume a 3cy latency for fp arithmetic ops.
  517.     if (CostKind == TTI::TCK_Latency)
  518.       if (Ty->getScalarType()->isFloatingPointTy())
  519.         return 3;
  520.  
  521.     return 1;
  522.   }
  523.  
  524.   InstructionCost
  525.   getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
  526.                  TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
  527.                  ArrayRef<const Value *> Args = std::nullopt) const {
  528.     return 1;
  529.   }
  530.  
  531.   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
  532.                                    TTI::CastContextHint CCH,
  533.                                    TTI::TargetCostKind CostKind,
  534.                                    const Instruction *I) const {
  535.     switch (Opcode) {
  536.     default:
  537.       break;
  538.     case Instruction::IntToPtr: {
  539.       unsigned SrcSize = Src->getScalarSizeInBits();
  540.       if (DL.isLegalInteger(SrcSize) &&
  541.           SrcSize <= DL.getPointerTypeSizeInBits(Dst))
  542.         return 0;
  543.       break;
  544.     }
  545.     case Instruction::PtrToInt: {
  546.       unsigned DstSize = Dst->getScalarSizeInBits();
  547.       if (DL.isLegalInteger(DstSize) &&
  548.           DstSize >= DL.getPointerTypeSizeInBits(Src))
  549.         return 0;
  550.       break;
  551.     }
  552.     case Instruction::BitCast:
  553.       if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
  554.         // Identity and pointer-to-pointer casts are free.
  555.         return 0;
  556.       break;
  557.     case Instruction::Trunc: {
  558.       // trunc to a native type is free (assuming the target has compare and
  559.       // shift-right of the same width).
  560.       TypeSize DstSize = DL.getTypeSizeInBits(Dst);
  561.       if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
  562.         return 0;
  563.       break;
  564.     }
  565.     }
  566.     return 1;
  567.   }
  568.  
  569.   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
  570.                                            VectorType *VecTy,
  571.                                            unsigned Index) const {
  572.     return 1;
  573.   }
  574.  
  575.   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
  576.                                  const Instruction *I = nullptr) const {
  577.     // A phi would be free, unless we're costing the throughput, because it
  578.     // will then require a register.
  579.     if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
  580.       return 0;
  581.     return 1;
  582.   }
  583.  
  584.   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
  585.                                      CmpInst::Predicate VecPred,
  586.                                      TTI::TargetCostKind CostKind,
  587.                                      const Instruction *I) const {
  588.     return 1;
  589.   }
  590.  
  591.   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
  592.                                      TTI::TargetCostKind CostKind,
  593.                                      unsigned Index, Value *Op0,
  594.                                      Value *Op1) const {
  595.     return 1;
  596.   }
  597.  
  598.   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
  599.                                      TTI::TargetCostKind CostKind,
  600.                                      unsigned Index) const {
  601.     return 1;
  602.   }
  603.  
  604.   unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
  605.                                      const APInt &DemandedDstElts,
  606.                                      TTI::TargetCostKind CostKind) {
  607.     return 1;
  608.   }
  609.  
  610.   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  611.                                   unsigned AddressSpace,
  612.                                   TTI::TargetCostKind CostKind,
  613.                                   TTI::OperandValueInfo OpInfo,
  614.                                   const Instruction *I) const {
  615.     return 1;
  616.   }
  617.  
  618.   InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  619.                                     unsigned AddressSpace,
  620.                                     TTI::TargetCostKind CostKind,
  621.                                     const Instruction *I) const {
  622.     return 1;
  623.   }
  624.  
  625.   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  626.                                         Align Alignment, unsigned AddressSpace,
  627.                                         TTI::TargetCostKind CostKind) const {
  628.     return 1;
  629.   }
  630.  
  631.   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
  632.                                          const Value *Ptr, bool VariableMask,
  633.                                          Align Alignment,
  634.                                          TTI::TargetCostKind CostKind,
  635.                                          const Instruction *I = nullptr) const {
  636.     return 1;
  637.   }
  638.  
  639.   unsigned getInterleavedMemoryOpCost(
  640.       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
  641.       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
  642.       bool UseMaskForCond, bool UseMaskForGaps) const {
  643.     return 1;
  644.   }
  645.  
  646.   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  647.                                         TTI::TargetCostKind CostKind) const {
  648.     switch (ICA.getID()) {
  649.     default:
  650.       break;
  651.     case Intrinsic::annotation:
  652.     case Intrinsic::assume:
  653.     case Intrinsic::sideeffect:
  654.     case Intrinsic::pseudoprobe:
  655.     case Intrinsic::arithmetic_fence:
  656.     case Intrinsic::dbg_declare:
  657.     case Intrinsic::dbg_value:
  658.     case Intrinsic::dbg_label:
  659.     case Intrinsic::invariant_start:
  660.     case Intrinsic::invariant_end:
  661.     case Intrinsic::launder_invariant_group:
  662.     case Intrinsic::strip_invariant_group:
  663.     case Intrinsic::is_constant:
  664.     case Intrinsic::lifetime_start:
  665.     case Intrinsic::lifetime_end:
  666.     case Intrinsic::experimental_noalias_scope_decl:
  667.     case Intrinsic::objectsize:
  668.     case Intrinsic::ptr_annotation:
  669.     case Intrinsic::var_annotation:
  670.     case Intrinsic::experimental_gc_result:
  671.     case Intrinsic::experimental_gc_relocate:
  672.     case Intrinsic::coro_alloc:
  673.     case Intrinsic::coro_begin:
  674.     case Intrinsic::coro_free:
  675.     case Intrinsic::coro_end:
  676.     case Intrinsic::coro_frame:
  677.     case Intrinsic::coro_size:
  678.     case Intrinsic::coro_align:
  679.     case Intrinsic::coro_suspend:
  680.     case Intrinsic::coro_subfn_addr:
  681.     case Intrinsic::threadlocal_address:
  682.       // These intrinsics don't actually represent code after lowering.
  683.       return 0;
  684.     }
  685.     return 1;
  686.   }
  687.  
  688.   InstructionCost getCallInstrCost(Function *F, Type *RetTy,
  689.                                    ArrayRef<Type *> Tys,
  690.                                    TTI::TargetCostKind CostKind) const {
  691.     return 1;
  692.   }
  693.  
  694.   // Assume that we have a register of the right size for the type.
  695.   unsigned getNumberOfParts(Type *Tp) const { return 1; }
  696.  
  697.   InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
  698.                                             const SCEV *) const {
  699.     return 0;
  700.   }
  701.  
  702.   InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
  703.                                              std::optional<FastMathFlags> FMF,
  704.                                              TTI::TargetCostKind) const {
  705.     return 1;
  706.   }
  707.  
  708.   InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
  709.                                          TTI::TargetCostKind) const {
  710.     return 1;
  711.   }
  712.  
  713.   InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
  714.                                            Type *ResTy, VectorType *Ty,
  715.                                            std::optional<FastMathFlags> FMF,
  716.                                            TTI::TargetCostKind CostKind) const {
  717.     return 1;
  718.   }
  719.  
  720.   InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
  721.                                          VectorType *Ty,
  722.                                          TTI::TargetCostKind CostKind) const {
  723.     return 1;
  724.   }
  725.  
  726.   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  727.     return 0;
  728.   }
  729.  
  730.   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
  731.     return false;
  732.   }
  733.  
  734.   unsigned getAtomicMemIntrinsicMaxElementSize() const {
  735.     // Note for overrides: You must ensure for all element unordered-atomic
  736.     // memory intrinsics that all power-of-2 element sizes up to, and
  737.     // including, the return value of this method have a corresponding
  738.     // runtime lib call. These runtime lib call definitions can be found
  739.     // in RuntimeLibcalls.h
  740.     return 0;
  741.   }
  742.  
  743.   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
  744.                                            Type *ExpectedType) const {
  745.     return nullptr;
  746.   }
  747.  
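         // As an illustration of the default below: an element-atomic memcpy
         // with a 4-byte element size is lowered with an i32 operand type,
         // while a plain (non-atomic) memcpy falls back to i8.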
  748.   Type *
  749.   getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
  750.                             unsigned SrcAddrSpace, unsigned DestAddrSpace,
  751.                             unsigned SrcAlign, unsigned DestAlign,
  752.                             std::optional<uint32_t> AtomicElementSize) const {
  753.     return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
  754.                              : Type::getInt8Ty(Context);
  755.   }
  756.  
  757.   void getMemcpyLoopResidualLoweringType(
  758.       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
  759.       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  760.       unsigned SrcAlign, unsigned DestAlign,
  761.       std::optional<uint32_t> AtomicCpySize) const {
  762.     unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
  763.     Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
  764.     for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
  765.       OpsOut.push_back(OpType);
  766.   }
  767.  
  768.   bool areInlineCompatible(const Function *Caller,
  769.                            const Function *Callee) const {
  770.     return (Caller->getFnAttribute("target-cpu") ==
  771.             Callee->getFnAttribute("target-cpu")) &&
  772.            (Caller->getFnAttribute("target-features") ==
  773.             Callee->getFnAttribute("target-features"));
  774.   }
  775.  
  776.   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
  777.                              const ArrayRef<Type *> &Types) const {
  778.     return (Caller->getFnAttribute("target-cpu") ==
  779.             Callee->getFnAttribute("target-cpu")) &&
  780.            (Caller->getFnAttribute("target-features") ==
  781.             Callee->getFnAttribute("target-features"));
  782.   }
  783.  
  784.   bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
  785.                           const DataLayout &DL) const {
  786.     return false;
  787.   }
  788.  
  789.   bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
  790.                            const DataLayout &DL) const {
  791.     return false;
  792.   }
  793.  
  794.   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
  795.  
  796.   bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
  797.  
  798.   bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
  799.  
  800.   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
  801.                                    unsigned AddrSpace) const {
  802.     return true;
  803.   }
  804.  
  805.   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
  806.                                     unsigned AddrSpace) const {
  807.     return true;
  808.   }
  809.  
  810.   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
  811.                                    ElementCount VF) const {
  812.     return true;
  813.   }
  814.  
  815.   bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
  816.  
  817.   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
  818.                                unsigned ChainSizeInBytes,
  819.                                VectorType *VecTy) const {
  820.     return VF;
  821.   }
  822.  
  823.   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
  824.                                 unsigned ChainSizeInBytes,
  825.                                 VectorType *VecTy) const {
  826.     return VF;
  827.   }
  828.  
  829.   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
  830.                              TTI::ReductionFlags Flags) const {
  831.     return false;
  832.   }
  833.  
  834.   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
  835.                                        TTI::ReductionFlags Flags) const {
  836.     return false;
  837.   }
  838.  
  839.   bool preferEpilogueVectorization() const {
  840.     return true;
  841.   }
  842.  
  843.   bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
  844.  
  845.   unsigned getGISelRematGlobalCost() const { return 1; }
  846.  
  847.   unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
  848.  
  849.   bool supportsScalableVectors() const { return false; }
  850.  
  851.   bool enableScalableVectorization() const { return false; }
  852.  
  853.   bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
  854.                              Align Alignment) const {
  855.     return false;
  856.   }
  857.  
  858.   TargetTransformInfo::VPLegalization
  859.   getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  860.     return TargetTransformInfo::VPLegalization(
  861.         /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
  862.         /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  863.   }
  864.  
  865. protected:
  866.   // Obtain the minimum required size to hold the value (without the sign bit).
  867.   // For a vector it returns the min required size for a single element.
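         // A worked example (hypothetical values): for the constant vector
         // <2 x i16> <i16 1, i16 -4>, element 1 needs 1 bit and element -4
         // needs 2 bits on top of the sign, so this returns 2 and sets
         // isSigned to true.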
  868.   unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
  869.     if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
  870.       const auto *VectorValue = cast<Constant>(Val);
  871.  
  872.       // For a vector we need to take the maximum of the minimum
  873.       // required sizes of the individual elements.
  874.       auto *VT = cast<FixedVectorType>(Val->getType());
  875.  
  876.       // Assume unsigned elements
  877.       isSigned = false;
  878.  
  879.       // The max required size is the size of the vector element type
  880.       unsigned MaxRequiredSize =
  881.           VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
  882.  
  883.       unsigned MinRequiredSize = 0;
  884.       for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
  885.         if (auto *IntElement =
  886.                 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
  887.           bool signedElement = IntElement->getValue().isNegative();
  888.           // Get the element min required size.
  889.           unsigned ElementMinRequiredSize =
  890.               IntElement->getValue().getMinSignedBits() - 1;
  891.           // If one element is signed then the whole vector is considered signed.
  892.           isSigned |= signedElement;
  893.           // Keep the maximum required bit size over all the elements.
  894.           MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
  895.         } else {
  896.           // Not an integer constant element.
  897.           return MaxRequiredSize;
  898.         }
  899.       }
  900.       return MinRequiredSize;
  901.     }
  902.  
  903.     if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
  904.       isSigned = CI->getValue().isNegative();
  905.       return CI->getValue().getMinSignedBits() - 1;
  906.     }
  907.  
  908.     if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
  909.       isSigned = true;
  910.       return Cast->getSrcTy()->getScalarSizeInBits() - 1;
  911.     }
  912.  
  913.     if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
  914.       isSigned = false;
  915.       return Cast->getSrcTy()->getScalarSizeInBits();
  916.     }
  917.  
  918.     isSigned = false;
  919.     return Val->getType()->getScalarSizeInBits();
  920.   }
  921.  
  922.   bool isStridedAccess(const SCEV *Ptr) const {
  923.     return Ptr && isa<SCEVAddRecExpr>(Ptr);
  924.   }
  925.  
  926.   const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
  927.                                             const SCEV *Ptr) const {
  928.     if (!isStridedAccess(Ptr))
  929.       return nullptr;
  930.     const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
  931.     return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  932.   }
  933.  
  934.   bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
  935.                                        int64_t MergeDistance) const {
  936.     const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
  937.     if (!Step)
  938.       return false;
  939.     APInt StrideVal = Step->getAPInt();
  940.     if (StrideVal.getBitWidth() > 64)
  941.       return false;
  942.     // FIXME: Need to take absolute value for negative stride case.
  943.     return StrideVal.getSExtValue() < MergeDistance;
  944.   }
  945. };
  946.  
  947. /// CRTP base class for use as a mix-in that aids implementing
  948. /// a TargetTransformInfo-compatible class.
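       /// A minimal sketch (hypothetical target, not an in-tree backend) of how
       /// this mix-in is meant to be used: pass the derived class itself as the
       /// template parameter and override only the hooks it can answer better
       /// than the conservative defaults, e.g.
       /// \code
       ///   class MyTargetTTIImpl
       ///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
       ///   public:
       ///     explicit MyTargetTTIImpl(const DataLayout &DL)
       ///         : TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>(DL) {}
       ///     unsigned getNumberOfRegisters(unsigned ClassID) const { return 32; }
       ///   };
       /// \endcode
       /// In-tree targets usually derive from BasicTTIImplBase, which in turn
       /// derives from this class.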
  949. template <typename T>
  950. class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
  951. private:
  952.   typedef TargetTransformInfoImplBase BaseT;
  953.  
  954. protected:
  955.   explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
  956.  
  957. public:
  958.   using BaseT::getGEPCost;
  959.  
  960.   InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
  961.                              ArrayRef<const Value *> Operands,
  962.                              TTI::TargetCostKind CostKind) {
  963.     assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
  964.     assert(cast<PointerType>(Ptr->getType()->getScalarType())
  965.                ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
  966.            "explicit pointee type doesn't match operand's pointee type");
  967.     auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
  968.     bool HasBaseReg = (BaseGV == nullptr);
  969.  
  970.     auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
  971.     APInt BaseOffset(PtrSizeBits, 0);
  972.     int64_t Scale = 0;
  973.  
  974.     auto GTI = gep_type_begin(PointeeType, Operands);
  975.     Type *TargetType = nullptr;
  976.  
  977.     // Handle the case where the GEP instruction has a single operand,
  978.     // the base pointer, in which case TargetType stays a nullptr.
  979.     if (Operands.empty())
  980.       return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
  981.  
  982.     for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
  983.       TargetType = GTI.getIndexedType();
  984.       // We assume that the cost of Scalar GEP with constant index and the
  985.       // cost of Vector GEP with splat constant index are the same.
  986.       const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
  987.       if (!ConstIdx)
  988.         if (auto Splat = getSplatValue(*I))
  989.           ConstIdx = dyn_cast<ConstantInt>(Splat);
  990.       if (StructType *STy = GTI.getStructTypeOrNull()) {
  991.         // For structures the index is always a splat or a scalar constant.
  992.         assert(ConstIdx && "Unexpected GEP index");
  993.         uint64_t Field = ConstIdx->getZExtValue();
  994.         BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
  995.       } else {
  996.         // If this operand is a scalable type, bail out early.
  997.         // TODO: handle scalable vectors
  998.         if (isa<ScalableVectorType>(TargetType))
  999.           return TTI::TCC_Basic;
  1000.         int64_t ElementSize =
  1001.             DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
  1002.         if (ConstIdx) {
  1003.           BaseOffset +=
  1004.               ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
  1005.         } else {
  1006.           // Needs scale register.
  1007.           if (Scale != 0)
  1008.             // No addressing mode takes two scale registers.
  1009.             return TTI::TCC_Basic;
  1010.           Scale = ElementSize;
  1011.         }
  1012.       }
  1013.     }
  1014.  
  1015.     if (static_cast<T *>(this)->isLegalAddressingMode(
  1016.             TargetType, const_cast<GlobalValue *>(BaseGV),
  1017.             BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
  1018.             Ptr->getType()->getPointerAddressSpace()))
  1019.       return TTI::TCC_Free;
  1020.     return TTI::TCC_Basic;
  1021.   }
  1022.  
  1023.   InstructionCost getInstructionCost(const User *U,
  1024.                                      ArrayRef<const Value *> Operands,
  1025.                                      TTI::TargetCostKind CostKind) {
  1026.     using namespace llvm::PatternMatch;
  1027.  
  1028.     auto *TargetTTI = static_cast<T *>(this);
  1029.     // Handle non-intrinsic calls, invokes, and callbr.
  1030.     // FIXME: Unlikely to be true for anything but CodeSize.
  1031.     auto *CB = dyn_cast<CallBase>(U);
  1032.     if (CB && !isa<IntrinsicInst>(U)) {
  1033.       if (const Function *F = CB->getCalledFunction()) {
  1034.         if (!TargetTTI->isLoweredToCall(F))
  1035.           return TTI::TCC_Basic; // Give a basic cost if it will be lowered
  1036.  
  1037.         return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
  1038.       }
  1039.       // For indirect or other calls, scale cost by number of arguments.
  1040.       return TTI::TCC_Basic * (CB->arg_size() + 1);
  1041.     }
  1042.  
  1043.     Type *Ty = U->getType();
  1044.     unsigned Opcode = Operator::getOpcode(U);
  1045.     auto *I = dyn_cast<Instruction>(U);
  1046.     switch (Opcode) {
  1047.     default:
  1048.       break;
  1049.     case Instruction::Call: {
  1050.       assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
  1051.       auto *Intrinsic = cast<IntrinsicInst>(U);
  1052.       IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
  1053.       return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
  1054.     }
  1055.     case Instruction::Br:
  1056.     case Instruction::Ret:
  1057.     case Instruction::PHI:
  1058.     case Instruction::Switch:
  1059.       return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
  1060.     case Instruction::ExtractValue:
  1061.     case Instruction::Freeze:
  1062.       return TTI::TCC_Free;
  1063.     case Instruction::Alloca:
  1064.       if (cast<AllocaInst>(U)->isStaticAlloca())
  1065.         return TTI::TCC_Free;
  1066.       break;
  1067.     case Instruction::GetElementPtr: {
  1068.       const auto *GEP = cast<GEPOperator>(U);
  1069.       return TargetTTI->getGEPCost(GEP->getSourceElementType(),
  1070.                                    GEP->getPointerOperand(),
  1071.                                    Operands.drop_front(), CostKind);
  1072.     }
  1073.     case Instruction::Add:
  1074.     case Instruction::FAdd:
  1075.     case Instruction::Sub:
  1076.     case Instruction::FSub:
  1077.     case Instruction::Mul:
  1078.     case Instruction::FMul:
  1079.     case Instruction::UDiv:
  1080.     case Instruction::SDiv:
  1081.     case Instruction::FDiv:
  1082.     case Instruction::URem:
  1083.     case Instruction::SRem:
  1084.     case Instruction::FRem:
  1085.     case Instruction::Shl:
  1086.     case Instruction::LShr:
  1087.     case Instruction::AShr:
  1088.     case Instruction::And:
  1089.     case Instruction::Or:
  1090.     case Instruction::Xor:
  1091.     case Instruction::FNeg: {
  1092.       const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
  1093.       TTI::OperandValueInfo Op2Info;
  1094.       if (Opcode != Instruction::FNeg)
  1095.         Op2Info = TTI::getOperandInfo(U->getOperand(1));
  1096.       SmallVector<const Value *, 2> Operands(U->operand_values());
  1097.       return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
  1098.                                                Op2Info, Operands, I);
  1099.     }
  1100.     case Instruction::IntToPtr:
  1101.     case Instruction::PtrToInt:
  1102.     case Instruction::SIToFP:
  1103.     case Instruction::UIToFP:
  1104.     case Instruction::FPToUI:
  1105.     case Instruction::FPToSI:
  1106.     case Instruction::Trunc:
  1107.     case Instruction::FPTrunc:
  1108.     case Instruction::BitCast:
  1109.     case Instruction::FPExt:
  1110.     case Instruction::SExt:
  1111.     case Instruction::ZExt:
  1112.     case Instruction::AddrSpaceCast: {
  1113.       Type *OpTy = U->getOperand(0)->getType();
  1114.       return TargetTTI->getCastInstrCost(
  1115.           Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
  1116.     }
  1117.     case Instruction::Store: {
  1118.       auto *SI = cast<StoreInst>(U);
  1119.       Type *ValTy = U->getOperand(0)->getType();
  1120.       TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
  1121.       return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
  1122.                                         SI->getPointerAddressSpace(), CostKind,
  1123.                                         OpInfo, I);
  1124.     }
  1125.     case Instruction::Load: {
  1126.       // FIXME: Arbitrary cost which could come from the backend.
  1127.       if (CostKind == TTI::TCK_Latency)
  1128.         return 4;
  1129.       auto *LI = cast<LoadInst>(U);
  1130.       Type *LoadType = U->getType();
  1131.       // If there is a non-register sized type, the cost estimation may expand
  1132.       // it to be several instructions to load into multiple registers on the
  1133.       // target.  But, if the only use of the load is a trunc instruction to a
  1134.       // register sized type, the instruction selector can combine these
  1135.       // instructions to be a single load.  So, in this case, we use the
  1136.       // destination type of the trunc instruction rather than the load to
  1137.       // accurately estimate the cost of this load instruction.
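              // For example (hypothetical IR): an i128 load whose only user is
              //   %t = trunc i128 %v to i64
              // is costed below as an i64 load when optimizing for code size.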
  1138.       if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
  1139.           !LoadType->isVectorTy()) {
  1140.         if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
  1141.           LoadType = TI->getDestTy();
  1142.       }
  1143.       return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
  1144.                                         LI->getPointerAddressSpace(), CostKind,
  1145.                                         {TTI::OK_AnyValue, TTI::OP_None}, I);
  1146.     }
  1147.     case Instruction::Select: {
  1148.       const Value *Op0, *Op1;
  1149.       if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
  1150.           match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
  1151.         // select x, y, false --> x & y
  1152.         // select x, true, y --> x | y
  1153.         const auto Op1Info = TTI::getOperandInfo(Op0);
  1154.         const auto Op2Info = TTI::getOperandInfo(Op1);
  1155.         assert(Op0->getType()->getScalarSizeInBits() == 1 &&
  1156.                Op1->getType()->getScalarSizeInBits() == 1);
  1157.  
  1158.         SmallVector<const Value *, 2> Operands{Op0, Op1};
  1159.         return TargetTTI->getArithmeticInstrCost(
  1160.             match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
  1161.             CostKind, Op1Info, Op2Info, Operands, I);
  1162.       }
  1163.       Type *CondTy = U->getOperand(0)->getType();
  1164.       return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
  1165.                                            CmpInst::BAD_ICMP_PREDICATE,
  1166.                                            CostKind, I);
  1167.     }
  1168.     case Instruction::ICmp:
  1169.     case Instruction::FCmp: {
  1170.       Type *ValTy = U->getOperand(0)->getType();
  1171.       // TODO: Also handle ICmp/FCmp constant expressions.
  1172.       return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
  1173.                                            I ? cast<CmpInst>(I)->getPredicate()
  1174.                                              : CmpInst::BAD_ICMP_PREDICATE,
  1175.                                            CostKind, I);
  1176.     }
  1177.     case Instruction::InsertElement: {
  1178.       auto *IE = dyn_cast<InsertElementInst>(U);
  1179.       if (!IE)
  1180.         return TTI::TCC_Basic; // FIXME
  1181.       unsigned Idx = -1;
  1182.       if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
  1183.         if (CI->getValue().getActiveBits() <= 32)
  1184.           Idx = CI->getZExtValue();
  1185.       return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
  1186.     }
  1187.     case Instruction::ShuffleVector: {
  1188.       auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
  1189.       if (!Shuffle)
  1190.         return TTI::TCC_Basic; // FIXME
  1191.  
  1192.       auto *VecTy = cast<VectorType>(U->getType());
  1193.       auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
  1194.       int NumSubElts, SubIndex;
  1195.  
  1196.       if (Shuffle->changesLength()) {
  1197.         // Treat a 'subvector widening' as a free shuffle.
  1198.         if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
  1199.           return 0;
  1200.  
  1201.         if (Shuffle->isExtractSubvectorMask(SubIndex))
  1202.           return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
  1203.                                            Shuffle->getShuffleMask(), CostKind,
  1204.                                            SubIndex, VecTy, Operands);
  1205.  
  1206.         if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
  1207.           return TargetTTI->getShuffleCost(
  1208.               TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
  1209.               CostKind, SubIndex,
  1210.               FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
  1211.               Operands);
  1212.  
  1213.         int ReplicationFactor, VF;
  1214.         if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
  1215.           APInt DemandedDstElts =
  1216.               APInt::getNullValue(Shuffle->getShuffleMask().size());
  1217.           for (auto I : enumerate(Shuffle->getShuffleMask())) {
  1218.             if (I.value() != UndefMaskElem)
  1219.               DemandedDstElts.setBit(I.index());
  1220.           }
  1221.           return TargetTTI->getReplicationShuffleCost(
  1222.               VecSrcTy->getElementType(), ReplicationFactor, VF,
  1223.               DemandedDstElts, CostKind);
  1224.         }
  1225.  
  1226.         return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
  1227.       }
  1228.  
  1229.       if (Shuffle->isIdentity())
  1230.         return 0;
  1231.  
  1232.       if (Shuffle->isReverse())
  1233.         return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
  1234.                                          Shuffle->getShuffleMask(), CostKind, 0,
  1235.                                          nullptr, Operands);
  1236.  
  1237.       if (Shuffle->isSelect())
  1238.         return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
  1239.                                          Shuffle->getShuffleMask(), CostKind, 0,
  1240.                                          nullptr, Operands);
  1241.  
  1242.       if (Shuffle->isTranspose())
  1243.         return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
  1244.                                          Shuffle->getShuffleMask(), CostKind, 0,
  1245.                                          nullptr, Operands);
  1246.  
  1247.       if (Shuffle->isZeroEltSplat())
  1248.         return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
  1249.                                          Shuffle->getShuffleMask(), CostKind, 0,
  1250.                                          nullptr, Operands);
  1251.  
  1252.       if (Shuffle->isSingleSource())
  1253.         return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
  1254.                                          Shuffle->getShuffleMask(), CostKind, 0,
  1255.                                          nullptr, Operands);
  1256.  
  1257.       if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
  1258.         return TargetTTI->getShuffleCost(
  1259.             TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
  1260.             SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
  1261.             Operands);
  1262.  
  1263.       if (Shuffle->isSplice(SubIndex))
  1264.         return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
  1265.                                          Shuffle->getShuffleMask(), CostKind,
  1266.                                          SubIndex, nullptr, Operands);
  1267.  
  1268.       return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
  1269.                                        Shuffle->getShuffleMask(), CostKind, 0,
  1270.                                        nullptr, Operands);
  1271.     }
  1272.     case Instruction::ExtractElement: {
  1273.       auto *EEI = dyn_cast<ExtractElementInst>(U);
  1274.       if (!EEI)
  1275.         return TTI::TCC_Basic; // FIXME
  1276.       unsigned Idx = -1;
  1277.       if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
  1278.         if (CI->getValue().getActiveBits() <= 32)
  1279.           Idx = CI->getZExtValue();
  1280.       Type *DstTy = U->getOperand(0)->getType();
  1281.       return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
  1282.     }
  1283.     }
  1284.  
  1285.     // By default, just classify everything as 'basic' or -1 to represent that
  1286.     // we don't know the throughput cost.
  1287.     return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  1288.   }
  1289.  
  1290.   bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
  1291.     auto *TargetTTI = static_cast<T *>(this);
  1292.     SmallVector<const Value *, 4> Ops(I->operand_values());
  1293.     InstructionCost Cost = TargetTTI->getInstructionCost(
  1294.         I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
  1295.     return Cost >= TargetTransformInfo::TCC_Expensive;
  1296.   }
  1297. };
  1298. } // namespace llvm
  1299.  
  1300. #endif
  1301.