//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }
48
 
49
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
50
                             ArrayRef<const Value *> Operands,
51
                             TTI::TargetCostKind CostKind) const {
52
    // In the basic model, we just assume that all-constant GEPs will be folded
53
    // into their uses via addressing modes.
54
    for (const Value *Operand : Operands)
55
      if (!isa<Constant>(Operand))
56
        return TTI::TCC_Basic;
57
 
58
    return TTI::TCC_Free;
59
  }
60
 
61
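  // Default estimate: no jump table is assumed (JTSize == 0) and every case
  // forms its own cluster.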
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
62
                                            unsigned &JTSize,
63
                                            ProfileSummaryInfo *PSI,
64
                                            BlockFrequencyInfo *BFI) const {
65
    (void)PSI;
66
    (void)BFI;
67
    JTSize = 0;
68
    return SI.getNumCases();
69
  }
70
 
71
  unsigned getInliningThresholdMultiplier() const { return 1; }
72
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
73
 
74
  int getInlinerVectorBonusPercent() const { return 150; }
75
 
76
  InstructionCost getMemcpyCost(const Instruction *I) const {
77
    return TTI::TCC_Expensive;
78
  }
79
 
80
  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
86
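  // With the default 99/100 threshold below, only a branch that evaluates the
  // same way more than roughly 99% of the time is treated as predictable.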
  BranchProbability getPredictableBranchThreshold() const {
87
    return BranchProbability(99, 100);
88
  }
89
 
90
  bool hasBranchDivergence() const { return false; }
91
 
92
  bool useGPUDivergenceAnalysis() const { return false; }
93
 
94
  bool isSourceOfDivergence(const Value *V) const { return false; }
95
 
96
  bool isAlwaysUniform(const Value *V) const { return false; }
97
 
98
  unsigned getFlatAddressSpace() const { return -1; }
99
 
100
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
101
                                  Intrinsic::ID IID) const {
102
    return false;
103
  }
104
 
105
  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
106
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
107
    return AS == 0;
108
  };
109
 
110
  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
111
 
112
  bool isSingleThreaded() const { return false; }
113
 
114
  std::pair<const Value *, unsigned>
115
  getPredicatedAddrSpace(const Value *V) const {
116
    return std::make_pair(nullptr, -1);
117
  }
118
 
119
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
120
                                          Value *NewV) const {
121
    return nullptr;
122
  }
123
 
124
  bool isLoweredToCall(const Function *F) const {
125
    assert(F && "A concrete function must be provided to this routine.");
126
 
127
    // FIXME: These should almost certainly not be handled here, and instead
128
    // handled with the help of TLI or the target itself. This was largely
129
    // ported from existing analysis heuristics here so that such refactorings
130
    // can take place in the future.
131
 
132
    if (F->isIntrinsic())
133
      return false;
134
 
135
    if (F->hasLocalLinkage() || !F->hasName())
136
      return true;
137
 
138
    StringRef Name = F->getName();
139
 
140
    // These will all likely lower to a single selection DAG node.
141
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
142
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
143
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
144
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
145
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
146
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
147
      return false;
148
 
149
    // These are all likely to be optimized into something smaller.
150
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
151
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
152
        Name == "floorf" || Name == "ceil" || Name == "round" ||
153
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
154
        Name == "llabs")
155
      return false;
156
 
157
    return true;
158
  }
159
 
160
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
161
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
162
                                HardwareLoopInfo &HWLoopInfo) const {
163
    return false;
164
  }
165
 
166
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
167
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
168
                                   DominatorTree *DT,
169
                                   LoopVectorizationLegality *LVL,
170
                                   InterleavedAccessInfo *IAI) const {
171
    return false;
172
  }
173
 
174
  PredicationStyle emitGetActiveLaneMask() const {
175
    return PredicationStyle::None;
176
  }
177
 
178
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
179
                                                    IntrinsicInst &II) const {
180
    return std::nullopt;
181
  }
182
 
183
  std::optional<Value *>
184
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
185
                                   APInt DemandedMask, KnownBits &Known,
186
                                   bool &KnownBitsComputed) const {
187
    return std::nullopt;
188
  }
189
 
190
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
191
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
192
      APInt &UndefElts2, APInt &UndefElts3,
193
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
194
          SimplifyAndSetOp) const {
195
    return std::nullopt;
196
  }
197
 
198
  void getUnrollingPreferences(Loop *, ScalarEvolution &,
199
                               TTI::UnrollingPreferences &,
200
                               OptimizationRemarkEmitter *) const {}
201
 
202
  void getPeelingPreferences(Loop *, ScalarEvolution &,
203
                             TTI::PeelingPreferences &) const {}
204
 
205
  bool isLegalAddImmediate(int64_t Imm) const { return false; }
206
 
207
  bool isLegalICmpImmediate(int64_t Imm) const { return false; }
208
 
209
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
210
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
211
                             Instruction *I = nullptr) const {
212
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
213
    // taken from the implementation of LSR.
214
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
215
  }
216
 
217
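  // The comparison below is lexicographic: a candidate using fewer registers
  // always wins; the remaining fields (AddRec cost, IV muls, base adds, scale,
  // immediate and setup costs) only break ties, in that order.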
  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
218
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
219
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
220
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
221
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
222
  }
223
 
224
  bool isNumRegsMajorCostOfLSR() const { return true; }
225
 
226
  bool isProfitableLSRChainElement(Instruction *I) const { return false; }
227
 
228
  bool canMacroFuseCmp() const { return false; }
229
 
230
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
231
                  DominatorTree *DT, AssumptionCache *AC,
232
                  TargetLibraryInfo *LibInfo) const {
233
    return false;
234
  }
235
 
236
  TTI::AddressingModeKind
237
    getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
238
    return TTI::AMK_None;
239
  }
240
 
241
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
242
    return false;
243
  }
244
 
245
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
246
    return false;
247
  }
248
 
249
  bool isLegalNTStore(Type *DataType, Align Alignment) const {
250
    // By default, assume nontemporal memory stores are available for stores
251
    // that are aligned and have a size that is a power of 2.
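    // For example, a 16-byte vector store only qualifies here when it is at
    // least 16-byte aligned.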
252
    unsigned DataSize = DL.getTypeStoreSize(DataType);
253
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
254
  }
255
 
256
  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
257
    // By default, assume nontemporal memory loads are available for loads that
258
    // are aligned and have a size that is a power of 2.
259
    unsigned DataSize = DL.getTypeStoreSize(DataType);
260
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
261
  }
262
 
263
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
264
    return false;
265
  }
266
 
267
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
268
    return false;
269
  }
270
 
271
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
272
    return false;
273
  }
274
 
275
  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
276
    return false;
277
  }
278
 
279
  bool forceScalarizeMaskedScatter(VectorType *DataType,
280
                                   Align Alignment) const {
281
    return false;
282
  }
283
 
284
  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
285
 
286
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
287
                       const SmallBitVector &OpcodeMask) const {
288
    return false;
289
  }
290
 
291
  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
292
 
293
  bool enableOrderedReductions() const { return false; }
294
 
295
  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
296
 
297
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
298
    return false;
299
  }
300
 
301
  bool prefersVectorizedAddressing() const { return true; }
302
 
303
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
304
                                       int64_t BaseOffset, bool HasBaseReg,
305
                                       int64_t Scale,
306
                                       unsigned AddrSpace) const {
307
    // Guess that all legal addressing modes are free.
308
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
309
                              AddrSpace))
310
      return 0;
311
    return -1;
312
  }
313
 
314
  bool LSRWithInstrQueries() const { return false; }
315
 
316
  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
317
 
318
  bool isProfitableToHoist(Instruction *I) const { return true; }
319
 
320
  bool useAA() const { return false; }
321
 
322
  bool isTypeLegal(Type *Ty) const { return false; }
323
 
324
  unsigned getRegUsageForType(Type *Ty) const { return 1; }
325
 
326
  bool shouldBuildLookupTables() const { return true; }
327
 
328
  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
329
 
330
  bool shouldBuildRelLookupTables() const { return false; }
331
 
332
  bool useColdCCForColdCall(Function &F) const { return false; }
333
 
334
  InstructionCost getScalarizationOverhead(VectorType *Ty,
335
                                           const APInt &DemandedElts,
336
                                           bool Insert, bool Extract,
337
                                           TTI::TargetCostKind CostKind) const {
338
    return 0;
339
  }
340
 
341
  InstructionCost
342
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
343
                                   ArrayRef<Type *> Tys,
344
                                   TTI::TargetCostKind CostKind) const {
345
    return 0;
346
  }
347
 
348
  bool supportsEfficientVectorElementLoadStore() const { return false; }
349
 
350
  bool supportsTailCalls() const { return true; }
351
 
352
  bool supportsTailCallFor(const CallBase *CB) const {
353
    return supportsTailCalls();
354
  }
355
 
356
  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
357
    return false;
358
  }
359
 
360
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
361
                                                    bool IsZeroCmp) const {
362
    return {};
363
  }
364
 
365
  bool enableSelectOptimize() const { return true; }
366
 
367
  bool enableInterleavedAccessVectorization() const { return false; }
368
 
369
  bool enableMaskedInterleavedAccessVectorization() const { return false; }
370
 
371
  bool isFPVectorizationPotentiallyUnsafe() const { return false; }
372
 
373
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
374
                                      unsigned AddressSpace, Align Alignment,
375
                                      unsigned *Fast) const {
376
    return false;
377
  }
378
 
379
  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
380
    return TTI::PSK_Software;
381
  }
382
 
383
  bool haveFastSqrt(Type *Ty) const { return false; }
384
 
385
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
386
 
387
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
388
 
389
  InstructionCost getFPOpCost(Type *Ty) const {
390
    return TargetTransformInfo::TCC_Basic;
391
  }
392
 
393
  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
394
                                        const APInt &Imm, Type *Ty) const {
395
    return 0;
396
  }
397
 
398
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
399
                                TTI::TargetCostKind CostKind) const {
400
    return TTI::TCC_Basic;
401
  }
402
 
403
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
404
                                    const APInt &Imm, Type *Ty,
405
                                    TTI::TargetCostKind CostKind,
406
                                    Instruction *Inst = nullptr) const {
407
    return TTI::TCC_Free;
408
  }
409
 
410
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
411
                                      const APInt &Imm, Type *Ty,
412
                                      TTI::TargetCostKind CostKind) const {
413
    return TTI::TCC_Free;
414
  }
415
 
416
  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
417
 
418
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
419
    return Vector ? 1 : 0;
420
  };
421
 
422
  const char *getRegisterClassName(unsigned ClassID) const {
423
    switch (ClassID) {
424
    default:
425
      return "Generic::Unknown Register Class";
426
    case 0:
427
      return "Generic::ScalarRC";
428
    case 1:
429
      return "Generic::VectorRC";
430
    }
431
  }
432
 
433
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
434
    return TypeSize::getFixed(32);
435
  }
436
 
437
  unsigned getMinVectorRegisterBitWidth() const { return 128; }
438
 
439
  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
440
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
441
 
442
  bool
443
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
444
    return false;
445
  }
446
 
447
  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
448
    return ElementCount::get(0, IsScalable);
449
  }
450
 
451
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
452
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
453
 
454
  bool shouldConsiderAddressTypePromotion(
455
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
456
    AllowPromotionWithoutCommonHeader = false;
457
    return false;
458
  }
459
 
460
  unsigned getCacheLineSize() const { return 0; }
461
  std::optional<unsigned>
462
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
463
    switch (Level) {
464
    case TargetTransformInfo::CacheLevel::L1D:
465
      [[fallthrough]];
466
    case TargetTransformInfo::CacheLevel::L2D:
467
      return std::nullopt;
468
    }
469
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
470
  }
471
 
472
  std::optional<unsigned>
473
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
474
    switch (Level) {
475
    case TargetTransformInfo::CacheLevel::L1D:
476
      [[fallthrough]];
477
    case TargetTransformInfo::CacheLevel::L2D:
478
      return std::nullopt;
479
    }
480
 
481
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
482
  }
483
 
484
  unsigned getPrefetchDistance() const { return 0; }
485
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
486
                                unsigned NumStridedMemAccesses,
487
                                unsigned NumPrefetches, bool HasCall) const {
488
    return 1;
489
  }
490
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
491
  bool enableWritePrefetching() const { return false; }
492
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
493
 
494
  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
495
 
496
  InstructionCost getArithmeticInstrCost(
497
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
498
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
499
      ArrayRef<const Value *> Args,
500
      const Instruction *CxtI = nullptr) const {
501
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd for how arbitrary they are.
503
    switch (Opcode) {
504
    default:
505
      break;
506
    case Instruction::FDiv:
507
    case Instruction::FRem:
508
    case Instruction::SDiv:
509
    case Instruction::SRem:
510
    case Instruction::UDiv:
511
    case Instruction::URem:
512
      // FIXME: Unlikely to be true for CodeSize.
513
      return TTI::TCC_Expensive;
514
    }
515
 
516
    // Assume a 3cy latency for fp arithmetic ops.
517
    if (CostKind == TTI::TCK_Latency)
518
      if (Ty->getScalarType()->isFloatingPointTy())
519
        return 3;
520
 
521
    return 1;
522
  }
523
 
524
  InstructionCost
525
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
526
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
527
                 ArrayRef<const Value *> Args = std::nullopt) const {
528
    return 1;
529
  }
530
 
531
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
532
                                   TTI::CastContextHint CCH,
533
                                   TTI::TargetCostKind CostKind,
534
                                   const Instruction *I) const {
535
    switch (Opcode) {
536
    default:
537
      break;
538
    case Instruction::IntToPtr: {
539
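      // e.g. (assuming 64-bit pointers and a legal i32) an i32 -> ptr cast is
      // modelled as free because the source is a legal integer no wider than
      // the pointer.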
      unsigned SrcSize = Src->getScalarSizeInBits();
540
      if (DL.isLegalInteger(SrcSize) &&
541
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
542
        return 0;
543
      break;
544
    }
545
    case Instruction::PtrToInt: {
546
      unsigned DstSize = Dst->getScalarSizeInBits();
547
      if (DL.isLegalInteger(DstSize) &&
548
          DstSize >= DL.getPointerTypeSizeInBits(Src))
549
        return 0;
550
      break;
551
    }
552
    case Instruction::BitCast:
553
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
554
        // Identity and pointer-to-pointer casts are free.
555
        return 0;
556
      break;
557
    case Instruction::Trunc: {
558
      // trunc to a native type is free (assuming the target has compare and
559
      // shift-right of the same width).
560
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
561
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
562
        return 0;
563
      break;
564
    }
565
    }
566
    return 1;
567
  }
568
 
569
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
570
                                           VectorType *VecTy,
571
                                           unsigned Index) const {
572
    return 1;
573
  }
574
 
575
  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
576
                                 const Instruction *I = nullptr) const {
577
    // A phi would be free, unless we're costing the throughput because it
578
    // will require a register.
579
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
580
      return 0;
581
    return 1;
582
  }
583
 
584
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
585
                                     CmpInst::Predicate VecPred,
586
                                     TTI::TargetCostKind CostKind,
587
                                     const Instruction *I) const {
588
    return 1;
589
  }
590
 
591
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
592
                                     TTI::TargetCostKind CostKind,
593
                                     unsigned Index, Value *Op0,
594
                                     Value *Op1) const {
595
    return 1;
596
  }
597
 
598
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
599
                                     TTI::TargetCostKind CostKind,
600
                                     unsigned Index) const {
601
    return 1;
602
  }
603
 
604
  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
605
                                     const APInt &DemandedDstElts,
606
                                     TTI::TargetCostKind CostKind) {
607
    return 1;
608
  }
609
 
610
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
611
                                  unsigned AddressSpace,
612
                                  TTI::TargetCostKind CostKind,
613
                                  TTI::OperandValueInfo OpInfo,
614
                                  const Instruction *I) const {
615
    return 1;
616
  }
617
 
618
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
619
                                    unsigned AddressSpace,
620
                                    TTI::TargetCostKind CostKind,
621
                                    const Instruction *I) const {
622
    return 1;
623
  }
624
 
625
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
626
                                        Align Alignment, unsigned AddressSpace,
627
                                        TTI::TargetCostKind CostKind) const {
628
    return 1;
629
  }
630
 
631
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
632
                                         const Value *Ptr, bool VariableMask,
633
                                         Align Alignment,
634
                                         TTI::TargetCostKind CostKind,
635
                                         const Instruction *I = nullptr) const {
636
    return 1;
637
  }
638
 
639
  unsigned getInterleavedMemoryOpCost(
640
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
641
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
642
      bool UseMaskForCond, bool UseMaskForGaps) const {
643
    return 1;
644
  }
645
 
646
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
647
                                        TTI::TargetCostKind CostKind) const {
648
    switch (ICA.getID()) {
649
    default:
650
      break;
651
    case Intrinsic::annotation:
652
    case Intrinsic::assume:
653
    case Intrinsic::sideeffect:
654
    case Intrinsic::pseudoprobe:
655
    case Intrinsic::arithmetic_fence:
656
    case Intrinsic::dbg_declare:
657
    case Intrinsic::dbg_value:
658
    case Intrinsic::dbg_label:
659
    case Intrinsic::invariant_start:
660
    case Intrinsic::invariant_end:
661
    case Intrinsic::launder_invariant_group:
662
    case Intrinsic::strip_invariant_group:
663
    case Intrinsic::is_constant:
664
    case Intrinsic::lifetime_start:
665
    case Intrinsic::lifetime_end:
666
    case Intrinsic::experimental_noalias_scope_decl:
667
    case Intrinsic::objectsize:
668
    case Intrinsic::ptr_annotation:
669
    case Intrinsic::var_annotation:
670
    case Intrinsic::experimental_gc_result:
671
    case Intrinsic::experimental_gc_relocate:
672
    case Intrinsic::coro_alloc:
673
    case Intrinsic::coro_begin:
674
    case Intrinsic::coro_free:
675
    case Intrinsic::coro_end:
676
    case Intrinsic::coro_frame:
677
    case Intrinsic::coro_size:
678
    case Intrinsic::coro_align:
679
    case Intrinsic::coro_suspend:
680
    case Intrinsic::coro_subfn_addr:
681
    case Intrinsic::threadlocal_address:
682
      // These intrinsics don't actually represent code after lowering.
683
      return 0;
684
    }
685
    return 1;
686
  }
687
 
688
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
689
                                   ArrayRef<Type *> Tys,
690
                                   TTI::TargetCostKind CostKind) const {
691
    return 1;
692
  }
693
 
694
  // Assume that we have a register of the right size for the type.
695
  unsigned getNumberOfParts(Type *Tp) const { return 1; }
696
 
697
  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
698
                                            const SCEV *) const {
699
    return 0;
700
  }
701
 
702
  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
703
                                             std::optional<FastMathFlags> FMF,
704
                                             TTI::TargetCostKind) const {
705
    return 1;
706
  }
707
 
708
  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
709
                                         TTI::TargetCostKind) const {
710
    return 1;
711
  }
712
 
713
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
714
                                           Type *ResTy, VectorType *Ty,
715
                                           std::optional<FastMathFlags> FMF,
716
                                           TTI::TargetCostKind CostKind) const {
717
    return 1;
718
  }
719
 
720
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
721
                                         VectorType *Ty,
722
                                         TTI::TargetCostKind CostKind) const {
723
    return 1;
724
  }
725
 
726
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
727
    return 0;
728
  }
729
 
730
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
731
    return false;
732
  }
733
 
734
  unsigned getAtomicMemIntrinsicMaxElementSize() const {
735
    // Note for overrides: You must ensure for all element unordered-atomic
736
    // memory intrinsics that all power-of-2 element sizes up to, and
737
    // including, the return value of this method have a corresponding
738
    // runtime lib call. These runtime lib call definitions can be found
739
    // in RuntimeLibcalls.h
740
    return 0;
741
  }
742
 
743
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
744
                                           Type *ExpectedType) const {
745
    return nullptr;
746
  }
747
 
748
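  // By default the memcpy loop operates on i8; when an atomic element size of
  // N bytes is requested, it operates on the corresponding N-byte integer type.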
  Type *
749
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
750
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
751
                            unsigned SrcAlign, unsigned DestAlign,
752
                            std::optional<uint32_t> AtomicElementSize) const {
753
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
754
                             : Type::getInt8Ty(Context);
755
  }
756
 
757
  void getMemcpyLoopResidualLoweringType(
758
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
759
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
760
      unsigned SrcAlign, unsigned DestAlign,
761
      std::optional<uint32_t> AtomicCpySize) const {
762
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
763
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
764
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
765
      OpsOut.push_back(OpType);
766
  }
767
 
768
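  // Conservative default: inlining is only considered compatible when caller
  // and callee have identical "target-cpu" and "target-features" attributes.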
  bool areInlineCompatible(const Function *Caller,
769
                           const Function *Callee) const {
770
    return (Caller->getFnAttribute("target-cpu") ==
771
            Callee->getFnAttribute("target-cpu")) &&
772
           (Caller->getFnAttribute("target-features") ==
773
            Callee->getFnAttribute("target-features"));
774
  }
775
 
776
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
777
                             const ArrayRef<Type *> &Types) const {
778
    return (Caller->getFnAttribute("target-cpu") ==
779
            Callee->getFnAttribute("target-cpu")) &&
780
           (Caller->getFnAttribute("target-features") ==
781
            Callee->getFnAttribute("target-features"));
782
  }
783
 
784
  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
785
                          const DataLayout &DL) const {
786
    return false;
787
  }
788
 
789
  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
790
                           const DataLayout &DL) const {
791
    return false;
792
  }
793
 
794
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
795
 
796
  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
797
 
798
  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
799
 
800
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
801
                                   unsigned AddrSpace) const {
802
    return true;
803
  }
804
 
805
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
806
                                    unsigned AddrSpace) const {
807
    return true;
808
  }
809
 
810
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
811
                                   ElementCount VF) const {
812
    return true;
813
  }
814
 
815
  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
816
 
817
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
818
                               unsigned ChainSizeInBytes,
819
                               VectorType *VecTy) const {
820
    return VF;
821
  }
822
 
823
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
824
                                unsigned ChainSizeInBytes,
825
                                VectorType *VecTy) const {
826
    return VF;
827
  }
828
 
829
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
830
                             TTI::ReductionFlags Flags) const {
831
    return false;
832
  }
833
 
834
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
835
                                       TTI::ReductionFlags Flags) const {
836
    return false;
837
  }
838
 
839
  bool preferEpilogueVectorization() const {
840
    return true;
841
  }
842
 
843
  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
844
 
845
  unsigned getGISelRematGlobalCost() const { return 1; }
846
 
847
  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
848
 
849
  bool supportsScalableVectors() const { return false; }
850
 
851
  bool enableScalableVectorization() const { return false; }
852
 
853
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
854
                             Align Alignment) const {
855
    return false;
856
  }
857
 
858
  TargetTransformInfo::VPLegalization
859
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
860
    return TargetTransformInfo::VPLegalization(
861
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
862
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
863
  }
864
 
865
protected:
866
  // Obtain the minimum required size to hold the value (without the sign).
  // In the case of a vector, it returns the min required size for one element.
868
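  // For example, a ConstantInt of 255 yields 8 with isSigned == false, while
  // -4 yields 2 with isSigned == true.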
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
869
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
870
      const auto *VectorValue = cast<Constant>(Val);
871
 
872
      // In the case of a vector we need to pick the max of the min required
      // sizes of the elements.
874
      auto *VT = cast<FixedVectorType>(Val->getType());
875
 
876
      // Assume unsigned elements
877
      isSigned = false;
878
 
879
      // The max required size is the size of the vector element type
880
      unsigned MaxRequiredSize =
881
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
882
 
883
      unsigned MinRequiredSize = 0;
884
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
885
        if (auto *IntElement =
886
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
887
          bool signedElement = IntElement->getValue().isNegative();
888
          // Get the element min required size.
889
          unsigned ElementMinRequiredSize =
890
              IntElement->getValue().getMinSignedBits() - 1;
891
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Keep the max required bit size across all the elements.
894
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
895
        } else {
896
          // not an int constant element
897
          return MaxRequiredSize;
898
        }
899
      }
900
      return MinRequiredSize;
901
    }
902
 
903
    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
904
      isSigned = CI->getValue().isNegative();
905
      return CI->getValue().getMinSignedBits() - 1;
906
    }
907
 
908
    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
909
      isSigned = true;
910
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
911
    }
912
 
913
    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
914
      isSigned = false;
915
      return Cast->getSrcTy()->getScalarSizeInBits();
916
    }
917
 
918
    isSigned = false;
919
    return Val->getType()->getScalarSizeInBits();
920
  }
921
 
922
  bool isStridedAccess(const SCEV *Ptr) const {
923
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
924
  }
925
 
926
  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
927
                                            const SCEV *Ptr) const {
928
    if (!isStridedAccess(Ptr))
929
      return nullptr;
930
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
931
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
932
  }
933
 
934
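  // Returns true when the access strides by a known constant smaller than
  // MergeDistance. Note the FIXME below about negative strides.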
  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
935
                                       int64_t MergeDistance) const {
936
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
937
    if (!Step)
938
      return false;
939
    APInt StrideVal = Step->getAPInt();
940
    if (StrideVal.getBitWidth() > 64)
941
      return false;
942
    // FIXME: Need to take absolute value for negative stride case.
943
    return StrideVal.getSExtValue() < MergeDistance;
944
  }
945
};
946
 
947
/// CRTP base class for use as a mix-in that aids implementing
948
/// a TargetTransformInfo-compatible class.
949
template <typename T>
950
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
951
private:
952
  typedef TargetTransformInfoImplBase BaseT;
953
 
954
protected:
955
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
956
 
957
public:
958
  using BaseT::getGEPCost;
959
 
960
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
961
                             ArrayRef<const Value *> Operands,
962
                             TTI::TargetCostKind CostKind) {
963
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
964
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
965
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
966
           "explicit pointee type doesn't match operand's pointee type");
967
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
968
    bool HasBaseReg = (BaseGV == nullptr);
969
 
970
    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
971
    APInt BaseOffset(PtrSizeBits, 0);
972
    int64_t Scale = 0;
973
 
974
    auto GTI = gep_type_begin(PointeeType, Operands);
975
    Type *TargetType = nullptr;
976
 
977
    // Handle the case where the GEP instruction has a single operand,
    // the base pointer; in that case TargetType is a nullptr.
979
    if (Operands.empty())
980
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
981
 
982
    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
983
      TargetType = GTI.getIndexedType();
984
      // We assume that the cost of Scalar GEP with constant index and the
985
      // cost of Vector GEP with splat constant index are the same.
986
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
987
      if (!ConstIdx)
988
        if (auto Splat = getSplatValue(*I))
989
          ConstIdx = dyn_cast<ConstantInt>(Splat);
990
      if (StructType *STy = GTI.getStructTypeOrNull()) {
991
        // For structures, the index is always a splat or scalar constant.
992
        assert(ConstIdx && "Unexpected GEP index");
993
        uint64_t Field = ConstIdx->getZExtValue();
994
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
995
      } else {
996
        // If this operand is a scalable type, bail out early.
997
        // TODO: handle scalable vectors
998
        if (isa<ScalableVectorType>(TargetType))
999
          return TTI::TCC_Basic;
1000
        int64_t ElementSize =
1001
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
1002
        if (ConstIdx) {
1003
          BaseOffset +=
1004
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1005
        } else {
1006
          // Needs scale register.
1007
          if (Scale != 0)
1008
            // No addressing mode takes two scale registers.
1009
            return TTI::TCC_Basic;
1010
          Scale = ElementSize;
1011
        }
1012
      }
1013
    }
1014
 
1015
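    // The GEP is free only if the base/offset/scale combination computed above
    // folds into a legal addressing mode on the target.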
    if (static_cast<T *>(this)->isLegalAddressingMode(
1016
            TargetType, const_cast<GlobalValue *>(BaseGV),
1017
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1018
            Ptr->getType()->getPointerAddressSpace()))
1019
      return TTI::TCC_Free;
1020
    return TTI::TCC_Basic;
1021
  }
1022
 
1023
  InstructionCost getInstructionCost(const User *U,
1024
                                     ArrayRef<const Value *> Operands,
1025
                                     TTI::TargetCostKind CostKind) {
1026
    using namespace llvm::PatternMatch;
1027
 
1028
    auto *TargetTTI = static_cast<T *>(this);
1029
    // Handle non-intrinsic calls, invokes, and callbr.
1030
    // FIXME: Unlikely to be true for anything but CodeSize.
1031
    auto *CB = dyn_cast<CallBase>(U);
1032
    if (CB && !isa<IntrinsicInst>(U)) {
1033
      if (const Function *F = CB->getCalledFunction()) {
1034
        if (!TargetTTI->isLoweredToCall(F))
1035
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1036
 
1037
        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1038
      }
1039
      // For indirect or other calls, scale cost by number of arguments.
1040
      return TTI::TCC_Basic * (CB->arg_size() + 1);
1041
    }
1042
 
1043
    Type *Ty = U->getType();
1044
    unsigned Opcode = Operator::getOpcode(U);
1045
    auto *I = dyn_cast<Instruction>(U);
1046
    switch (Opcode) {
1047
    default:
1048
      break;
1049
    case Instruction::Call: {
1050
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1051
      auto *Intrinsic = cast<IntrinsicInst>(U);
1052
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1053
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1054
    }
1055
    case Instruction::Br:
1056
    case Instruction::Ret:
1057
    case Instruction::PHI:
1058
    case Instruction::Switch:
1059
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1060
    case Instruction::ExtractValue:
1061
    case Instruction::Freeze:
1062
      return TTI::TCC_Free;
1063
    case Instruction::Alloca:
1064
      if (cast<AllocaInst>(U)->isStaticAlloca())
1065
        return TTI::TCC_Free;
1066
      break;
1067
    case Instruction::GetElementPtr: {
1068
      const auto *GEP = cast<GEPOperator>(U);
1069
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1070
                                   GEP->getPointerOperand(),
1071
                                   Operands.drop_front(), CostKind);
1072
    }
1073
    case Instruction::Add:
1074
    case Instruction::FAdd:
1075
    case Instruction::Sub:
1076
    case Instruction::FSub:
1077
    case Instruction::Mul:
1078
    case Instruction::FMul:
1079
    case Instruction::UDiv:
1080
    case Instruction::SDiv:
1081
    case Instruction::FDiv:
1082
    case Instruction::URem:
1083
    case Instruction::SRem:
1084
    case Instruction::FRem:
1085
    case Instruction::Shl:
1086
    case Instruction::LShr:
1087
    case Instruction::AShr:
1088
    case Instruction::And:
1089
    case Instruction::Or:
1090
    case Instruction::Xor:
1091
    case Instruction::FNeg: {
1092
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
1093
      TTI::OperandValueInfo Op2Info;
1094
      if (Opcode != Instruction::FNeg)
1095
        Op2Info = TTI::getOperandInfo(U->getOperand(1));
1096
      SmallVector<const Value *, 2> Operands(U->operand_values());
1097
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1098
                                               Op2Info, Operands, I);
1099
    }
1100
    case Instruction::IntToPtr:
1101
    case Instruction::PtrToInt:
1102
    case Instruction::SIToFP:
1103
    case Instruction::UIToFP:
1104
    case Instruction::FPToUI:
1105
    case Instruction::FPToSI:
1106
    case Instruction::Trunc:
1107
    case Instruction::FPTrunc:
1108
    case Instruction::BitCast:
1109
    case Instruction::FPExt:
1110
    case Instruction::SExt:
1111
    case Instruction::ZExt:
1112
    case Instruction::AddrSpaceCast: {
1113
      Type *OpTy = U->getOperand(0)->getType();
1114
      return TargetTTI->getCastInstrCost(
1115
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1116
    }
1117
    case Instruction::Store: {
1118
      auto *SI = cast<StoreInst>(U);
1119
      Type *ValTy = U->getOperand(0)->getType();
1120
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
1121
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1122
                                        SI->getPointerAddressSpace(), CostKind,
1123
                                        OpInfo, I);
1124
    }
1125
    case Instruction::Load: {
1126
      // FIXME: Arbitrary cost which could come from the backend.
1127
      if (CostKind == TTI::TCK_Latency)
1128
        return 4;
1129
      auto *LI = cast<LoadInst>(U);
1130
      Type *LoadType = U->getType();
1131
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target.  But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load.  So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
1138
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1139
          !LoadType->isVectorTy()) {
1140
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1141
          LoadType = TI->getDestTy();
1142
      }
1143
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1144
                                        LI->getPointerAddressSpace(), CostKind,
1145
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
1146
    }
1147
    case Instruction::Select: {
1148
      const Value *Op0, *Op1;
1149
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1150
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1151
        // select x, y, false --> x & y
1152
        // select x, true, y --> x | y
1153
        const auto Op1Info = TTI::getOperandInfo(Op0);
1154
        const auto Op2Info = TTI::getOperandInfo(Op1);
1155
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1156
               Op1->getType()->getScalarSizeInBits() == 1);
1157
 
1158
        SmallVector<const Value *, 2> Operands{Op0, Op1};
1159
        return TargetTTI->getArithmeticInstrCost(
1160
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1161
            CostKind, Op1Info, Op2Info, Operands, I);
1162
      }
1163
      Type *CondTy = U->getOperand(0)->getType();
1164
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1165
                                           CmpInst::BAD_ICMP_PREDICATE,
1166
                                           CostKind, I);
1167
    }
1168
    case Instruction::ICmp:
1169
    case Instruction::FCmp: {
1170
      Type *ValTy = U->getOperand(0)->getType();
1171
      // TODO: Also handle ICmp/FCmp constant expressions.
1172
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1173
                                           I ? cast<CmpInst>(I)->getPredicate()
1174
                                             : CmpInst::BAD_ICMP_PREDICATE,
1175
                                           CostKind, I);
1176
    }
1177
    case Instruction::InsertElement: {
1178
      auto *IE = dyn_cast<InsertElementInst>(U);
1179
      if (!IE)
1180
        return TTI::TCC_Basic; // FIXME
1181
      unsigned Idx = -1;
1182
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
1183
        if (CI->getValue().getActiveBits() <= 32)
1184
          Idx = CI->getZExtValue();
1185
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1186
    }
1187
    case Instruction::ShuffleVector: {
1188
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1189
      if (!Shuffle)
1190
        return TTI::TCC_Basic; // FIXME
1191
 
1192
      auto *VecTy = cast<VectorType>(U->getType());
1193
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
1194
      int NumSubElts, SubIndex;
1195
 
1196
      if (Shuffle->changesLength()) {
1197
        // Treat a 'subvector widening' as a free shuffle.
1198
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1199
          return 0;
1200
 
1201
        if (Shuffle->isExtractSubvectorMask(SubIndex))
1202
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1203
                                           Shuffle->getShuffleMask(), CostKind,
1204
                                           SubIndex, VecTy, Operands);
1205
 
1206
        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1207
          return TargetTTI->getShuffleCost(
1208
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
1209
              CostKind, SubIndex,
1210
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1211
              Operands);
1212
 
1213
        int ReplicationFactor, VF;
1214
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1215
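          // Only destination lanes whose mask element is not undef are
          // demanded.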
          APInt DemandedDstElts =
1216
              APInt::getNullValue(Shuffle->getShuffleMask().size());
1217
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
1218
            if (I.value() != UndefMaskElem)
1219
              DemandedDstElts.setBit(I.index());
1220
          }
1221
          return TargetTTI->getReplicationShuffleCost(
1222
              VecSrcTy->getElementType(), ReplicationFactor, VF,
1223
              DemandedDstElts, CostKind);
1224
        }
1225
 
1226
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
1227
      }
1228
 
1229
      if (Shuffle->isIdentity())
1230
        return 0;
1231
 
1232
      if (Shuffle->isReverse())
1233
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
1234
                                         Shuffle->getShuffleMask(), CostKind, 0,
1235
                                         nullptr, Operands);
1236
 
1237
      if (Shuffle->isSelect())
1238
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
1239
                                         Shuffle->getShuffleMask(), CostKind, 0,
1240
                                         nullptr, Operands);
1241
 
1242
      if (Shuffle->isTranspose())
1243
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
1244
                                         Shuffle->getShuffleMask(), CostKind, 0,
1245
                                         nullptr, Operands);
1246
 
1247
      if (Shuffle->isZeroEltSplat())
1248
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
1249
                                         Shuffle->getShuffleMask(), CostKind, 0,
1250
                                         nullptr, Operands);
1251
 
1252
      if (Shuffle->isSingleSource())
1253
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1254
                                         Shuffle->getShuffleMask(), CostKind, 0,
1255
                                         nullptr, Operands);
1256
 
1257
      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1258
        return TargetTTI->getShuffleCost(
1259
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
1260
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1261
            Operands);
1262
 
1263
      if (Shuffle->isSplice(SubIndex))
1264
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
1265
                                         Shuffle->getShuffleMask(), CostKind,
1266
                                         SubIndex, nullptr, Operands);
1267
 
1268
      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
1269
                                       Shuffle->getShuffleMask(), CostKind, 0,
1270
                                       nullptr, Operands);
1271
    }
1272
    case Instruction::ExtractElement: {
1273
      auto *EEI = dyn_cast<ExtractElementInst>(U);
1274
      if (!EEI)
1275
        return TTI::TCC_Basic; // FIXME
1276
      unsigned Idx = -1;
1277
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
1278
        if (CI->getValue().getActiveBits() <= 32)
1279
          Idx = CI->getZExtValue();
1280
      Type *DstTy = U->getOperand(0)->getType();
1281
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1282
    }
1283
    }
1284
 
1285
    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
1287
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
1288
  }
1289
 
1290
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
1291
    auto *TargetTTI = static_cast<T *>(this);
1292
    SmallVector<const Value *, 4> Ops(I->operand_values());
1293
    InstructionCost Cost = TargetTTI->getInstructionCost(
1294
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
1295
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif