//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
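/// The hooks below return conservative, target-independent defaults; targets
/// (usually via BasicTTIImplBase) override the ones they can answer more
/// precisely.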
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
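    // e.g. a GEP like `getelementptr %struct.S, ptr @g, i64 0, i32 1` whose
    // indices are all constants is expected to fold into a [base + offset]
    // operand of its users, so it is modeled as free.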
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
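  // With the default of 99/100 below, a branch that goes the same way roughly
  // 99% of the time or more is treated as predictable and is best kept as
  // control flow rather than converted to a select.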
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) const {
    return false;
  }

  PredicationStyle emitGetActiveLaneMask() const {
    return PredicationStyle::None;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
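    // i.e. of the general form BaseGV + BaseOffset + BaseReg + Scale*IndexReg,
    // only [Reg] and [Reg + Reg] are accepted here.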
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
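    // Compare the cost components lexicographically: the register count
    // dominates, and the remaining components break ties in order.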
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
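    // e.g. a 16-byte store qualifies only when it is at least 16-byte aligned.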
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           std::optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
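    // Copy one byte at a time by default; for an element-wise atomic memcpy,
    // use an integer type wide enough for one atomic element.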
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

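  // By default, inlining is allowed only when caller and callee agree exactly
  // on the target CPU and feature strings.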
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
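  // e.g. a constant i32 7 needs 3 bits (isSigned = false), while a constant
  // i32 -8 needs 3 bits plus the sign bit (isSigned = true).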
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
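///
/// A concrete implementation typically derives from this and overrides the
/// hooks it can answer precisely; e.g. (hypothetical target, shown only as an
/// illustration):
///
///   class MyTTIImpl : public TargetTransformInfoImplCRTPBase<MyTTIImpl> {
///   public:
///     explicit MyTTIImpl(const DataLayout &DL)
///         : TargetTransformInfoImplCRTPBase<MyTTIImpl>(DL) {}
///     // Say the target accepts 12-bit signed add immediates.
///     bool isLegalAddImmediate(int64_t Imm) const { return isInt<12>(Imm); }
///   };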
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the base pointer, in which case TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with constant index and the
      // cost of a vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front(), CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(U->getOperand(1));
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = U->getOperand(0)->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getNullValue(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != UndefMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = U->getOperand(0)->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif // LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H