//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
#include <optional>
#include <utility>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class Function;
class GlobalValue;
class InstCombiner;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class IntrinsicInst;
class LoadInst;
class Loop;
class LoopInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
class VPIntrinsic;
struct KnownBits;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
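
// Illustrative sketch (not part of the TTI interface itself): a pass such as
// EarlyCSE can treat a target intrinsic like an ordinary load once the target
// has filled in a MemIntrinsicInfo for it. The getTgtMemIntrinsic() hook used
// here is assumed (it belongs to TargetTransformInfo but is not shown in this
// portion of the header); `TTI` and `II` come from the caller.
//
//   static bool behavesLikeUnorderedLoad(const TargetTransformInfo &TTI,
//                                        IntrinsicInst *II) {
//     MemIntrinsicInfo Info;
//     if (!TTI.getTgtMemIntrinsic(II, Info))
//       return false;
//     return Info.PtrVal && Info.ReadMem && !Info.WriteMem && Info.isUnordered();
//   }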
| 94 | |||
| 95 | /// Attributes of a target dependent hardware loop. | ||
| 96 | struct HardwareLoopInfo { | ||
| 97 | HardwareLoopInfo() = delete; | ||
| 98 | HardwareLoopInfo(Loop *L) : L(L) {} | ||
| 99 | Loop *L = nullptr; | ||
| 100 | BasicBlock *ExitBlock = nullptr; | ||
| 101 | BranchInst *ExitBranch = nullptr; | ||
| 102 | const SCEV *ExitCount = nullptr; | ||
| 103 | IntegerType *CountType = nullptr; | ||
| 104 | Value *LoopDecrement = nullptr; // Decrement the loop counter by this | ||
| 105 |                                   // value in every iteration. | ||
| 106 | bool IsNestingLegal = false; // Can a hardware loop be a parent to | ||
| 107 |                                   // another hardware loop? | ||
| 108 | bool CounterInReg = false; // Should loop counter be updated in | ||
| 109 |                                   // the loop via a phi? | ||
| 110 | bool PerformEntryTest = false; // Generate the intrinsic which also performs | ||
| 111 |                                   // icmp ne zero on the loop counter value and | ||
| 112 |                                   // produces an i1 to guard the loop entry. | ||
| 113 | bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, | ||
| 114 | DominatorTree &DT, bool ForceNestedLoop = false, | ||
| 115 | bool ForceHardwareLoopPHI = false); | ||
| 116 | bool canAnalyze(LoopInfo &LI); | ||
| 117 | }; | ||
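
// Illustrative sketch (a minimal outline, not the authoritative flow): how a
// pass in the spirit of HardwareLoops might drive this struct together with
// the isHardwareLoopProfitable() hook declared in TargetTransformInfo below.
// Names other than the HardwareLoopInfo members (TTI, L, LI, SE, AC, DT,
// LibInfo) are assumptions about the caller's context.
//
//   HardwareLoopInfo HWLoopInfo(L);
//   if (!HWLoopInfo.canAnalyze(LI))
//     return false;
//   if (!TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo))
//     return false;
//   if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
//     return false;
//   // At this point ExitBlock, ExitBranch, ExitCount and CountType have been
//   // filled in and can be used when emitting the hardware-loop intrinsics.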
| 118 | |||
| 119 | class IntrinsicCostAttributes { | ||
| 120 | const IntrinsicInst *II = nullptr; | ||
| 121 | Type *RetTy = nullptr; | ||
| 122 | Intrinsic::ID IID; | ||
| 123 | SmallVector<Type *, 4> ParamTys; | ||
| 124 | SmallVector<const Value *, 4> Arguments; | ||
| 125 |   FastMathFlags FMF; | ||
| 126 |   // If ScalarizationCost is UINT_MAX, the cost of scalarizing the | ||
| 127 |   // arguments and the return value will be computed based on types. | ||
| 128 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); | ||
| 129 | |||
| 130 | public: | ||
| 131 |   IntrinsicCostAttributes( | ||
| 132 | Intrinsic::ID Id, const CallBase &CI, | ||
| 133 | InstructionCost ScalarCost = InstructionCost::getInvalid(), | ||
| 134 | bool TypeBasedOnly = false); | ||
| 135 | |||
| 136 |   IntrinsicCostAttributes( | ||
| 137 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, | ||
| 138 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, | ||
| 139 | InstructionCost ScalarCost = InstructionCost::getInvalid()); | ||
| 140 | |||
| 141 | IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, | ||
| 142 | ArrayRef<const Value *> Args); | ||
| 143 | |||
| 144 |   IntrinsicCostAttributes( | ||
| 145 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, | ||
| 146 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), | ||
| 147 | const IntrinsicInst *I = nullptr, | ||
| 148 | InstructionCost ScalarCost = InstructionCost::getInvalid()); | ||
| 149 | |||
| 150 | Intrinsic::ID getID() const { return IID; } | ||
| 151 | const IntrinsicInst *getInst() const { return II; } | ||
| 152 | Type *getReturnType() const { return RetTy; } | ||
| 153 | FastMathFlags getFlags() const { return FMF; } | ||
| 154 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } | ||
| 155 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } | ||
| 156 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } | ||
| 157 | |||
| 158 | bool isTypeBasedOnly() const { | ||
| 159 | return Arguments.empty(); | ||
| 160 |   } | ||
| 161 | |||
| 162 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } | ||
| 163 | }; | ||
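
// Illustrative sketch: building a type-only cost query for an intrinsic. The
// getIntrinsicInstrCost() hook that consumes the attributes is assumed here
// (it is part of TargetTransformInfo but outside the lines shown above);
// `TTI` and `VecTy` come from the caller.
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, VecTy,
//                                 {VecTy, VecTy, VecTy});
//   InstructionCost Cost = TTI.getIntrinsicInstrCost(
//       Attrs, TargetTransformInfo::TCK_RecipThroughput);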
| 164 | |||
| 165 | enum class PredicationStyle { None, Data, DataAndControlFlow }; | ||
| 166 | |||
| 167 | class TargetTransformInfo; | ||
| 168 | typedef TargetTransformInfo TTI; | ||
| 169 | |||
| 170 | /// This pass provides access to the codegen interfaces that are needed | ||
| 171 | /// for IR-level transformations. | ||
| 172 | class TargetTransformInfo { | ||
| 173 | public: | ||
| 174 |   /// Construct a TTI object using a type implementing the \c Concept | ||
| 175 |   /// API below. | ||
| 176 |   /// | ||
| 177 |   /// This is used by targets to construct a TTI wrapping their target-specific | ||
| 178 |   /// implementation that encodes appropriate costs for their target. | ||
| 179 | template <typename T> TargetTransformInfo(T Impl); | ||
| 180 | |||
| 181 |   /// Construct a baseline TTI object using a minimal implementation of | ||
| 182 |   /// the \c Concept API below. | ||
| 183 |   /// | ||
| 184 |   /// The TTI implementation will reflect the information in the DataLayout | ||
| 185 |   /// provided if non-null. | ||
| 186 | explicit TargetTransformInfo(const DataLayout &DL); | ||
| 187 | |||
| 188 |   // Provide move semantics. | ||
| 189 | TargetTransformInfo(TargetTransformInfo &&Arg); | ||
| 190 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); | ||
| 191 | |||
| 192 |   // We need to define the destructor out-of-line to define our sub-classes | ||
| 193 |   // out-of-line. | ||
| 194 | ~TargetTransformInfo(); | ||
| 195 | |||
| 196 |   /// Handle the invalidation of this information. | ||
| 197 |   /// | ||
| 198 |   /// When used as a result of \c TargetIRAnalysis this method will be called | ||
| 199 |   /// when the function this was computed for changes. When it returns false, | ||
| 200 |   /// the information is preserved across those changes. | ||
| 201 | bool invalidate(Function &, const PreservedAnalyses &, | ||
| 202 | FunctionAnalysisManager::Invalidator &) { | ||
| 203 |     // FIXME: We should probably in some way ensure that the subtarget | ||
| 204 |     // information for a function hasn't changed. | ||
| 205 | return false; | ||
| 206 |   } | ||
| 207 | |||
| 208 |   /// \name Generic Target Information | ||
| 209 |   /// @{ | ||
| 210 | |||
| 211 |   /// The kind of cost model. | ||
| 212 |   /// | ||
| 213 |   /// There are several different cost models that can be customized by the | ||
| 214 |   /// target. The normalization of each cost model may be target specific. | ||
| 215 |   /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as | ||
| 216 |   /// those derived from MCSchedModel::LoopMicroOpBufferSize etc. | ||
| 217 | enum TargetCostKind { | ||
| 218 |     TCK_RecipThroughput, ///< Reciprocal throughput. | ||
| 219 |     TCK_Latency,         ///< The latency of instruction. | ||
| 220 |     TCK_CodeSize,        ///< Instruction code size. | ||
| 221 |     TCK_SizeAndLatency   ///< The weighted sum of size and latency. | ||
| 222 | }; | ||
| 223 | |||
| 224 |   /// Underlying constants for 'cost' values in this interface. | ||
| 225 |   /// | ||
| 226 |   /// Many APIs in this interface return a cost. This enum defines the | ||
| 227 |   /// fundamental values that should be used to interpret (and produce) those | ||
| 228 |   /// costs. The costs are returned as an int rather than a member of this | ||
| 229 |   /// enumeration because it is expected that the cost of one IR instruction | ||
| 230 |   /// may have a multiplicative factor to it or otherwise won't fit directly | ||
| 231 |   /// into the enum. Moreover, it is common to sum or average costs which works | ||
| 232 |   /// better as simple integral values. Thus this enum only provides constants. | ||
| 233 |   /// Also note that the returned costs are signed integers to make it natural | ||
| 234 |   /// to add, subtract, and test with zero (a common boundary condition). It is | ||
| 235 |   /// not expected that 2^32 is a realistic cost to be modeling at any point. | ||
| 236 |   /// | ||
| 237 |   /// Note that these costs should usually reflect the intersection of code-size | ||
| 238 |   /// cost and execution cost. A free instruction is typically one that folds | ||
| 239 |   /// into another instruction. For example, reg-to-reg moves can often be | ||
| 240 |   /// skipped by renaming the registers in the CPU, but they still are encoded | ||
| 241 |   /// and thus wouldn't be considered 'free' here. | ||
| 242 | enum TargetCostConstants { | ||
| 243 | TCC_Free = 0, ///< Expected to fold away in lowering. | ||
| 244 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. | ||
| 245 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. | ||
| 246 | }; | ||
| 247 | |||
| 248 |   /// Estimate the cost of a GEP operation when lowered. | ||
| 249 | InstructionCost | ||
| 250 | getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 251 | ArrayRef<const Value *> Operands, | ||
| 252 | TargetCostKind CostKind = TCK_SizeAndLatency) const; | ||
| 253 | |||
| 254 |   /// \returns A value by which our inlining threshold should be multiplied. | ||
| 255 |   /// This is primarily used to bump up the inlining threshold wholesale on | ||
| 256 |   /// targets where calls are unusually expensive. | ||
| 257 |   /// | ||
| 258 |   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of | ||
| 259 |   /// individual classes of instructions would be better. | ||
| 260 | unsigned getInliningThresholdMultiplier() const; | ||
| 261 | |||
| 262 |   /// \returns A value to be added to the inlining threshold. | ||
| 263 | unsigned adjustInliningThreshold(const CallBase *CB) const; | ||
| 264 | |||
| 265 |   /// \returns Vector bonus in percent. | ||
| 266 |   /// | ||
| 267 |   /// Vector bonuses: We want to more aggressively inline vector-dense kernels | ||
| 268 |   /// and apply this bonus based on the percentage of vector instructions. A | ||
| 269 |   /// bonus is applied if the vector instructions exceed 50% and half that | ||
| 270 |   /// amount is applied if it exceeds 10%. Note that these bonuses are some what | ||
| 271 |   /// arbitrary and evolved over time by accident as much as because they are | ||
| 272 |   /// principled bonuses. | ||
| 273 |   /// FIXME: It would be nice to base the bonus values on something more | ||
| 274 |   /// scientific. A target may has no bonus on vector instructions. | ||
| 275 | int getInlinerVectorBonusPercent() const; | ||
| 276 | |||
| 277 |   /// \return the expected cost of a memcpy, which could e.g. depend on the | ||
| 278 |   /// source/destination type and alignment and the number of bytes copied. | ||
| 279 | InstructionCost getMemcpyCost(const Instruction *I) const; | ||
| 280 | |||
| 281 |   /// \return The estimated number of case clusters when lowering \p 'SI'. | ||
| 282 |   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump | ||
| 283 |   /// table. | ||
| 284 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||
| 285 | unsigned &JTSize, | ||
| 286 |                                             ProfileSummaryInfo *PSI, | ||
| 287 | BlockFrequencyInfo *BFI) const; | ||
| 288 | |||
| 289 |   /// Estimate the cost of a given IR user when lowered. | ||
| 290 |   /// | ||
| 291 |   /// This can estimate the cost of either a ConstantExpr or Instruction when | ||
| 292 |   /// lowered. | ||
| 293 |   /// | ||
| 294 |   /// \p Operands is a list of operands which can be a result of transformations | ||
| 295 |   /// of the current operands. The number of the operands on the list must equal | ||
| 296 |   /// to the number of the current operands the IR user has. Their order on the | ||
| 297 |   /// list must be the same as the order of the current operands the IR user | ||
| 298 |   /// has. | ||
| 299 |   /// | ||
| 300 |   /// The returned cost is defined in terms of \c TargetCostConstants, see its | ||
| 301 |   /// comments for a detailed explanation of the cost values. | ||
| 302 | InstructionCost getInstructionCost(const User *U, | ||
| 303 | ArrayRef<const Value *> Operands, | ||
| 304 | TargetCostKind CostKind) const; | ||
| 305 | |||
| 306 |   /// This is a helper function which calls the three-argument | ||
| 307 |   /// getInstructionCost with \p Operands which are the current operands U has. | ||
| 308 | InstructionCost getInstructionCost(const User *U, | ||
| 309 | TargetCostKind CostKind) const { | ||
| 310 | SmallVector<const Value *, 4> Operands(U->operand_values()); | ||
| 311 | return getInstructionCost(U, Operands, CostKind); | ||
| 312 |   } | ||
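
  // Illustrative sketch: accumulating per-instruction costs for a basic block
  // under a particular cost kind, as a client pass might do. `TTI`, `BB` and
  // `Budget` are assumptions about the caller's context.
  //
  //   InstructionCost Cost = 0;
  //   for (const Instruction &I : *BB)
  //     Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
  //   bool FitsBudget = Cost.isValid() && Cost <= Budget;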
| 313 | |||
| 314 |   /// If a branch or a select condition is skewed in one direction by more than | ||
| 315 |   /// this factor, it is very likely to be predicted correctly. | ||
| 316 | BranchProbability getPredictableBranchThreshold() const; | ||
| 317 | |||
| 318 |   /// Return true if branch divergence exists. | ||
| 319 |   /// | ||
| 320 |   /// Branch divergence has a significantly negative impact on GPU performance | ||
| 321 |   /// when threads in the same wavefront take different paths due to conditional | ||
| 322 |   /// branches. | ||
| 323 | bool hasBranchDivergence() const; | ||
| 324 | |||
| 325 |   /// Return true if the target prefers to use GPU divergence analysis to | ||
| 326 |   /// replace the legacy version. | ||
| 327 | bool useGPUDivergenceAnalysis() const; | ||
| 328 | |||
| 329 |   /// Returns whether V is a source of divergence. | ||
| 330 |   /// | ||
| 331 |   /// This function provides the target-dependent information for | ||
| 332 |   /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis | ||
| 333 |   /// first builds the dependency graph, and then runs the reachability | ||
| 334 |   /// algorithm starting with the sources of divergence. | ||
| 335 | bool isSourceOfDivergence(const Value *V) const; | ||
| 336 | |||
| 337 |   // Returns true for the target specific | ||
| 338 |   // set of operations which produce uniform result | ||
| 339 |   // even taking non-uniform arguments | ||
| 340 | bool isAlwaysUniform(const Value *V) const; | ||
| 341 | |||
| 342 |   /// Returns the address space ID for a target's 'flat' address space. Note | ||
| 343 |   /// this is not necessarily the same as addrspace(0), which LLVM sometimes | ||
| 344 |   /// refers to as the generic address space. The flat address space is a | ||
| 345 |   /// generic address space that can be used access multiple segments of memory | ||
| 346 |   /// with different address spaces. Access of a memory location through a | ||
| 347 |   /// pointer with this address space is expected to be legal but slower | ||
| 348 |   /// compared to the same memory location accessed through a pointer with a | ||
| 349 |   /// different address space. | ||
| 350 |   // | ||
| 351 |   /// This is for targets with different pointer representations which can | ||
| 352 |   /// be converted with the addrspacecast instruction. If a pointer is converted | ||
| 353 |   /// to this address space, optimizations should attempt to replace the access | ||
| 354 |   /// with the source address space. | ||
| 355 |   /// | ||
| 356 |   /// \returns ~0u if the target does not have such a flat address space to | ||
| 357 |   /// optimize away. | ||
| 358 | unsigned getFlatAddressSpace() const; | ||
| 359 | |||
| 360 |   /// Return any intrinsic address operand indexes which may be rewritten if | ||
| 361 |   /// they use a flat address space pointer. | ||
| 362 |   /// | ||
| 363 |   /// \returns true if the intrinsic was handled. | ||
| 364 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 365 | Intrinsic::ID IID) const; | ||
| 366 | |||
| 367 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; | ||
| 368 | |||
| 369 |   /// Return true if globals in this address space can have initializers other | ||
| 370 |   /// than `undef`. | ||
| 371 | bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; | ||
| 372 | |||
| 373 | unsigned getAssumedAddrSpace(const Value *V) const; | ||
| 374 | |||
| 375 | bool isSingleThreaded() const; | ||
| 376 | |||
| 377 | std::pair<const Value *, unsigned> | ||
| 378 | getPredicatedAddrSpace(const Value *V) const; | ||
| 379 | |||
| 380 |   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p | ||
| 381 |   /// NewV, which has a different address space. This should happen for every | ||
| 382 |   /// operand index that collectFlatAddressOperands returned for the intrinsic. | ||
| 383 |   /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the | ||
| 384 |   /// new value (which may be the original \p II with modified operands). | ||
| 385 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, | ||
| 386 | Value *NewV) const; | ||
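
  // Illustrative sketch: the calling protocol between
  // collectFlatAddressOperands() and rewriteIntrinsicWithAddressSpace(), as an
  // InferAddressSpaces-style pass might use it. `TTI`, `II`, `FlatPtr` and
  // `SpecificPtr` are assumptions about the caller's context.
  //
  //   SmallVector<int, 4> OpIndexes;
  //   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID())) {
  //     // For each returned index whose operand is FlatPtr, try to rewrite it
  //     // with SpecificPtr, which points into a concrete address space.
  //     if (Value *NewV =
  //             TTI.rewriteIntrinsicWithAddressSpace(II, FlatPtr, SpecificPtr))
  //       if (NewV != II)
  //         II->replaceAllUsesWith(NewV);
  //   }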
| 387 | |||
| 388 |   /// Test whether calls to a function lower to actual program function | ||
| 389 |   /// calls. | ||
| 390 |   /// | ||
| 391 |   /// The idea is to test whether the program is likely to require a 'call' | ||
| 392 |   /// instruction or equivalent in order to call the given function. | ||
| 393 |   /// | ||
| 394 |   /// FIXME: It's not clear that this is a good or useful query API. Client's | ||
| 395 |   /// should probably move to simpler cost metrics using the above. | ||
| 396 |   /// Alternatively, we could split the cost interface into distinct code-size | ||
| 397 |   /// and execution-speed costs. This would allow modelling the core of this | ||
| 398 |   /// query more accurately as a call is a single small instruction, but | ||
| 399 |   /// incurs significant execution cost. | ||
| 400 | bool isLoweredToCall(const Function *F) const; | ||
| 401 | |||
| 402 | struct LSRCost { | ||
| 403 |     /// TODO: Some of these could be merged. Also, a lexical ordering | ||
| 404 |     /// isn't always optimal. | ||
| 405 | unsigned Insns; | ||
| 406 | unsigned NumRegs; | ||
| 407 | unsigned AddRecCost; | ||
| 408 | unsigned NumIVMuls; | ||
| 409 | unsigned NumBaseAdds; | ||
| 410 | unsigned ImmCost; | ||
| 411 | unsigned SetupCost; | ||
| 412 | unsigned ScaleCost; | ||
| 413 | }; | ||
| 414 | |||
| 415 |   /// Parameters that control the generic loop unrolling transformation. | ||
| 416 | struct UnrollingPreferences { | ||
| 417 |     /// The cost threshold for the unrolled loop. Should be relative to the | ||
| 418 |     /// getInstructionCost values returned by this API, and the expectation is | ||
| 419 |     /// that the unrolled loop's instructions when run through that interface | ||
| 420 |     /// should not exceed this cost. However, this is only an estimate. Also, | ||
| 421 |     /// specific loops may be unrolled even with a cost above this threshold if | ||
| 422 |     /// deemed profitable. Set this to UINT_MAX to disable the loop body cost | ||
| 423 |     /// restriction. | ||
| 424 | unsigned Threshold; | ||
| 425 |     /// If complete unrolling will reduce the cost of the loop, we will boost | ||
| 426 |     /// the Threshold by a certain percent to allow more aggressive complete | ||
| 427 |     /// unrolling. This value provides the maximum boost percentage that we | ||
| 428 |     /// can apply to Threshold (The value should be no less than 100). | ||
| 429 |     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, | ||
| 430 |     ///                                    MaxPercentThresholdBoost / 100) | ||
| 431 |     /// E.g. if complete unrolling reduces the loop execution time by 50% | ||
| 432 |     /// then we boost the threshold by the factor of 2x. If unrolling is not | ||
| 433 |     /// expected to reduce the running time, then we do not increase the | ||
| 434 |     /// threshold. | ||
| 435 | unsigned MaxPercentThresholdBoost; | ||
| 436 |     /// The cost threshold for the unrolled loop when optimizing for size (set | ||
| 437 |     /// to UINT_MAX to disable). | ||
| 438 | unsigned OptSizeThreshold; | ||
| 439 |     /// The cost threshold for the unrolled loop, like Threshold, but used | ||
| 440 |     /// for partial/runtime unrolling (set to UINT_MAX to disable). | ||
| 441 | unsigned PartialThreshold; | ||
| 442 |     /// The cost threshold for the unrolled loop when optimizing for size, like | ||
| 443 |     /// OptSizeThreshold, but used for partial/runtime unrolling (set to | ||
| 444 |     /// UINT_MAX to disable). | ||
| 445 | unsigned PartialOptSizeThreshold; | ||
| 446 |     /// A forced unrolling factor (the number of concatenated bodies of the | ||
| 447 |     /// original loop in the unrolled loop body). When set to 0, the unrolling | ||
| 448 |     /// transformation will select an unrolling factor based on the current cost | ||
| 449 |     /// threshold and other factors. | ||
| 450 | unsigned Count; | ||
| 451 |     /// Default unroll count for loops with run-time trip count. | ||
| 452 | unsigned DefaultUnrollRuntimeCount; | ||
| 453 |     // Set the maximum unrolling factor. The unrolling factor may be selected | ||
| 454 |     // using the appropriate cost threshold, but may not exceed this number | ||
| 455 |     // (set to UINT_MAX to disable). This does not apply in cases where the | ||
| 456 |     // loop is being fully unrolled. | ||
| 457 | unsigned MaxCount; | ||
| 458 |     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but | ||
| 459 |     /// applies even if full unrolling is selected. This allows a target to fall | ||
| 460 |     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. | ||
| 461 | unsigned FullUnrollMaxCount; | ||
| 462 |     // Represents number of instructions optimized when "back edge" | ||
| 463 |     // becomes "fall through" in unrolled loop. | ||
| 464 |     // For now we count a conditional branch on a backedge and a comparison | ||
| 465 |     // feeding it. | ||
| 466 | unsigned BEInsns; | ||
| 467 |     /// Allow partial unrolling (unrolling of loops to expand the size of the | ||
| 468 |     /// loop body, not only to eliminate small constant-trip-count loops). | ||
| 469 | bool Partial; | ||
| 470 |     /// Allow runtime unrolling (unrolling of loops to expand the size of the | ||
| 471 |     /// loop body even when the number of loop iterations is not known at | ||
| 472 |     /// compile time). | ||
| 473 | bool Runtime; | ||
| 474 |     /// Allow generation of a loop remainder (extra iterations after unroll). | ||
| 475 | bool AllowRemainder; | ||
| 476 |     /// Allow emitting expensive instructions (such as divisions) when computing | ||
| 477 |     /// the trip count of a loop for runtime unrolling. | ||
| 478 | bool AllowExpensiveTripCount; | ||
| 479 |     /// Apply loop unroll on any kind of loop | ||
| 480 |     /// (mainly to loops that fail runtime unrolling). | ||
| 481 | bool Force; | ||
| 482 |     /// Allow using trip count upper bound to unroll loops. | ||
| 483 | bool UpperBound; | ||
| 484 |     /// Allow unrolling of all the iterations of the runtime loop remainder. | ||
| 485 | bool UnrollRemainder; | ||
| 486 |     /// Allow unroll and jam. Used to enable unroll and jam for the target. | ||
| 487 | bool UnrollAndJam; | ||
| 488 |     /// Threshold for unroll and jam, for inner loop size. The 'Threshold' | ||
| 489 |     /// value above is used during unroll and jam for the outer loop size. | ||
| 490 |     /// This value is used in the same manner to limit the size of the inner | ||
| 491 |     /// loop. | ||
| 492 | unsigned UnrollAndJamInnerLoopThreshold; | ||
| 493 |     /// Don't allow loop unrolling to simulate more than this number of | ||
| 494 |     /// iterations when checking full unroll profitability | ||
| 495 | unsigned MaxIterationsCountToAnalyze; | ||
| 496 | }; | ||
| 497 | |||
| 498 |   /// Get target-customized preferences for the generic loop unrolling | ||
| 499 |   /// transformation. The caller will initialize UP with the current | ||
| 500 |   /// target-independent defaults. | ||
| 501 | void getUnrollingPreferences(Loop *L, ScalarEvolution &, | ||
| 502 |                                UnrollingPreferences &UP, | ||
| 503 | OptimizationRemarkEmitter *ORE) const; | ||
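
  // Illustrative sketch: how a hypothetical target implementation (the name
  // `MyTTIImpl` is a placeholder, not a real class) might adjust the defaults
  // it receives in \p UP.
  //
  //   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  //                                           TTI::UnrollingPreferences &UP,
  //                                           OptimizationRemarkEmitter *ORE) {
  //     UP.Partial = true;   // allow partial unrolling
  //     UP.Runtime = true;   // allow runtime unrolling
  //     UP.MaxCount = 4;     // but never unroll by more than 4
  //     UP.Threshold += 50;  // and be a bit more generous with the size budget
  //   }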
| 504 | |||
| 505 |   /// Query the target whether it would be profitable to convert the given loop | ||
| 506 |   /// into a hardware loop. | ||
| 507 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 508 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, | ||
| 509 | HardwareLoopInfo &HWLoopInfo) const; | ||
| 510 | |||
| 511 |   /// Query the target whether it would be prefered to create a predicated | ||
| 512 |   /// vector loop, which can avoid the need to emit a scalar epilogue loop. | ||
| 513 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 514 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 515 |                                    DominatorTree *DT, | ||
| 516 |                                    LoopVectorizationLegality *LVL, | ||
| 517 | InterleavedAccessInfo *IAI) const; | ||
| 518 | |||
| 519 |   /// Query the target whether lowering of the llvm.get.active.lane.mask | ||
| 520 |   /// intrinsic is supported and how the mask should be used. A return value | ||
| 521 |   /// of PredicationStyle::Data indicates the mask is used as data only, | ||
| 522 |   /// whereas PredicationStyle::DataAndControlFlow indicates we should also use | ||
| 523 |   /// the mask for control flow in the loop. If unsupported the return value is | ||
| 524 |   /// PredicationStyle::None. | ||
| 525 | PredicationStyle emitGetActiveLaneMask() const; | ||
| 526 | |||
| 527 |   // Parameters that control the loop peeling transformation | ||
| 528 | struct PeelingPreferences { | ||
| 529 |     /// A forced peeling factor (the number of bodied of the original loop | ||
| 530 |     /// that should be peeled off before the loop body). When set to 0, the | ||
| 531 |     /// a peeling factor based on profile information and other factors. | ||
| 532 | unsigned PeelCount; | ||
| 533 |     /// Allow peeling off loop iterations. | ||
| 534 | bool AllowPeeling; | ||
| 535 |     /// Allow peeling off loop iterations for loop nests. | ||
| 536 | bool AllowLoopNestsPeeling; | ||
| 537 |     /// Allow peeling basing on profile. Uses to enable peeling off all | ||
| 538 |     /// iterations basing on provided profile. | ||
| 539 |     /// If the value is true the peeling cost model can decide to peel only | ||
| 540 |     /// some iterations and in this case it will set this to false. | ||
| 541 | bool PeelProfiledIterations; | ||
| 542 | }; | ||
| 543 | |||
| 544 |   /// Get target-customized preferences for the generic loop peeling | ||
| 545 |   /// transformation. The caller will initialize \p PP with the current | ||
| 546 |   /// target-independent defaults with information from \p L and \p SE. | ||
| 547 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 548 | PeelingPreferences &PP) const; | ||
| 549 | |||
| 550 |   /// Targets can implement their own combinations for target-specific | ||
| 551 |   /// intrinsics. This function will be called from the InstCombine pass every | ||
| 552 |   /// time a target-specific intrinsic is encountered. | ||
| 553 |   /// | ||
| 554 |   /// \returns std::nullopt to not do anything target specific or a value that | ||
| 555 |   /// will be returned from the InstCombiner. It is possible to return null and | ||
| 556 |   /// stop further processing of the intrinsic by returning nullptr. | ||
| 557 | std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC, | ||
| 558 | IntrinsicInst & II) const; | ||
| 559 |   /// Can be used to implement target-specific instruction combining. | ||
| 560 |   /// \see instCombineIntrinsic | ||
| 561 | std::optional<Value *> simplifyDemandedUseBitsIntrinsic( | ||
| 562 | InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask, | ||
| 563 | KnownBits & Known, bool &KnownBitsComputed) const; | ||
| 564 |   /// Can be used to implement target-specific instruction combining. | ||
| 565 |   /// \see instCombineIntrinsic | ||
| 566 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 567 | InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts, | ||
| 568 | APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3, | ||
| 569 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 570 | SimplifyAndSetOp) const; | ||
| 571 |   /// @} | ||
| 572 | |||
| 573 |   /// \name Scalar Target Information | ||
| 574 |   /// @{ | ||
| 575 | |||
| 576 |   /// Flags indicating the kind of support for population count. | ||
| 577 |   /// | ||
| 578 |   /// Compared to the SW implementation, HW support is supposed to | ||
| 579 |   /// significantly boost the performance when the population is dense, and it | ||
| 580 |   /// may or may not degrade performance if the population is sparse. A HW | ||
| 581 |   /// support is considered as "Fast" if it can outperform, or is on a par | ||
| 582 |   /// with, SW implementation when the population is sparse; otherwise, it is | ||
| 583 |   /// considered as "Slow". | ||
| 584 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; | ||
| 585 | |||
| 586 |   /// Return true if the specified immediate is legal add immediate, that | ||
| 587 |   /// is the target has add instructions which can add a register with the | ||
| 588 |   /// immediate without having to materialize the immediate into a register. | ||
| 589 | bool isLegalAddImmediate(int64_t Imm) const; | ||
| 590 | |||
| 591 |   /// Return true if the specified immediate is legal icmp immediate, | ||
| 592 |   /// that is the target has icmp instructions which can compare a register | ||
| 593 |   /// against the immediate without having to materialize the immediate into a | ||
| 594 |   /// register. | ||
| 595 | bool isLegalICmpImmediate(int64_t Imm) const; | ||
| 596 | |||
| 597 |   /// Return true if the addressing mode represented by AM is legal for | ||
| 598 |   /// this target, for a load/store of the specified type. | ||
| 599 |   /// The type may be VoidTy, in which case only return true if the addressing | ||
| 600 |   /// mode is legal for a load/store of any legal type. | ||
| 601 |   /// If target returns true in LSRWithInstrQueries(), I may be valid. | ||
| 602 |   /// TODO: Handle pre/postinc as well. | ||
| 603 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||
| 604 | bool HasBaseReg, int64_t Scale, | ||
| 605 | unsigned AddrSpace = 0, | ||
| 606 | Instruction *I = nullptr) const; | ||
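
  // Illustrative sketch: asking whether a [BaseReg + 4*Index + 16] style
  // address is directly encodable for an i32 access. The constants are
  // arbitrary and `TTI`/`Ctx` are assumptions about the caller's context.
  //
  //   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  //                                          /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true,
  //                                          /*Scale=*/4);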
| 607 | |||
| 608 |   /// Return true if LSR cost of C1 is lower than C2. | ||
| 609 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 610 | const TargetTransformInfo::LSRCost &C2) const; | ||
| 611 | |||
| 612 |   /// Return true if LSR major cost is number of registers. Targets which | ||
| 613 |   /// implement their own isLSRCostLess and unset number of registers as major | ||
| 614 |   /// cost should return false, otherwise return true. | ||
| 615 | bool isNumRegsMajorCostOfLSR() const; | ||
| 616 | |||
| 617 |   /// \returns true if LSR should not optimize a chain that includes \p I. | ||
| 618 | bool isProfitableLSRChainElement(Instruction *I) const; | ||
| 619 | |||
| 620 |   /// Return true if the target can fuse a compare and branch. | ||
| 621 |   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost | ||
| 622 |   /// calculation for the instructions in a loop. | ||
| 623 | bool canMacroFuseCmp() const; | ||
| 624 | |||
| 625 |   /// Return true if the target can save a compare for loop count, for example | ||
| 626 |   /// hardware loop saves a compare. | ||
| 627 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||
| 628 | DominatorTree *DT, AssumptionCache *AC, | ||
| 629 | TargetLibraryInfo *LibInfo) const; | ||
| 630 | |||
| 631 | enum AddressingModeKind { | ||
| 632 | AMK_PreIndexed, | ||
| 633 | AMK_PostIndexed, | ||
| 634 | AMK_None | ||
| 635 | }; | ||
| 636 | |||
| 637 |   /// Return the preferred addressing mode LSR should make efforts to generate. | ||
| 638 | AddressingModeKind getPreferredAddressingMode(const Loop *L, | ||
| 639 | ScalarEvolution *SE) const; | ||
| 640 | |||
| 641 |   /// Return true if the target supports masked store. | ||
| 642 | bool isLegalMaskedStore(Type *DataType, Align Alignment) const; | ||
| 643 |   /// Return true if the target supports masked load. | ||
| 644 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) const; | ||
| 645 | |||
| 646 |   /// Return true if the target supports nontemporal store. | ||
| 647 | bool isLegalNTStore(Type *DataType, Align Alignment) const; | ||
| 648 |   /// Return true if the target supports nontemporal load. | ||
| 649 | bool isLegalNTLoad(Type *DataType, Align Alignment) const; | ||
| 650 | |||
| 651 |   /// \Returns true if the target supports broadcasting a load to a vector of | ||
| 652 |   /// type <NumElements x ElementTy>. | ||
| 653 | bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; | ||
| 654 | |||
| 655 |   /// Return true if the target supports masked scatter. | ||
| 656 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; | ||
| 657 |   /// Return true if the target supports masked gather. | ||
| 658 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const; | ||
| 659 |   /// Return true if the target forces scalarizing of llvm.masked.gather | ||
| 660 |   /// intrinsics. | ||
| 661 | bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const; | ||
| 662 |   /// Return true if the target forces scalarizing of llvm.masked.scatter | ||
| 663 |   /// intrinsics. | ||
| 664 | bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const; | ||
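
  // Illustrative sketch: a vectorizer-style legality check combining the hooks
  // above. `TTI` and `Ctx` are assumptions about the caller's context; the
  // element type and widths are arbitrary.
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
  //   bool CanMaskLoad = TTI.isLegalMaskedLoad(VecTy, Align(16));
  //   bool UseGather = TTI.isLegalMaskedGather(VecTy, Align(4)) &&
  //                    !TTI.forceScalarizeMaskedGather(VecTy, Align(4));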
| 665 | |||
| 666 |   /// Return true if the target supports masked compress store. | ||
| 667 | bool isLegalMaskedCompressStore(Type *DataType) const; | ||
| 668 |   /// Return true if the target supports masked expand load. | ||
| 669 | bool isLegalMaskedExpandLoad(Type *DataType) const; | ||
| 670 | |||
| 671 |   /// Return true if this is an alternating opcode pattern that can be lowered | ||
| 672 |   /// to a single instruction on the target. In X86 this is for the addsub | ||
| 673 |   /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR. | ||
| 674 |   /// This function expectes two opcodes: \p Opcode1 and \p Opcode2 being | ||
| 675 |   /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0` | ||
| 676 |   /// when \p Opcode0 is selected and `1` when Opcode1 is selected. | ||
| 677 |   /// \p VecTy is the vector type of the instruction to be generated. | ||
| 678 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, | ||
| 679 | const SmallBitVector &OpcodeMask) const; | ||
| 680 | |||
| 681 |   /// Return true if we should be enabling ordered reductions for the target. | ||
| 682 | bool enableOrderedReductions() const; | ||
| 683 | |||
| 684 |   /// Return true if the target has a unified operation to calculate division | ||
| 685 |   /// and remainder. If so, the additional implicit multiplication and | ||
| 686 |   /// subtraction required to calculate a remainder from division are free. This | ||
| 687 |   /// can enable more aggressive transformations for division and remainder than | ||
| 688 |   /// would typically be allowed using throughput or size cost models. | ||
| 689 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; | ||
| 690 | |||
| 691 |   /// Return true if the given instruction (assumed to be a memory access | ||
| 692 |   /// instruction) has a volatile variant. If that's the case then we can avoid | ||
| 693 |   /// addrspacecast to generic AS for volatile loads/stores. Default | ||
| 694 |   /// implementation returns false, which prevents address space inference for | ||
| 695 |   /// volatile loads/stores. | ||
| 696 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; | ||
| 697 | |||
| 698 |   /// Return true if target doesn't mind addresses in vectors. | ||
| 699 | bool prefersVectorizedAddressing() const; | ||
| 700 | |||
| 701 |   /// Return the cost of the scaling factor used in the addressing | ||
| 702 |   /// mode represented by AM for this target, for a load/store | ||
| 703 |   /// of the specified type. | ||
| 704 |   /// If the AM is supported, the return value must be >= 0. | ||
| 705 |   /// If the AM is not supported, it returns a negative value. | ||
| 706 |   /// TODO: Handle pre/postinc as well. | ||
| 707 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 708 | int64_t BaseOffset, bool HasBaseReg, | ||
| 709 |                                        int64_t Scale, | ||
| 710 | unsigned AddrSpace = 0) const; | ||
| 711 | |||
| 712 |   /// Return true if the loop strength reduce pass should make | ||
| 713 |   /// Instruction* based TTI queries to isLegalAddressingMode(). This is | ||
| 714 |   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned | ||
| 715 |   /// immediate offset and no index register. | ||
| 716 | bool LSRWithInstrQueries() const; | ||
| 717 | |||
| 718 |   /// Return true if it's free to truncate a value of type Ty1 to type | ||
| 719 |   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 | ||
| 720 |   /// by referencing its sub-register AX. | ||
| 721 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; | ||
| 722 | |||
| 723 |   /// Return true if it is profitable to hoist instruction in the | ||
| 724 |   /// then/else to before if. | ||
| 725 | bool isProfitableToHoist(Instruction *I) const; | ||
| 726 | |||
| 727 | bool useAA() const; | ||
| 728 | |||
| 729 |   /// Return true if this type is legal. | ||
| 730 | bool isTypeLegal(Type *Ty) const; | ||
| 731 | |||
| 732 |   /// Returns the estimated number of registers required to represent \p Ty. | ||
| 733 | unsigned getRegUsageForType(Type *Ty) const; | ||
| 734 | |||
| 735 |   /// Return true if switches should be turned into lookup tables for the | ||
| 736 |   /// target. | ||
| 737 | bool shouldBuildLookupTables() const; | ||
| 738 | |||
| 739 |   /// Return true if switches should be turned into lookup tables | ||
| 740 |   /// containing this constant value for the target. | ||
| 741 | bool shouldBuildLookupTablesForConstant(Constant *C) const; | ||
| 742 | |||
| 743 |   /// Return true if lookup tables should be turned into relative lookup tables. | ||
| 744 | bool shouldBuildRelLookupTables() const; | ||
| 745 | |||
| 746 |   /// Return true if the input function which is cold at all call sites, | ||
| 747 |   ///  should use coldcc calling convention. | ||
| 748 | bool useColdCCForColdCall(Function &F) const; | ||
| 749 | |||
| 750 |   /// Estimate the overhead of scalarizing an instruction. Insert and Extract | ||
| 751 |   /// are set if the demanded result elements need to be inserted and/or | ||
| 752 |   /// extracted from vectors. | ||
| 753 | InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 754 | const APInt &DemandedElts, | ||
| 755 | bool Insert, bool Extract, | ||
| 756 | TTI::TargetCostKind CostKind) const; | ||
| 757 | |||
| 758 |   /// Estimate the overhead of scalarizing an instructions unique | ||
| 759 |   /// non-constant operands. The (potentially vector) types to use for each of | ||
| 760 |   /// argument are passes via Tys. | ||
| 761 | InstructionCost | ||
| 762 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 763 | ArrayRef<Type *> Tys, | ||
| 764 | TTI::TargetCostKind CostKind) const; | ||
| 765 | |||
| 766 |   /// If target has efficient vector element load/store instructions, it can | ||
| 767 |   /// return true here so that insertion/extraction costs are not added to | ||
| 768 |   /// the scalarization cost of a load/store. | ||
| 769 | bool supportsEfficientVectorElementLoadStore() const; | ||
| 770 | |||
| 771 |   /// If the target supports tail calls. | ||
| 772 | bool supportsTailCalls() const; | ||
| 773 | |||
| 774 |   /// If target supports tail call on \p CB | ||
| 775 | bool supportsTailCallFor(const CallBase *CB) const; | ||
| 776 | |||
| 777 |   /// Don't restrict interleaved unrolling to small loops. | ||
| 778 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; | ||
| 779 | |||
| 780 |   /// Returns options for expansion of memcmp. IsZeroCmp is | ||
| 781 |   // true if this is the expansion of memcmp(p1, p2, s) == 0. | ||
| 782 | struct MemCmpExpansionOptions { | ||
| 783 |     // Return true if memcmp expansion is enabled. | ||
| 784 | operator bool() const { return MaxNumLoads > 0; } | ||
| 785 | |||
| 786 |     // Maximum number of load operations. | ||
| 787 | unsigned MaxNumLoads = 0; | ||
| 788 | |||
| 789 |     // The list of available load sizes (in bytes), sorted in decreasing order. | ||
| 790 | SmallVector<unsigned, 8> LoadSizes; | ||
| 791 | |||
| 792 |     // For memcmp expansion when the memcmp result is only compared equal or | ||
| 793 |     // not-equal to 0, allow up to this number of load pairs per block. As an | ||
| 794 |     // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: | ||
| 795 |     //   a0 = load2bytes &a[0] | ||
| 796 |     //   b0 = load2bytes &b[0] | ||
| 797 |     //   a2 = load1byte  &a[2] | ||
| 798 |     //   b2 = load1byte  &b[2] | ||
| 799 |     //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0 | ||
| 800 | unsigned NumLoadsPerBlock = 1; | ||
| 801 | |||
| 802 |     // Set to true to allow overlapping loads. For example, 7-byte compares can | ||
| 803 |     // be done with two 4-byte compares instead of 4+2+1-byte compares. This | ||
| 804 |     // requires all loads in LoadSizes to be doable in an unaligned way. | ||
| 805 | bool AllowOverlappingLoads = false; | ||
| 806 | }; | ||
| 807 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||
| 808 | bool IsZeroCmp) const; | ||
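
  // Illustrative sketch: how a hypothetical target implementation (the name
  // `MyTTIImpl` is a placeholder) might fill in these options.
  //
  //   TTI::MemCmpExpansionOptions
  //   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  //     TTI::MemCmpExpansionOptions Options;
  //     Options.MaxNumLoads = OptSize ? 2 : 8;
  //     Options.LoadSizes = {8, 4, 2, 1}; // bytes, in decreasing order
  //     Options.AllowOverlappingLoads = true;
  //     if (IsZeroCmp)
  //       Options.NumLoadsPerBlock = 2;
  //     return Options;
  //   }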
| 809 | |||
| 810 |   /// Should the Select Optimization pass be enabled and ran. | ||
| 811 | bool enableSelectOptimize() const; | ||
| 812 | |||
| 813 |   /// Enable matching of interleaved access groups. | ||
| 814 | bool enableInterleavedAccessVectorization() const; | ||
| 815 | |||
| 816 |   /// Enable matching of interleaved access groups that contain predicated | ||
| 817 |   /// accesses or gaps and therefore vectorized using masked | ||
| 818 |   /// vector loads/stores. | ||
| 819 | bool enableMaskedInterleavedAccessVectorization() const; | ||
| 820 | |||
| 821 |   /// Indicate that it is potentially unsafe to automatically vectorize | ||
| 822 |   /// floating-point operations because the semantics of vector and scalar | ||
| 823 |   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math | ||
| 824 |   /// does not support IEEE-754 denormal numbers, while depending on the | ||
| 825 |   /// platform, scalar floating-point math does. | ||
| 826 |   /// This applies to floating-point math operations and calls, not memory | ||
| 827 |   /// operations, shuffles, or casts. | ||
| 828 | bool isFPVectorizationPotentiallyUnsafe() const; | ||
| 829 | |||
| 830 |   /// Determine if the target supports unaligned memory accesses. | ||
| 831 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, | ||
| 832 | unsigned AddressSpace = 0, | ||
| 833 | Align Alignment = Align(1), | ||
| 834 | unsigned *Fast = nullptr) const; | ||
| 835 | |||
| 836 |   /// Return hardware support for population count. | ||
| 837 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; | ||
| 838 | |||
| 839 |   /// Return true if the hardware has a fast square-root instruction. | ||
| 840 | bool haveFastSqrt(Type *Ty) const; | ||
| 841 | |||
| 842 |   /// Return true if the cost of the instruction is too high to speculatively | ||
| 843 |   /// execute and should be kept behind a branch. | ||
| 844 |   /// This normally just wraps around a getInstructionCost() call, but some | ||
| 845 |   /// targets might report a low TCK_SizeAndLatency value that is incompatible | ||
| 846 |   /// with the fixed TCC_Expensive value. | ||
| 847 |   /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute(). | ||
| 848 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) const; | ||
| 849 | |||
| 850 |   /// Return true if it is faster to check if a floating-point value is NaN | ||
| 851 |   /// (or not-NaN) versus a comparison against a constant FP zero value. | ||
| 852 |   /// Targets should override this if materializing a 0.0 for comparison is | ||
| 853 |   /// generally as cheap as checking for ordered/unordered. | ||
| 854 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; | ||
| 855 | |||
| 856 |   /// Return the expected cost of supporting the floating point operation | ||
| 857 |   /// of the specified type. | ||
| 858 | InstructionCost getFPOpCost(Type *Ty) const; | ||
| 859 | |||
| 860 |   /// Return the expected cost of materializing for the given integer | ||
| 861 |   /// immediate of the specified type. | ||
| 862 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 863 | TargetCostKind CostKind) const; | ||
| 864 | |||
| 865 |   /// Return the expected cost of materialization for the given integer | ||
| 866 |   /// immediate of the specified type for a given instruction. The cost can be | ||
| 867 |   /// zero if the immediate can be folded into the specified instruction. | ||
| 868 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 869 | const APInt &Imm, Type *Ty, | ||
| 870 | TargetCostKind CostKind, | ||
| 871 | Instruction *Inst = nullptr) const; | ||
| 872 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 873 | const APInt &Imm, Type *Ty, | ||
| 874 | TargetCostKind CostKind) const; | ||
| 875 | |||
| 876 |   /// Return the expected cost for the given integer when optimising | ||
| 877 |   /// for size. This is different than the other integer immediate cost | ||
| 878 |   /// functions in that it is subtarget agnostic. This is useful when you e.g. | ||
| 879 |   /// target one ISA such as Aarch32 but smaller encodings could be possible | ||
| 880 |   /// with another such as Thumb. This return value is used as a penalty when | ||
| 881 |   /// the total costs for a constant is calculated (the bigger the cost, the | ||
| 882 |   /// more beneficial constant hoisting is). | ||
| 883 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 884 | const APInt &Imm, Type *Ty) const; | ||
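
  // Illustrative sketch: a constant-hoisting style query asking what it costs
  // to materialize a 64-bit immediate as the second operand of an add. The
  // constant is arbitrary and `TTI`/`Ctx` are assumptions about the caller.
  //
  //   InstructionCost ImmCost = TTI.getIntImmCostInst(
  //       Instruction::Add, /*Idx=*/1, APInt(64, 0x12345678),
  //       Type::getInt64Ty(Ctx), TargetTransformInfo::TCK_SizeAndLatency);
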
  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc, ///< Shuffle elements of a single source vector with
                         ///< any shuffle mask.
    SK_Splice            ///< Concatenates elements from the first input vector
                         ///< with elements of the second input vector, returning
                         ///< a vector of the same type as the input vectors.
                         ///< Index indicates start offset in the first input
                         ///< vector.
  };

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is a uniform constant.
    OK_NonUniformConstantValue // Operand is a non-uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties {
    OP_None = 0,
    OP_PowerOf2 = 1,
    OP_NegatedPowerOf2 = 2,
  };

  // Describe the values an operand can take.  We're in the process
  // of migrating uses of OperandValueKind and OperandValueProperties
  // to use this class, and then will change the internal representation.
  struct OperandValueInfo {
    OperandValueKind Kind = OK_AnyValue;
    OperandValueProperties Properties = OP_None;

    bool isConstant() const {
      return Kind == OK_UniformConstantValue ||
             Kind == OK_NonUniformConstantValue;
    }
    bool isUniform() const {
      return Kind == OK_UniformConstantValue || Kind == OK_UniformValue;
    }
    bool isPowerOf2() const {
      return Properties == OP_PowerOf2;
    }
    bool isNegatedPowerOf2() const {
      return Properties == OP_NegatedPowerOf2;
    }

    OperandValueInfo getNoProps() const {
      return {Kind, OP_None};
    }
  };
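
  // Illustrative sketch: classifying an operand before a cost query. The
  // static TTI::getOperandInfo() helper used here is assumed (it is declared
  // elsewhere in this class, outside the lines shown above); `I` comes from
  // the caller.
  //
  //   TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(1));
  //   if (OpInfo.isConstant() && OpInfo.isPowerOf2()) {
  //     // e.g. a udiv by a (splatted) power-of-two constant, which many
  //     // targets can lower to a shift.
  //   }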
| 949 | |||
| 950 |   /// \return the number of registers in the target-provided register class. | ||
| 951 | unsigned getNumberOfRegisters(unsigned ClassID) const; | ||
| 952 | |||
| 953 |   /// \return the target-provided register class ID for the provided type, | ||
| 954 |   /// accounting for type promotion and other type-legalization techniques that | ||
| 955 |   /// the target might apply. However, it specifically does not account for the | ||
| 956 |   /// scalarization or splitting of vector types. Should a vector type require | ||
| 957 |   /// scalarization or splitting into multiple underlying vector registers, that | ||
| 958 |   /// type should be mapped to a register class containing no registers. | ||
| 959 |   /// Specifically, this is designed to provide a simple, high-level view of the | ||
| 960 |   /// register allocation later performed by the backend. These register classes | ||
| 961 |   /// don't necessarily map onto the register classes used by the backend. | ||
| 962 |   /// FIXME: It's not currently possible to determine how many registers | ||
| 963 |   /// are used by the provided type. | ||
| 964 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; | ||
| 965 | |||
| 966 |   /// \return the target-provided register class name | ||
| 967 | const char *getRegisterClassName(unsigned ClassID) const; | ||
| 968 | |||
| 969 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; | ||
| 970 | |||
| 971 |   /// \return The width of the largest scalar or vector register type. | ||
| 972 | TypeSize getRegisterBitWidth(RegisterKind K) const; | ||
| 973 | |||
| 974 |   /// \return The width of the smallest vector register type. | ||
| 975 | unsigned getMinVectorRegisterBitWidth() const; | ||
| 976 | |||
| 977 |   /// \return The maximum value of vscale if the target specifies an | ||
| 978 |   ///  architectural maximum vector length, and std::nullopt otherwise. | ||
| 979 | std::optional<unsigned> getMaxVScale() const; | ||
| 980 | |||
| 981 |   /// \return the value of vscale to tune the cost model for. | ||
| 982 | std::optional<unsigned> getVScaleForTuning() const; | ||
| 983 | |||
| 984 |   /// \return True if the vectorization factor should be chosen to | ||
| 985 |   /// make the vector of the smallest element type match the size of a | ||
| 986 |   /// vector register. For wider element types, this could result in | ||
| 987 |   /// creating vectors that span multiple vector registers. | ||
| 988 |   /// If false, the vectorization factor will be chosen based on the | ||
| 989 |   /// size of the widest element type. | ||
| 990 |   /// \p K Register Kind for vectorization. | ||
| 991 | bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; | ||
| 992 | |||
| 993 |   /// \return The minimum vectorization factor for types of given element | ||
| 994 |   /// bit width, or 0 if there is no minimum VF. The returned value only | ||
| 995 |   /// applies when shouldMaximizeVectorBandwidth returns true. | ||
| 996 |   /// If IsScalable is true, the returned ElementCount must be a scalable VF. | ||
| 997 | ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; | ||
| 998 | |||
| 999 |   /// \return The maximum vectorization factor for types of given element | ||
| 1000 |   /// bit width and opcode, or 0 if there is no maximum VF. | ||
| 1001 |   /// Currently only used by the SLP vectorizer. | ||
| 1002 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; | ||
| 1003 | |||
| 1004 |   /// \return The minimum vectorization factor for the store instruction. Given | ||
| 1005 |   /// the initial estimation of the minimum vector factor and store value type, | ||
| 1006 |   /// it tries to find possible lowest VF, which still might be profitable for | ||
| 1007 |   /// the vectorization. | ||
| 1008 |   /// \param VF Initial estimation of the minimum vector factor. | ||
| 1009 |   /// \param ScalarMemTy Scalar memory type of the store operation. | ||
| 1010 |   /// \param ScalarValTy Scalar type of the stored value. | ||
| 1011 |   /// Currently only used by the SLP vectorizer. | ||
| 1012 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 1013 | Type *ScalarValTy) const; | ||
| 1014 | |||
| 1015 |   /// \return True if \p I should be considered for address type promotion. | ||
| 1016 |   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is | ||
| 1017 |   /// profitable without finding other extensions fed by the same input. | ||
| 1018 | bool shouldConsiderAddressTypePromotion( | ||
| 1019 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; | ||
| 1020 | |||
| 1021 |   /// \return The size of a cache line in bytes. | ||
| 1022 | unsigned getCacheLineSize() const; | ||
| 1023 | |||
| 1024 |   /// The possible cache levels | ||
| 1025 | enum class CacheLevel { | ||
| 1026 |     L1D, // The L1 data cache | ||
| 1027 |     L2D, // The L2 data cache | ||
| 1028 | |||
| 1029 |     // We currently do not model L3 caches, as their sizes differ widely between | ||
| 1030 |     // microarchitectures. Also, we currently do not have a use for L3 cache | ||
| 1031 |     // size modeling yet. | ||
| 1032 | }; | ||
| 1033 | |||
| 1034 |   /// \return The size of the cache level in bytes, if available. | ||
| 1035 | std::optional<unsigned> getCacheSize(CacheLevel Level) const; | ||
| 1036 | |||
| 1037 |   /// \return The associativity of the cache level, if available. | ||
| 1038 | std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const; | ||
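|   // Illustrative sketch only: using the L1 data cache size, when the target | ||
|   // reports one, to bound a hypothetical working-set estimate | ||
|   // `FootprintBytes` computed by the caller. | ||
|   //   if (std::optional<unsigned> L1Bytes = | ||
|   //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D)) | ||
|   //     FootprintBytes = std::min(FootprintBytes, *L1Bytes); | ||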
| 1039 | |||
| 1040 |   /// \return How much before a load we should place the prefetch | ||
| 1041 |   /// instruction.  This is currently measured in number of | ||
| 1042 |   /// instructions. | ||
| 1043 | unsigned getPrefetchDistance() const; | ||
| 1044 | |||
| 1045 |   /// Some HW prefetchers can handle accesses up to a certain constant stride. | ||
| 1046 |   /// Sometimes prefetching is beneficial even below the HW prefetcher limit, | ||
| 1047 |   /// and the arguments provided are meant to serve as a basis for deciding this | ||
| 1048 |   /// for a particular loop. | ||
| 1049 |   /// | ||
| 1050 |   /// \param NumMemAccesses        Number of memory accesses in the loop. | ||
| 1051 |   /// \param NumStridedMemAccesses Number of the memory accesses that | ||
| 1052 |   ///                              ScalarEvolution could find a known stride | ||
| 1053 |   ///                              for. | ||
| 1054 |   /// \param NumPrefetches         Number of software prefetches that will be | ||
| 1055 |   ///                              emitted as determined by the addresses | ||
| 1056 |   ///                              involved and the cache line size. | ||
| 1057 |   /// \param HasCall               True if the loop contains a call. | ||
| 1058 |   /// | ||
| 1059 |   /// \return This is the minimum stride in bytes where it makes sense to start | ||
| 1060 |   ///         adding SW prefetches. The default is 1, i.e. prefetch with any | ||
| 1061 |   ///         stride. | ||
| 1062 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 1063 |                                 unsigned NumStridedMemAccesses, | ||
| 1064 | unsigned NumPrefetches, bool HasCall) const; | ||
| 1065 | |||
| 1066 |   /// \return The maximum number of iterations to prefetch ahead.  If | ||
| 1067 |   /// the required number of iterations is more than this number, no | ||
| 1068 |   /// prefetching is performed. | ||
| 1069 | unsigned getMaxPrefetchIterationsAhead() const; | ||
| 1070 | |||
| 1071 |   /// \return True if prefetching should also be done for writes. | ||
| 1072 | bool enableWritePrefetching() const; | ||
| 1073 | |||
| 1074 |   /// \return True if the target wants to issue a prefetch in address space \p AS. | ||
| 1075 | bool shouldPrefetchAddressSpace(unsigned AS) const; | ||
| 1076 | |||
| 1077 |   /// \return The maximum interleave factor that any transform should try to | ||
| 1078 |   /// perform for this target. This number depends on the level of parallelism | ||
| 1079 |   /// and the number of execution units in the CPU. | ||
| 1080 | unsigned getMaxInterleaveFactor(unsigned VF) const; | ||
| 1081 | |||
| 1082 |   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. | ||
| 1083 | static OperandValueInfo getOperandInfo(const Value *V); | ||
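|   // Illustrative sketch: deriving operand properties for a later cost query | ||
|   // from an existing binary instruction `I` (assumed to be in scope). | ||
|   //   TargetTransformInfo::OperandValueInfo RHSInfo = | ||
|   //       TargetTransformInfo::getOperandInfo(I.getOperand(1)); | ||
|   //   bool RHSIsPow2Const = RHSInfo.isConstant() && RHSInfo.isPowerOf2(); | ||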
| 1084 | |||
| 1085 |   /// This is an approximation of reciprocal throughput of a math/logic op. | ||
| 1086 |   /// A higher cost indicates less expected throughput. | ||
| 1087 |   /// From Agner Fog's guides, reciprocal throughput is "the average number of | ||
| 1088 |   /// clock cycles per instruction when the instructions are not part of a | ||
| 1089 |   /// limiting dependency chain." | ||
| 1090 |   /// Therefore, costs should be scaled to account for multiple execution units | ||
| 1091 |   /// on the target that can process this type of instruction. For example, if | ||
| 1092 |   /// there are 5 scalar integer units and 2 vector integer units that can | ||
| 1093 |   /// calculate an 'add' in a single cycle, this model should indicate that the | ||
| 1094 |   /// cost of the vector add instruction is 2.5 times the cost of the scalar | ||
| 1095 |   /// add instruction. | ||
| 1096 |   /// \p Args is an optional argument which holds the instruction operands | ||
| 1097 |   /// values so the TTI can analyze those values searching for special | ||
| 1098 |   /// cases or optimizations based on those values. | ||
| 1099 |   /// \p CxtI is the optional original context instruction, if one exists, to | ||
| 1100 |   /// provide even more information. | ||
| 1101 |   InstructionCost getArithmeticInstrCost( | ||
| 1102 | unsigned Opcode, Type *Ty, | ||
| 1103 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1104 | TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None}, | ||
| 1105 | TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None}, | ||
| 1106 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), | ||
| 1107 | const Instruction *CxtI = nullptr) const; | ||
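|   // Minimal sketch, assuming `TTI` is an initialized TargetTransformInfo and | ||
|   // `VecTy` is a VectorType* chosen by the caller: estimate the | ||
|   // reciprocal-throughput cost of a vector multiply whose second operand is | ||
|   // known to be a uniform constant power of two. | ||
|   //   InstructionCost MulCost = TTI.getArithmeticInstrCost( | ||
|   //       Instruction::Mul, VecTy, TargetTransformInfo::TCK_RecipThroughput, | ||
|   //       {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, | ||
|   //       {TargetTransformInfo::OK_UniformConstantValue, | ||
|   //        TargetTransformInfo::OP_PowerOf2}); | ||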
| 1108 | |||
| 1109 |   /// \return The cost of a shuffle instruction of kind Kind and of type Tp. | ||
| 1110 |   /// The exact mask may be passed as Mask, or else the array will be empty. | ||
| 1111 |   /// The index and subtype parameters are used by the subvector insertion and | ||
| 1112 |   /// extraction shuffle kinds to show the insert/extract point and the type of | ||
| 1113 |   /// the subvector being inserted/extracted. The operands of the shuffle can be | ||
| 1114 |   /// passed through \p Args, which helps improve the cost estimation in some | ||
| 1115 |   /// cases, like in broadcast loads. | ||
| 1116 |   /// NOTE: For subvector extractions Tp represents the source type. | ||
| 1117 | InstructionCost | ||
| 1118 | getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 1119 | ArrayRef<int> Mask = std::nullopt, | ||
| 1120 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1121 | int Index = 0, VectorType *SubTp = nullptr, | ||
| 1122 | ArrayRef<const Value *> Args = std::nullopt) const; | ||
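|   // Illustrative sketch: the cost of broadcasting lane 0 across `VecTy` | ||
|   // (`TTI` and `VecTy` are assumed to be provided by the caller). | ||
|   //   InstructionCost SplatCost = | ||
|   //       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy); | ||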
| 1123 | |||
| 1124 |   /// Represents a hint about the context in which a cast is used. | ||
| 1125 |   /// | ||
| 1126 |   /// For zext/sext, the context of the cast is the operand, which must be a | ||
| 1127 |   /// load of some kind. For trunc, the context of the cast is the single | ||
| 1128 |   /// user of the instruction, which must be a store of some kind. | ||
| 1129 |   /// | ||
| 1130 |   /// This enum allows the vectorizer to give getCastInstrCost an idea of the | ||
| 1131 |   /// type of cast it's dealing with, as not every cast is equal. For instance, | ||
| 1132 |   /// the zext of a load may be free, but the zext of an interleaving load can | ||
| 1133 |   /// be (very) expensive! | ||
| 1134 |   /// | ||
| 1135 |   /// See \c getCastContextHint to compute a CastContextHint from a cast | ||
| 1136 |   /// Instruction*. Callers can use it if they don't need to override the | ||
| 1137 |   /// context and just want it to be calculated from the instruction. | ||
| 1138 |   /// | ||
| 1139 |   /// FIXME: This handles the types of load/store that the vectorizer can | ||
| 1140 |   /// produce, which are the cases where the context instruction is most | ||
| 1141 |   /// likely to be incorrect. There are other situations where that can happen | ||
| 1142 |   /// too, which might be handled here but in the long run a more general | ||
| 1143 |   /// solution of costing multiple instructions at the same time may be better. | ||
| 1144 | enum class CastContextHint : uint8_t { | ||
| 1145 |     None,          ///< The cast is not used with a load/store of any kind. | ||
| 1146 |     Normal,        ///< The cast is used with a normal load/store. | ||
| 1147 |     Masked,        ///< The cast is used with a masked load/store. | ||
| 1148 |     GatherScatter, ///< The cast is used with a gather/scatter. | ||
| 1149 |     Interleave,    ///< The cast is used with an interleaved load/store. | ||
| 1150 |     Reversed,      ///< The cast is used with a reversed load/store. | ||
| 1151 | }; | ||
| 1152 | |||
| 1153 |   /// Calculates a CastContextHint from \p I. | ||
| 1154 |   /// This should be used by callers of getCastInstrCost if they wish to | ||
| 1155 |   /// determine the context from some instruction. | ||
| 1156 |   /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, | ||
| 1157 |   /// or if it's another type of cast. | ||
| 1158 | static CastContextHint getCastContextHint(const Instruction *I); | ||
| 1159 | |||
| 1160 |   /// \return The expected cost of cast instructions, such as bitcast, trunc, | ||
| 1161 |   /// zext, etc. If there is an existing instruction that holds Opcode, it | ||
| 1162 |   /// may be passed in the 'I' parameter. | ||
| 1163 | InstructionCost | ||
| 1164 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||
| 1165 | TTI::CastContextHint CCH, | ||
| 1166 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, | ||
| 1167 | const Instruction *I = nullptr) const; | ||
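|   // Minimal sketch combining the two hooks above; `ZExt` is assumed to be an | ||
|   // existing zext instruction and `TTI` a valid TargetTransformInfo. | ||
|   //   TargetTransformInfo::CastContextHint CCH = | ||
|   //       TargetTransformInfo::getCastContextHint(&ZExt); | ||
|   //   InstructionCost C = TTI.getCastInstrCost( | ||
|   //       Instruction::ZExt, ZExt.getType(), ZExt.getOperand(0)->getType(), | ||
|   //       CCH, TargetTransformInfo::TCK_RecipThroughput, &ZExt); | ||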
| 1168 | |||
| 1169 |   /// \return The expected cost of a sign- or zero-extended vector extract. Use | ||
| 1170 |   /// Index = -1 to indicate that there is no information about the index value. | ||
| 1171 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 1172 |                                            VectorType *VecTy, | ||
| 1173 | unsigned Index) const; | ||
| 1174 | |||
| 1175 |   /// \return The expected cost of control-flow related instructions such as | ||
| 1176 |   /// Phi, Ret, Br, Switch. | ||
| 1177 | InstructionCost | ||
| 1178 | getCFInstrCost(unsigned Opcode, | ||
| 1179 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, | ||
| 1180 | const Instruction *I = nullptr) const; | ||
| 1181 | |||
| 1182 |   /// \returns The expected cost of compare and select instructions. If there | ||
| 1183 |   /// is an existing instruction that holds Opcode, it may be passed in the | ||
| 1184 |   /// 'I' parameter. The \p VecPred parameter can be used to indicate the select | ||
| 1185 |   /// is using a compare with the specified predicate as condition. When vector | ||
| 1186 |   /// types are passed, \p VecPred must be used for all lanes. | ||
| 1187 | InstructionCost | ||
| 1188 | getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||
| 1189 | CmpInst::Predicate VecPred, | ||
| 1190 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1191 | const Instruction *I = nullptr) const; | ||
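|   // Illustrative sketch: costing a vector select whose condition comes from a | ||
|   // signed less-than compare. `VecTy` and `MaskTy` are hypothetical types | ||
|   // chosen by the caller. | ||
|   //   InstructionCost SelCost = TTI.getCmpSelInstrCost( | ||
|   //       Instruction::Select, VecTy, MaskTy, CmpInst::ICMP_SLT); | ||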
| 1192 | |||
| 1193 |   /// \return The expected cost of vector Insert and Extract. | ||
| 1194 |   /// Use -1 to indicate that there is no information on the index value. | ||
| 1195 |   /// This is used when the instruction is not available; a typical use | ||
| 1196 |   /// case is to provision the cost of vectorization/scalarization in | ||
| 1197 |   /// vectorizer passes. | ||
| 1198 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 1199 | TTI::TargetCostKind CostKind, | ||
| 1200 | unsigned Index = -1, Value *Op0 = nullptr, | ||
| 1201 | Value *Op1 = nullptr) const; | ||
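|   // Minimal sketch: the cost of extracting lane 0 when no concrete | ||
|   // extractelement instruction exists yet (`TTI` and `VecTy` assumed). | ||
|   //   InstructionCost ExtractCost = TTI.getVectorInstrCost( | ||
|   //       Instruction::ExtractElement, VecTy, | ||
|   //       TargetTransformInfo::TCK_RecipThroughput, /*Index=*/0); | ||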
| 1202 | |||
| 1203 |   /// \return The expected cost of vector Insert and Extract. | ||
| 1204 |   /// This is used when the instruction is available, and the implementation | ||
| 1205 |   /// asserts that 'I' is not nullptr. | ||
| 1206 |   /// | ||
| 1207 |   /// A typical suitable use case is cost estimation when vector instruction | ||
| 1208 |   /// exists (e.g., from basic blocks during transformation). | ||
| 1209 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 1210 | TTI::TargetCostKind CostKind, | ||
| 1211 | unsigned Index = -1) const; | ||
| 1212 | |||
| 1213 |   /// \return The cost of replication shuffle of \p VF elements typed \p EltTy | ||
| 1214 |   /// \p ReplicationFactor times. | ||
| 1215 |   /// | ||
| 1216 |   /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: | ||
| 1217 |   ///   <0,0,0,1,1,1,2,2,2,3,3,3> | ||
| 1218 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, | ||
| 1219 |                                             int VF, | ||
| 1220 | const APInt &DemandedDstElts, | ||
| 1221 | TTI::TargetCostKind CostKind); | ||
| 1222 | |||
| 1223 |   /// \return The cost of Load and Store instructions. | ||
| 1224 | InstructionCost | ||
| 1225 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1226 |                   unsigned AddressSpace, | ||
| 1227 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1228 | OperandValueInfo OpdInfo = {OK_AnyValue, OP_None}, | ||
| 1229 | const Instruction *I = nullptr) const; | ||
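|   // Illustrative sketch: cost of a 16-byte-aligned vector load from the | ||
|   // default address space (`TTI` and `VecTy` assumed to be in scope). | ||
|   //   InstructionCost LoadCost = TTI.getMemoryOpCost( | ||
|   //       Instruction::Load, VecTy, Align(16), /*AddressSpace=*/0); | ||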
| 1230 | |||
| 1231 |   /// \return The cost of VP Load and Store instructions. | ||
| 1232 | InstructionCost | ||
| 1233 | getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1234 |                     unsigned AddressSpace, | ||
| 1235 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1236 | const Instruction *I = nullptr) const; | ||
| 1237 | |||
| 1238 |   /// \return The cost of masked Load and Store instructions. | ||
| 1239 |   InstructionCost getMaskedMemoryOpCost( | ||
| 1240 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, | ||
| 1241 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1242 | |||
| 1243 |   /// \return The cost of Gather or Scatter operation | ||
| 1244 |   /// \p Opcode - is a type of memory access Load or Store | ||
| 1245 |   /// \p DataTy - a vector type of the data to be loaded or stored | ||
| 1246 |   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory | ||
| 1247 |   /// \p VariableMask - true when the memory access is predicated with a mask | ||
| 1248 |   ///                   that is not a compile-time constant | ||
| 1249 |   /// \p Alignment - alignment of single element | ||
| 1250 |   /// \p I - the optional original context instruction, if one exists, e.g. the | ||
| 1251 |   ///        load/store to transform or the call to the gather/scatter intrinsic | ||
| 1252 |   InstructionCost getGatherScatterOpCost( | ||
| 1253 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, | ||
| 1254 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1255 | const Instruction *I = nullptr) const; | ||
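|   // Minimal sketch, assuming `DataTy` is the vector type being loaded and | ||
|   // `PtrVec` the vector-of-pointers operand: | ||
|   //   InstructionCost GatherCost = TTI.getGatherScatterOpCost( | ||
|   //       Instruction::Load, DataTy, PtrVec, /*VariableMask=*/true, Align(4)); | ||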
| 1256 | |||
| 1257 |   /// \return The cost of the interleaved memory operation. | ||
| 1258 |   /// \p Opcode is the memory operation code | ||
| 1259 |   /// \p VecTy is the vector type of the interleaved access. | ||
| 1260 |   /// \p Factor is the interleave factor | ||
| 1261 |   /// \p Indices is the indices for interleaved load members (as interleaved | ||
| 1262 |   ///    load allows gaps) | ||
| 1263 |   /// \p Alignment is the alignment of the memory operation | ||
| 1264 |   /// \p AddressSpace is address space of the pointer. | ||
| 1265 |   /// \p UseMaskForCond indicates if the memory access is predicated. | ||
| 1266 |   /// \p UseMaskForGaps indicates if gaps should be masked. | ||
| 1267 |   InstructionCost getInterleavedMemoryOpCost( | ||
| 1268 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 1269 |       Align Alignment, unsigned AddressSpace, | ||
| 1270 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||
| 1271 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; | ||
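|   // Illustrative sketch: costing a factor-2 interleaved load where both | ||
|   // members are used (`WideVecTy` is the assumed concatenated vector type). | ||
|   //   unsigned Indices[] = {0, 1}; | ||
|   //   InstructionCost ILCost = TTI.getInterleavedMemoryOpCost( | ||
|   //       Instruction::Load, WideVecTy, /*Factor=*/2, Indices, Align(16), | ||
|   //       /*AddressSpace=*/0); | ||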
| 1272 | |||
| 1273 |   /// A helper function to determine the type of reduction algorithm used | ||
| 1274 |   /// for a given \p Opcode and set of FastMathFlags \p FMF. | ||
| 1275 | static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) { | ||
| 1276 | return FMF && !(*FMF).allowReassoc(); | ||
| 1277 |   } | ||
| 1278 | |||
| 1279 |   /// Calculate the cost of vector reduction intrinsics. | ||
| 1280 |   /// | ||
| 1281 |   /// This is the cost of reducing the vector value of type \p Ty to a scalar | ||
| 1282 |   /// value using the operation denoted by \p Opcode. The FastMathFlags | ||
| 1283 |   /// parameter \p FMF indicates what type of reduction we are performing: | ||
| 1284 |   ///   1. Tree-wise. This is the typical 'fast' reduction performed that | ||
| 1285 |   ///   involves successively splitting a vector in half and doing the | ||
| 1286 |   ///   operation on the pair of halves until you have a scalar value. For | ||
| 1287 |   ///   example: | ||
| 1288 |   ///     (v0, v1, v2, v3) | ||
| 1289 |   ///     ((v0+v2), (v1+v3), undef, undef) | ||
| 1290 |   ///     ((v0+v2+v1+v3), undef, undef, undef) | ||
| 1291 |   ///   This is the default behaviour for integer operations, whereas for | ||
| 1292 |   ///   floating point we only do this if \p FMF indicates that | ||
| 1293 |   ///   reassociation is allowed. | ||
| 1294 |   ///   2. Ordered. For a vector with N elements this involves performing N | ||
| 1295 |   ///   operations in lane order, starting with an initial scalar value, i.e. | ||
| 1296 |   ///     result = InitVal + v0 | ||
| 1297 |   ///     result = result + v1 | ||
| 1298 |   ///     result = result + v2 | ||
| 1299 |   ///     result = result + v3 | ||
| 1300 |   ///   This is only the case for FP operations and when reassociation is not | ||
| 1301 |   ///   allowed. | ||
| 1302 |   /// | ||
| 1303 |   InstructionCost getArithmeticReductionCost( | ||
| 1304 | unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, | ||
| 1305 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
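|   // Minimal sketch: an fadd reduction costed as tree-wise by allowing | ||
|   // reassociation (without it, requiresOrderedReduction() above would hold | ||
|   // and an ordered reduction is costed instead). `TTI` and `VecTy` assumed. | ||
|   //   FastMathFlags FMF; | ||
|   //   FMF.setAllowReassoc(); | ||
|   //   InstructionCost RdxCost = | ||
|   //       TTI.getArithmeticReductionCost(Instruction::FAdd, VecTy, FMF); | ||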
| 1306 | |||
| 1307 |   InstructionCost getMinMaxReductionCost( | ||
| 1308 | VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 1309 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1310 | |||
| 1311 |   /// Calculate the cost of an extended reduction pattern, similar to | ||
| 1312 |   /// getArithmeticReductionCost of an Add reduction with multiply and optional | ||
| 1313 |   /// extensions. This is the cost of either: | ||
| 1314 |   ///   ResTy vecreduce.add(mul(A, B)), or | ||
| 1315 |   ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). | ||
| 1316 |   InstructionCost getMulAccReductionCost( | ||
| 1317 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1318 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1319 | |||
| 1320 |   /// Calculate the cost of an extended reduction pattern, similar to | ||
| 1321 |   /// getArithmeticReductionCost of a reduction with an extension. | ||
| 1322 |   /// This is the cost of: | ||
| 1323 |   ///   ResTy vecreduce.opcode(ext(Ty A)). | ||
| 1324 |   InstructionCost getExtendedReductionCost( | ||
| 1325 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1326 | std::optional<FastMathFlags> FMF, | ||
| 1327 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; | ||
| 1328 | |||
| 1329 |   /// \returns The cost of Intrinsic instructions. Analyses the real arguments. | ||
| 1330 |   /// Three cases are handled: 1. scalar instruction 2. vector instruction | ||
| 1331 |   /// 3. scalar instruction which is to be vectorized. | ||
| 1332 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 1333 | TTI::TargetCostKind CostKind) const; | ||
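|   // Illustrative sketch: costing a vector llvm.fmuladd call via its cost | ||
|   // attributes (`VecTy` is an assumed vector floating-point type). | ||
|   //   IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, VecTy, | ||
|   //                                 {VecTy, VecTy, VecTy}); | ||
|   //   InstructionCost FMACost = TTI.getIntrinsicInstrCost( | ||
|   //       Attrs, TargetTransformInfo::TCK_RecipThroughput); | ||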
| 1334 | |||
| 1335 |   /// \returns The cost of Call instructions. | ||
| 1336 |   InstructionCost getCallInstrCost( | ||
| 1337 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, | ||
| 1338 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; | ||
| 1339 | |||
| 1340 |   /// \returns The number of pieces into which the provided type must be | ||
| 1341 |   /// split during legalization. Zero is returned when the answer is unknown. | ||
| 1342 | unsigned getNumberOfParts(Type *Tp) const; | ||
| 1343 | |||
| 1344 |   /// \returns The cost of the address computation. For most targets this can be | ||
| 1345 |   /// merged into the instruction indexing mode. Some targets might want to | ||
| 1346 |   /// distinguish between address computation for memory operations on vector | ||
| 1347 |   /// types and scalar types. Such targets should override this function. | ||
| 1348 |   /// The 'SE' parameter holds a pointer to the scalar evolution object, which | ||
| 1349 |   /// is used to get the step value of 'Ptr' in the case of a constant stride. | ||
| 1350 |   /// The 'Ptr' parameter holds the SCEV of the access pointer. | ||
| 1351 | InstructionCost getAddressComputationCost(Type *Ty, | ||
| 1352 | ScalarEvolution *SE = nullptr, | ||
| 1353 | const SCEV *Ptr = nullptr) const; | ||
| 1354 | |||
| 1355 |   /// \returns The cost, if any, of keeping values of the given types alive | ||
| 1356 |   /// over a callsite. | ||
| 1357 |   /// | ||
| 1358 |   /// Some types may require the use of register classes that do not have | ||
| 1359 |   /// any callee-saved registers, so would require a spill and fill. | ||
| 1360 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; | ||
| 1361 | |||
| 1362 |   /// \returns True if the intrinsic is a supported memory intrinsic.  Info | ||
| 1363 |   /// will contain additional information - whether the intrinsic may read | ||
| 1364 |   /// or write memory, its volatility and the pointer.  Info is undefined | ||
| 1365 |   /// if false is returned. | ||
| 1366 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; | ||
| 1367 | |||
| 1368 |   /// \returns The maximum element size, in bytes, for an element | ||
| 1369 |   /// unordered-atomic memory intrinsic. | ||
| 1370 | unsigned getAtomicMemIntrinsicMaxElementSize() const; | ||
| 1371 | |||
| 1372 |   /// \returns A value which is the result of the given memory intrinsic.  New | ||
| 1373 |   /// instructions may be created to extract the result from the given intrinsic | ||
| 1374 |   /// memory operation.  Returns nullptr if the target cannot create a result | ||
| 1375 |   /// from the given intrinsic. | ||
| 1376 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 1377 | Type *ExpectedType) const; | ||
| 1378 | |||
| 1379 |   /// \returns The type to use in a loop expansion of a memcpy call. | ||
| 1380 | Type *getMemcpyLoopLoweringType( | ||
| 1381 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 1382 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 1383 | std::optional<uint32_t> AtomicElementSize = std::nullopt) const; | ||
| 1384 | |||
| 1385 |   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. | ||
| 1386 |   /// \param RemainingBytes The number of bytes to copy. | ||
| 1387 |   /// | ||
| 1388 |   /// Calculates the operand types to use when copying \p RemainingBytes of | ||
| 1389 |   /// memory, where source and destination alignments are \p SrcAlign and | ||
| 1390 |   /// \p DestAlign respectively. | ||
| 1391 | void getMemcpyLoopResidualLoweringType( | ||
| 1392 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 1393 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 1394 | unsigned SrcAlign, unsigned DestAlign, | ||
| 1395 | std::optional<uint32_t> AtomicCpySize = std::nullopt) const; | ||
| 1396 | |||
| 1397 |   /// \returns True if the two functions have compatible attributes for inlining | ||
| 1398 |   /// purposes. | ||
| 1399 | bool areInlineCompatible(const Function *Caller, | ||
| 1400 | const Function *Callee) const; | ||
| 1401 | |||
| 1402 |   /// \returns True if the caller and callee agree on how \p Types will be | ||
| 1403 |   /// passed to or returned from the callee. | ||
| 1405 |   /// \param Types List of types to check. | ||
| 1406 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, | ||
| 1407 | const ArrayRef<Type *> &Types) const; | ||
| 1408 | |||
| 1409 |   /// The type of load/store indexing. | ||
| 1410 | enum MemIndexedMode { | ||
| 1411 |     MIM_Unindexed, ///< No indexing. | ||
| 1412 |     MIM_PreInc,    ///< Pre-incrementing. | ||
| 1413 |     MIM_PreDec,    ///< Pre-decrementing. | ||
| 1414 |     MIM_PostInc,   ///< Post-incrementing. | ||
| 1415 |     MIM_PostDec    ///< Post-decrementing. | ||
| 1416 | }; | ||
| 1417 | |||
| 1418 |   /// \returns True if the specified indexed load for the given type is legal. | ||
| 1419 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; | ||
| 1420 | |||
| 1421 |   /// \returns True if the specified indexed store for the given type is legal. | ||
| 1422 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; | ||
| 1423 | |||
| 1424 |   /// \returns The bitwidth of the largest vector type that should be used to | ||
| 1425 |   /// load/store in the given address space. | ||
| 1426 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; | ||
| 1427 | |||
| 1428 |   /// \returns True if the load instruction is legal to vectorize. | ||
| 1429 | bool isLegalToVectorizeLoad(LoadInst *LI) const; | ||
| 1430 | |||
| 1431 |   /// \returns True if the store instruction is legal to vectorize. | ||
| 1432 | bool isLegalToVectorizeStore(StoreInst *SI) const; | ||
| 1433 | |||
| 1434 |   /// \returns True if it is legal to vectorize the given load chain. | ||
| 1435 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 1436 | unsigned AddrSpace) const; | ||
| 1437 | |||
| 1438 |   /// \returns True if it is legal to vectorize the given store chain. | ||
| 1439 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 1440 | unsigned AddrSpace) const; | ||
| 1441 | |||
| 1442 |   /// \returns True if it is legal to vectorize the given reduction kind. | ||
| 1443 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 1444 | ElementCount VF) const; | ||
| 1445 | |||
| 1446 |   /// \returns True if the given type is supported for scalable vectors | ||
| 1447 | bool isElementTypeLegalForScalableVector(Type *Ty) const; | ||
| 1448 | |||
| 1449 |   /// \returns The new vector factor value if the target doesn't support \p | ||
| 1450 |   /// SizeInBytes loads or has a better vector factor. | ||
| 1451 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 1452 |                                unsigned ChainSizeInBytes, | ||
| 1453 | VectorType *VecTy) const; | ||
| 1454 | |||
| 1455 |   /// \returns The new vector factor value if the target doesn't support \p | ||
| 1456 |   /// SizeInBytes stores or has a better vector factor. | ||
| 1457 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 1458 |                                 unsigned ChainSizeInBytes, | ||
| 1459 | VectorType *VecTy) const; | ||
| 1460 | |||
| 1461 |   /// Flags describing the kind of vector reduction. | ||
| 1462 | struct ReductionFlags { | ||
| 1463 | ReductionFlags() = default; | ||
| 1464 | bool IsMaxOp = | ||
| 1465 | false; ///< If the op is a min/max kind, true if it's a max operation. | ||
| 1466 | bool IsSigned = false; ///< Whether the operation is a signed int reduction. | ||
| 1467 | bool NoNaN = | ||
| 1468 | false; ///< If op is an fp min/max, whether NaNs may be present. | ||
| 1469 | }; | ||
| 1470 | |||
| 1471 |   /// \returns True if the target prefers in-loop reductions. | ||
| 1472 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 1473 | ReductionFlags Flags) const; | ||
| 1474 | |||
| 1475 |   /// \returns True if the target prefers the reduction select kept in the loop | ||
| 1476 |   /// when tail folding, i.e. | ||
| 1477 |   /// loop: | ||
| 1478 |   ///   p = phi (0, s) | ||
| 1479 |   ///   a = add (p, x) | ||
| 1480 |   ///   s = select (mask, a, p) | ||
| 1481 |   /// vecreduce.add(s) | ||
| 1482 |   /// | ||
| 1483 |   /// As opposed to the normal scheme of p = phi (0, a) which allows the select | ||
| 1484 |   /// to be pulled out of the loop. If the select(.., add, ..) can be predicated | ||
| 1485 |   /// by the target, this can lead to cleaner code generation. | ||
| 1486 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 1487 | ReductionFlags Flags) const; | ||
| 1488 | |||
| 1489 |   /// Return true if the loop vectorizer should consider vectorizing an | ||
| 1490 |   /// otherwise scalar epilogue loop. | ||
| 1491 | bool preferEpilogueVectorization() const; | ||
| 1492 | |||
| 1493 |   /// \returns True if the target wants to expand the given reduction intrinsic | ||
| 1494 |   /// into a shuffle sequence. | ||
| 1495 | bool shouldExpandReduction(const IntrinsicInst *II) const; | ||
| 1496 | |||
| 1497 |   /// \returns the size cost of rematerializing a GlobalValue address relative | ||
| 1498 |   /// to a stack reload. | ||
| 1499 | unsigned getGISelRematGlobalCost() const; | ||
| 1500 | |||
| 1501 |   /// \returns the lower bound of a trip count to decide on vectorization | ||
| 1502 |   /// while tail-folding. | ||
| 1503 | unsigned getMinTripCountTailFoldingThreshold() const; | ||
| 1504 | |||
| 1505 |   /// \returns True if the target supports scalable vectors. | ||
| 1506 | bool supportsScalableVectors() const; | ||
| 1507 | |||
| 1508 |   /// \return true when scalable vectorization is preferred. | ||
| 1509 | bool enableScalableVectorization() const; | ||
| 1510 | |||
| 1511 |   /// \name Vector Predication Information | ||
| 1512 |   /// @{ | ||
| 1513 |   /// Whether the target supports the %evl parameter of VP intrinsics efficiently | ||
| 1514 |   /// in hardware, for the given opcode and type/alignment. (see LLVM Language | ||
| 1515 |   /// Reference - "Vector Predication Intrinsics"). | ||
| 1516 |   /// Use of %evl is discouraged when that is not the case. | ||
| 1517 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 1518 | Align Alignment) const; | ||
| 1519 | |||
| 1520 | struct VPLegalization { | ||
| 1521 | enum VPTransform { | ||
| 1522 |       // keep the predicating parameter | ||
| 1523 | Legal = 0, | ||
| 1524 |       // where legal, discard the predicate parameter | ||
| 1525 | Discard = 1, | ||
| 1526 |       // transform into something else that is also predicating | ||
| 1527 | Convert = 2 | ||
| 1528 | }; | ||
| 1529 | |||
| 1530 |     // How to transform the EVL parameter. | ||
| 1531 |     // Legal:   keep the EVL parameter as it is. | ||
| 1532 |     // Discard: Ignore the EVL parameter where it is safe to do so. | ||
| 1533 |     // Convert: Fold the EVL into the mask parameter. | ||
| 1534 |     VPTransform EVLParamStrategy; | ||
| 1535 | |||
| 1536 |     // How to transform the operator. | ||
| 1537 |     // Legal:   The target supports this operator. | ||
| 1538 |     // Convert: Convert this to a non-VP operation. | ||
| 1539 |     // The 'Discard' strategy is invalid. | ||
| 1540 |     VPTransform OpStrategy; | ||
| 1541 | |||
| 1542 | bool shouldDoNothing() const { | ||
| 1543 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); | ||
| 1544 |     } | ||
| 1545 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) | ||
| 1546 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} | ||
| 1547 | }; | ||
| 1548 | |||
| 1549 |   /// \returns How the target needs this vector-predicated operation to be | ||
| 1550 |   /// transformed. | ||
| 1551 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; | ||
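|   // Minimal sketch: deciding whether a vector-predicated intrinsic `VPI` | ||
|   // (a VPIntrinsic reference assumed to be in scope) can be kept as-is. | ||
|   //   TargetTransformInfo::VPLegalization VPL = | ||
|   //       TTI.getVPLegalizationStrategy(VPI); | ||
|   //   bool KeepAsIs = VPL.shouldDoNothing(); | ||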
| 1552 |   /// @} | ||
| 1553 | |||
| 1554 |   /// @} | ||
| 1555 | |||
| 1556 | private: | ||
| 1557 |   /// The abstract base class used to type erase specific TTI | ||
| 1558 |   /// implementations. | ||
| 1559 | class Concept; | ||
| 1560 | |||
| 1561 |   /// The template model for the base class which wraps a concrete | ||
| 1562 |   /// implementation in a type erased interface. | ||
| 1563 | template <typename T> class Model; | ||
| 1564 | |||
| 1565 | std::unique_ptr<Concept> TTIImpl; | ||
| 1566 | }; | ||
| 1567 | |||
| 1568 | class TargetTransformInfo::Concept { | ||
| 1569 | public: | ||
| 1570 | virtual ~Concept() = 0; | ||
| 1571 | virtual const DataLayout &getDataLayout() const = 0; | ||
| 1572 | virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 1573 | ArrayRef<const Value *> Operands, | ||
| 1574 | TTI::TargetCostKind CostKind) = 0; | ||
| 1575 | virtual unsigned getInliningThresholdMultiplier() = 0; | ||
| 1576 | virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; | ||
| 1577 | virtual int getInlinerVectorBonusPercent() = 0; | ||
| 1578 | virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; | ||
| 1579 | virtual unsigned | ||
| 1580 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, | ||
| 1581 |                                    ProfileSummaryInfo *PSI, | ||
| 1582 | BlockFrequencyInfo *BFI) = 0; | ||
| 1583 | virtual InstructionCost getInstructionCost(const User *U, | ||
| 1584 | ArrayRef<const Value *> Operands, | ||
| 1585 | TargetCostKind CostKind) = 0; | ||
| 1586 | virtual BranchProbability getPredictableBranchThreshold() = 0; | ||
| 1587 | virtual bool hasBranchDivergence() = 0; | ||
| 1588 | virtual bool useGPUDivergenceAnalysis() = 0; | ||
| 1589 | virtual bool isSourceOfDivergence(const Value *V) = 0; | ||
| 1590 | virtual bool isAlwaysUniform(const Value *V) = 0; | ||
| 1591 | virtual unsigned getFlatAddressSpace() = 0; | ||
| 1592 | virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 1593 | Intrinsic::ID IID) const = 0; | ||
| 1594 | virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; | ||
| 1595 | virtual bool | ||
| 1596 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; | ||
| 1597 | virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; | ||
| 1598 | virtual bool isSingleThreaded() const = 0; | ||
| 1599 | virtual std::pair<const Value *, unsigned> | ||
| 1600 | getPredicatedAddrSpace(const Value *V) const = 0; | ||
| 1601 | virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, | ||
| 1602 |                                                   Value *OldV, | ||
| 1603 | Value *NewV) const = 0; | ||
| 1604 | virtual bool isLoweredToCall(const Function *F) = 0; | ||
| 1605 | virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, | ||
| 1606 |                                        UnrollingPreferences &UP, | ||
| 1607 | OptimizationRemarkEmitter *ORE) = 0; | ||
| 1608 | virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 1609 | PeelingPreferences &PP) = 0; | ||
| 1610 | virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 1611 |                                         AssumptionCache &AC, | ||
| 1612 |                                         TargetLibraryInfo *LibInfo, | ||
| 1613 | HardwareLoopInfo &HWLoopInfo) = 0; | ||
| 1614 | virtual bool | ||
| 1615 | preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 1616 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 1617 | DominatorTree *DT, LoopVectorizationLegality *LVL, | ||
| 1618 | InterleavedAccessInfo *IAI) = 0; | ||
| 1619 | virtual PredicationStyle emitGetActiveLaneMask() = 0; | ||
| 1620 | virtual std::optional<Instruction *> instCombineIntrinsic( | ||
| 1621 | InstCombiner &IC, IntrinsicInst &II) = 0; | ||
| 1622 | virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic( | ||
| 1623 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, | ||
| 1624 | KnownBits & Known, bool &KnownBitsComputed) = 0; | ||
| 1625 | virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 1626 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, | ||
| 1627 | APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, | ||
| 1628 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 1629 | SimplifyAndSetOp) = 0; | ||
| 1630 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; | ||
| 1631 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; | ||
| 1632 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, | ||
| 1633 | int64_t BaseOffset, bool HasBaseReg, | ||
| 1634 | int64_t Scale, unsigned AddrSpace, | ||
| 1635 | Instruction *I) = 0; | ||
| 1636 | virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 1637 | const TargetTransformInfo::LSRCost &C2) = 0; | ||
| 1638 | virtual bool isNumRegsMajorCostOfLSR() = 0; | ||
| 1639 | virtual bool isProfitableLSRChainElement(Instruction *I) = 0; | ||
| 1640 | virtual bool canMacroFuseCmp() = 0; | ||
| 1641 | virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, | ||
| 1642 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, | ||
| 1643 | TargetLibraryInfo *LibInfo) = 0; | ||
| 1644 |   virtual AddressingModeKind | ||
| 1645 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; | ||
| 1646 | virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; | ||
| 1647 | virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; | ||
| 1648 | virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; | ||
| 1649 | virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; | ||
| 1650 | virtual bool isLegalBroadcastLoad(Type *ElementTy, | ||
| 1651 | ElementCount NumElements) const = 0; | ||
| 1652 | virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; | ||
| 1653 | virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; | ||
| 1654 | virtual bool forceScalarizeMaskedGather(VectorType *DataType, | ||
| 1655 | Align Alignment) = 0; | ||
| 1656 | virtual bool forceScalarizeMaskedScatter(VectorType *DataType, | ||
| 1657 | Align Alignment) = 0; | ||
| 1658 | virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; | ||
| 1659 | virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; | ||
| 1660 | virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, | ||
| 1661 |                                unsigned Opcode1, | ||
| 1662 | const SmallBitVector &OpcodeMask) const = 0; | ||
| 1663 | virtual bool enableOrderedReductions() = 0; | ||
| 1664 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; | ||
| 1665 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; | ||
| 1666 | virtual bool prefersVectorizedAddressing() = 0; | ||
| 1667 | virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 1668 |                                                int64_t BaseOffset, | ||
| 1669 | bool HasBaseReg, int64_t Scale, | ||
| 1670 | unsigned AddrSpace) = 0; | ||
| 1671 | virtual bool LSRWithInstrQueries() = 0; | ||
| 1672 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; | ||
| 1673 | virtual bool isProfitableToHoist(Instruction *I) = 0; | ||
| 1674 | virtual bool useAA() = 0; | ||
| 1675 | virtual bool isTypeLegal(Type *Ty) = 0; | ||
| 1676 | virtual unsigned getRegUsageForType(Type *Ty) = 0; | ||
| 1677 | virtual bool shouldBuildLookupTables() = 0; | ||
| 1678 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; | ||
| 1679 | virtual bool shouldBuildRelLookupTables() = 0; | ||
| 1680 | virtual bool useColdCCForColdCall(Function &F) = 0; | ||
| 1681 | virtual InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 1682 | const APInt &DemandedElts, | ||
| 1683 | bool Insert, bool Extract, | ||
| 1684 | TargetCostKind CostKind) = 0; | ||
| 1685 |   virtual InstructionCost | ||
| 1686 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 1687 | ArrayRef<Type *> Tys, | ||
| 1688 | TargetCostKind CostKind) = 0; | ||
| 1689 | virtual bool supportsEfficientVectorElementLoadStore() = 0; | ||
| 1690 | virtual bool supportsTailCalls() = 0; | ||
| 1691 | virtual bool supportsTailCallFor(const CallBase *CB) = 0; | ||
| 1692 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; | ||
| 1693 |   virtual MemCmpExpansionOptions | ||
| 1694 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; | ||
| 1695 | virtual bool enableSelectOptimize() = 0; | ||
| 1696 | virtual bool enableInterleavedAccessVectorization() = 0; | ||
| 1697 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; | ||
| 1698 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; | ||
| 1699 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, | ||
| 1700 |                                               unsigned BitWidth, | ||
| 1701 |                                               unsigned AddressSpace, | ||
| 1702 | Align Alignment, | ||
| 1703 | unsigned *Fast) = 0; | ||
| 1704 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; | ||
| 1705 | virtual bool haveFastSqrt(Type *Ty) = 0; | ||
| 1706 | virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0; | ||
| 1707 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; | ||
| 1708 | virtual InstructionCost getFPOpCost(Type *Ty) = 0; | ||
| 1709 | virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 1710 | const APInt &Imm, Type *Ty) = 0; | ||
| 1711 | virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 1712 | TargetCostKind CostKind) = 0; | ||
| 1713 | virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 1714 | const APInt &Imm, Type *Ty, | ||
| 1715 | TargetCostKind CostKind, | ||
| 1716 | Instruction *Inst = nullptr) = 0; | ||
| 1717 | virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 1718 | const APInt &Imm, Type *Ty, | ||
| 1719 | TargetCostKind CostKind) = 0; | ||
| 1720 | virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; | ||
| 1721 | virtual unsigned getRegisterClassForType(bool Vector, | ||
| 1722 | Type *Ty = nullptr) const = 0; | ||
| 1723 | virtual const char *getRegisterClassName(unsigned ClassID) const = 0; | ||
| 1724 | virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; | ||
| 1725 | virtual unsigned getMinVectorRegisterBitWidth() const = 0; | ||
| 1726 | virtual std::optional<unsigned> getMaxVScale() const = 0; | ||
| 1727 | virtual std::optional<unsigned> getVScaleForTuning() const = 0; | ||
| 1728 | virtual bool | ||
| 1729 | shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; | ||
| 1730 | virtual ElementCount getMinimumVF(unsigned ElemWidth, | ||
| 1731 | bool IsScalable) const = 0; | ||
| 1732 | virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; | ||
| 1733 | virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 1734 | Type *ScalarValTy) const = 0; | ||
| 1735 | virtual bool shouldConsiderAddressTypePromotion( | ||
| 1736 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; | ||
| 1737 | virtual unsigned getCacheLineSize() const = 0; | ||
| 1738 | virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0; | ||
| 1739 | virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level) | ||
| 1740 | const = 0; | ||
| 1741 | |||
| 1742 |   /// \return How much before a load we should place the prefetch | ||
| 1743 |   /// instruction.  This is currently measured in number of | ||
| 1744 |   /// instructions. | ||
| 1745 | virtual unsigned getPrefetchDistance() const = 0; | ||
| 1746 | |||
| 1747 |   /// \return Some HW prefetchers can handle accesses up to a certain | ||
| 1748 |   /// constant stride.  This is the minimum stride in bytes where it | ||
| 1749 |   /// makes sense to start adding SW prefetches.  The default is 1, | ||
| 1750 |   /// i.e. prefetch with any stride.  Sometimes prefetching is beneficial | ||
| 1751 |   /// even below the HW prefetcher limit, and the arguments provided are | ||
| 1752 |   /// meant to serve as a basis for deciding this for a particular loop. | ||
| 1753 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 1754 |                                         unsigned NumStridedMemAccesses, | ||
| 1755 |                                         unsigned NumPrefetches, | ||
| 1756 | bool HasCall) const = 0; | ||
| 1757 | |||
| 1758 |   /// \return The maximum number of iterations to prefetch ahead.  If | ||
| 1759 |   /// the required number of iterations is more than this number, no | ||
| 1760 |   /// prefetching is performed. | ||
| 1761 | virtual unsigned getMaxPrefetchIterationsAhead() const = 0; | ||
| 1762 | |||
| 1763 |   /// \return True if prefetching should also be done for writes. | ||
| 1764 | virtual bool enableWritePrefetching() const = 0; | ||
| 1765 | |||
| 1766 |   /// \return True if the target wants to issue a prefetch in address space \p AS. | ||
| 1767 | virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0; | ||
| 1768 | |||
| 1769 | virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; | ||
| 1770 | virtual InstructionCost getArithmeticInstrCost( | ||
| 1771 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | ||
| 1772 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, | ||
| 1773 | ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; | ||
| 1774 | |||
| 1775 | virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 1776 | ArrayRef<int> Mask, | ||
| 1777 | TTI::TargetCostKind CostKind, | ||
| 1778 | int Index, VectorType *SubTp, | ||
| 1779 | ArrayRef<const Value *> Args) = 0; | ||
| 1780 | virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, | ||
| 1781 |                                            Type *Src, CastContextHint CCH, | ||
| 1782 | TTI::TargetCostKind CostKind, | ||
| 1783 | const Instruction *I) = 0; | ||
| 1784 | virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 1785 |                                                    VectorType *VecTy, | ||
| 1786 | unsigned Index) = 0; | ||
| 1787 | virtual InstructionCost getCFInstrCost(unsigned Opcode, | ||
| 1788 | TTI::TargetCostKind CostKind, | ||
| 1789 | const Instruction *I = nullptr) = 0; | ||
| 1790 | virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, | ||
| 1791 |                                              Type *CondTy, | ||
| 1792 | CmpInst::Predicate VecPred, | ||
| 1793 | TTI::TargetCostKind CostKind, | ||
| 1794 | const Instruction *I) = 0; | ||
| 1795 | virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 1796 | TTI::TargetCostKind CostKind, | ||
| 1797 | unsigned Index, Value *Op0, | ||
| 1798 | Value *Op1) = 0; | ||
| 1799 | virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 1800 | TTI::TargetCostKind CostKind, | ||
| 1801 | unsigned Index) = 0; | ||
| 1802 | |||
| 1803 |   virtual InstructionCost | ||
| 1804 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, | ||
| 1805 | const APInt &DemandedDstElts, | ||
| 1806 | TTI::TargetCostKind CostKind) = 0; | ||
| 1807 | |||
| 1808 |   virtual InstructionCost | ||
| 1809 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1810 | unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 1811 | OperandValueInfo OpInfo, const Instruction *I) = 0; | ||
| 1812 | virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, | ||
| 1813 | Align Alignment, | ||
| 1814 |                                             unsigned AddressSpace, | ||
| 1815 | TTI::TargetCostKind CostKind, | ||
| 1816 | const Instruction *I) = 0; | ||
| 1817 |   virtual InstructionCost | ||
| 1818 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 1819 |                         unsigned AddressSpace, | ||
| 1820 | TTI::TargetCostKind CostKind) = 0; | ||
| 1821 |   virtual InstructionCost | ||
| 1822 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, | ||
| 1823 |                          bool VariableMask, Align Alignment, | ||
| 1824 | TTI::TargetCostKind CostKind, | ||
| 1825 | const Instruction *I = nullptr) = 0; | ||
| 1826 | |||
| 1827 | virtual InstructionCost getInterleavedMemoryOpCost( | ||
| 1828 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 1829 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 1830 | bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; | ||
| 1831 |   virtual InstructionCost | ||
| 1832 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||
| 1833 | std::optional<FastMathFlags> FMF, | ||
| 1834 | TTI::TargetCostKind CostKind) = 0; | ||
| 1835 |   virtual InstructionCost | ||
| 1836 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 1837 | TTI::TargetCostKind CostKind) = 0; | ||
| 1838 | virtual InstructionCost getExtendedReductionCost( | ||
| 1839 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1840 | std::optional<FastMathFlags> FMF, | ||
| 1841 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; | ||
| 1842 | virtual InstructionCost getMulAccReductionCost( | ||
| 1843 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 1844 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; | ||
| 1845 |   virtual InstructionCost | ||
| 1846 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 1847 | TTI::TargetCostKind CostKind) = 0; | ||
| 1848 | virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, | ||
| 1849 | ArrayRef<Type *> Tys, | ||
| 1850 | TTI::TargetCostKind CostKind) = 0; | ||
| 1851 | virtual unsigned getNumberOfParts(Type *Tp) = 0; | ||
| 1852 |   virtual InstructionCost | ||
| 1853 | getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; | ||
| 1854 |   virtual InstructionCost | ||
| 1855 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; | ||
| 1856 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, | ||
| 1857 | MemIntrinsicInfo &Info) = 0; | ||
| 1858 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; | ||
| 1859 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 1860 | Type *ExpectedType) = 0; | ||
| 1861 | virtual Type *getMemcpyLoopLoweringType( | ||
| 1862 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 1863 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 1864 | std::optional<uint32_t> AtomicElementSize) const = 0; | ||
| 1865 | |||
| 1866 | virtual void getMemcpyLoopResidualLoweringType( | ||
| 1867 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 1868 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 1869 | unsigned SrcAlign, unsigned DestAlign, | ||
| 1870 | std::optional<uint32_t> AtomicCpySize) const = 0; | ||
| 1871 | virtual bool areInlineCompatible(const Function *Caller, | ||
| 1872 | const Function *Callee) const = 0; | ||
| 1873 | virtual bool areTypesABICompatible(const Function *Caller, | ||
| 1874 | const Function *Callee, | ||
| 1875 | const ArrayRef<Type *> &Types) const = 0; | ||
| 1876 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; | ||
| 1877 | virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; | ||
| 1878 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; | ||
| 1879 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; | ||
| 1880 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; | ||
| 1881 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, | ||
| 1882 | Align Alignment, | ||
| 1883 | unsigned AddrSpace) const = 0; | ||
| 1884 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, | ||
| 1885 | Align Alignment, | ||
| 1886 | unsigned AddrSpace) const = 0; | ||
| 1887 | virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 1888 | ElementCount VF) const = 0; | ||
| 1889 | virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; | ||
| 1890 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 1891 |                                        unsigned ChainSizeInBytes, | ||
| 1892 | VectorType *VecTy) const = 0; | ||
| 1893 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 1894 |                                         unsigned ChainSizeInBytes, | ||
| 1895 | VectorType *VecTy) const = 0; | ||
| 1896 | virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 1897 | ReductionFlags) const = 0; | ||
| 1898 | virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 1899 | ReductionFlags) const = 0; | ||
| 1900 | virtual bool preferEpilogueVectorization() const = 0; | ||
| 1901 | |||
| 1902 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; | ||
| 1903 | virtual unsigned getGISelRematGlobalCost() const = 0; | ||
| 1904 | virtual unsigned getMinTripCountTailFoldingThreshold() const = 0; | ||
| 1905 | virtual bool enableScalableVectorization() const = 0; | ||
| 1906 | virtual bool supportsScalableVectors() const = 0; | ||
| 1907 | virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 1908 | Align Alignment) const = 0; | ||
| 1909 |   virtual VPLegalization | ||
| 1910 | getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; | ||
| 1911 | }; | ||
| 1912 | |||
| 1913 | template <typename T> | ||
| 1914 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { | ||
| 1915 |   T Impl; | ||
| 1916 | |||
| 1917 | public: | ||
| 1918 | Model(T Impl) : Impl(std::move(Impl)) {} | ||
| 1919 | ~Model() override = default; | ||
| 1920 | |||
| 1921 | const DataLayout &getDataLayout() const override { | ||
| 1922 | return Impl.getDataLayout(); | ||
| 1923 |   } | ||
| 1924 | |||
| 1925 | InstructionCost | ||
| 1926 | getGEPCost(Type *PointeeType, const Value *Ptr, | ||
| 1927 | ArrayRef<const Value *> Operands, | ||
| 1928 | TargetTransformInfo::TargetCostKind CostKind) override { | ||
| 1929 | return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind); | ||
| 1930 |   } | ||
| 1931 | unsigned getInliningThresholdMultiplier() override { | ||
| 1932 | return Impl.getInliningThresholdMultiplier(); | ||
| 1933 |   } | ||
| 1934 | unsigned adjustInliningThreshold(const CallBase *CB) override { | ||
| 1935 | return Impl.adjustInliningThreshold(CB); | ||
| 1936 |   } | ||
| 1937 | int getInlinerVectorBonusPercent() override { | ||
| 1938 | return Impl.getInlinerVectorBonusPercent(); | ||
| 1939 |   } | ||
| 1940 | InstructionCost getMemcpyCost(const Instruction *I) override { | ||
| 1941 | return Impl.getMemcpyCost(I); | ||
| 1942 |   } | ||
| 1943 | InstructionCost getInstructionCost(const User *U, | ||
| 1944 | ArrayRef<const Value *> Operands, | ||
| 1945 | TargetCostKind CostKind) override { | ||
| 1946 | return Impl.getInstructionCost(U, Operands, CostKind); | ||
| 1947 |   } | ||
| 1948 | BranchProbability getPredictableBranchThreshold() override { | ||
| 1949 | return Impl.getPredictableBranchThreshold(); | ||
| 1950 |   } | ||
| 1951 | bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } | ||
| 1952 | bool useGPUDivergenceAnalysis() override { | ||
| 1953 | return Impl.useGPUDivergenceAnalysis(); | ||
| 1954 |   } | ||
| 1955 | bool isSourceOfDivergence(const Value *V) override { | ||
| 1956 | return Impl.isSourceOfDivergence(V); | ||
| 1957 |   } | ||
| 1958 | |||
| 1959 | bool isAlwaysUniform(const Value *V) override { | ||
| 1960 | return Impl.isAlwaysUniform(V); | ||
| 1961 |   } | ||
| 1962 | |||
| 1963 | unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } | ||
| 1964 | |||
| 1965 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||
| 1966 | Intrinsic::ID IID) const override { | ||
| 1967 | return Impl.collectFlatAddressOperands(OpIndexes, IID); | ||
| 1968 |   } | ||
| 1969 | |||
| 1970 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { | ||
| 1971 | return Impl.isNoopAddrSpaceCast(FromAS, ToAS); | ||
| 1972 |   } | ||
| 1973 | |||
| 1974 |   bool | ||
| 1975 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { | ||
| 1976 | return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); | ||
| 1977 |   } | ||
| 1978 | |||
| 1979 | unsigned getAssumedAddrSpace(const Value *V) const override { | ||
| 1980 | return Impl.getAssumedAddrSpace(V); | ||
| 1981 |   } | ||
| 1982 | |||
| 1983 | bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } | ||
| 1984 | |||
| 1985 | std::pair<const Value *, unsigned> | ||
| 1986 | getPredicatedAddrSpace(const Value *V) const override { | ||
| 1987 | return Impl.getPredicatedAddrSpace(V); | ||
| 1988 |   } | ||
| 1989 | |||
| 1990 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, | ||
| 1991 | Value *NewV) const override { | ||
| 1992 | return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); | ||
| 1993 |   } | ||
| 1994 | |||
| 1995 | bool isLoweredToCall(const Function *F) override { | ||
| 1996 | return Impl.isLoweredToCall(F); | ||
| 1997 |   } | ||
| 1998 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 1999 |                                UnrollingPreferences &UP, | ||
| 2000 | OptimizationRemarkEmitter *ORE) override { | ||
| 2001 | return Impl.getUnrollingPreferences(L, SE, UP, ORE); | ||
| 2002 |   } | ||
| 2003 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||
| 2004 | PeelingPreferences &PP) override { | ||
| 2005 | return Impl.getPeelingPreferences(L, SE, PP); | ||
| 2006 |   } | ||
| 2007 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||
| 2008 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, | ||
| 2009 | HardwareLoopInfo &HWLoopInfo) override { | ||
| 2010 | return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); | ||
| 2011 |   } | ||
| 2012 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||
| 2013 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||
| 2014 |                                    DominatorTree *DT, | ||
| 2015 |                                    LoopVectorizationLegality *LVL, | ||
| 2016 | InterleavedAccessInfo *IAI) override { | ||
| 2017 | return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI); | ||
| 2018 |   } | ||
| 2019 | PredicationStyle emitGetActiveLaneMask() override { | ||
| 2020 | return Impl.emitGetActiveLaneMask(); | ||
| 2021 |   } | ||
| 2022 | std::optional<Instruction *> | ||
| 2023 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { | ||
| 2024 | return Impl.instCombineIntrinsic(IC, II); | ||
| 2025 |   } | ||
| 2026 | std::optional<Value *> | ||
| 2027 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, | ||
| 2028 |                                    APInt DemandedMask, KnownBits &Known, | ||
| 2029 | bool &KnownBitsComputed) override { | ||
| 2030 | return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, | ||
| 2031 | KnownBitsComputed); | ||
| 2032 |   } | ||
| 2033 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||
| 2034 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, | ||
| 2035 | APInt &UndefElts2, APInt &UndefElts3, | ||
| 2036 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||
| 2037 | SimplifyAndSetOp) override { | ||
| 2038 | return Impl.simplifyDemandedVectorEltsIntrinsic( | ||
| 2039 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, | ||
| 2040 | SimplifyAndSetOp); | ||
| 2041 |   } | ||
| 2042 | bool isLegalAddImmediate(int64_t Imm) override { | ||
| 2043 | return Impl.isLegalAddImmediate(Imm); | ||
| 2044 |   } | ||
| 2045 | bool isLegalICmpImmediate(int64_t Imm) override { | ||
| 2046 | return Impl.isLegalICmpImmediate(Imm); | ||
| 2047 |   } | ||
| 2048 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||
| 2049 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace, | ||
| 2050 | Instruction *I) override { | ||
| 2051 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, | ||
| 2052 | AddrSpace, I); | ||
| 2053 |   } | ||
| 2054 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, | ||
| 2055 | const TargetTransformInfo::LSRCost &C2) override { | ||
| 2056 | return Impl.isLSRCostLess(C1, C2); | ||
| 2057 |   } | ||
| 2058 | bool isNumRegsMajorCostOfLSR() override { | ||
| 2059 | return Impl.isNumRegsMajorCostOfLSR(); | ||
| 2060 |   } | ||
| 2061 | bool isProfitableLSRChainElement(Instruction *I) override { | ||
| 2062 | return Impl.isProfitableLSRChainElement(I); | ||
| 2063 |   } | ||
| 2064 | bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } | ||
| 2065 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, | ||
| 2066 | DominatorTree *DT, AssumptionCache *AC, | ||
| 2067 | TargetLibraryInfo *LibInfo) override { | ||
| 2068 | return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); | ||
| 2069 |   } | ||
| 2070 | AddressingModeKind | ||
| 2071 | getPreferredAddressingMode(const Loop *L, | ||
| 2072 | ScalarEvolution *SE) const override { | ||
| 2073 | return Impl.getPreferredAddressingMode(L, SE); | ||
| 2074 |   } | ||
| 2075 | bool isLegalMaskedStore(Type *DataType, Align Alignment) override { | ||
| 2076 | return Impl.isLegalMaskedStore(DataType, Alignment); | ||
| 2077 |   } | ||
| 2078 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { | ||
| 2079 | return Impl.isLegalMaskedLoad(DataType, Alignment); | ||
| 2080 |   } | ||
| 2081 | bool isLegalNTStore(Type *DataType, Align Alignment) override { | ||
| 2082 | return Impl.isLegalNTStore(DataType, Alignment); | ||
| 2083 |   } | ||
| 2084 | bool isLegalNTLoad(Type *DataType, Align Alignment) override { | ||
| 2085 | return Impl.isLegalNTLoad(DataType, Alignment); | ||
| 2086 |   } | ||
| 2087 | bool isLegalBroadcastLoad(Type *ElementTy, | ||
| 2088 | ElementCount NumElements) const override { | ||
| 2089 | return Impl.isLegalBroadcastLoad(ElementTy, NumElements); | ||
| 2090 |   } | ||
| 2091 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { | ||
| 2092 | return Impl.isLegalMaskedScatter(DataType, Alignment); | ||
| 2093 |   } | ||
| 2094 | bool isLegalMaskedGather(Type *DataType, Align Alignment) override { | ||
| 2095 | return Impl.isLegalMaskedGather(DataType, Alignment); | ||
| 2096 |   } | ||
| 2097 | bool forceScalarizeMaskedGather(VectorType *DataType, | ||
| 2098 | Align Alignment) override { | ||
| 2099 | return Impl.forceScalarizeMaskedGather(DataType, Alignment); | ||
| 2100 |   } | ||
| 2101 | bool forceScalarizeMaskedScatter(VectorType *DataType, | ||
| 2102 | Align Alignment) override { | ||
| 2103 | return Impl.forceScalarizeMaskedScatter(DataType, Alignment); | ||
| 2104 |   } | ||
| 2105 | bool isLegalMaskedCompressStore(Type *DataType) override { | ||
| 2106 | return Impl.isLegalMaskedCompressStore(DataType); | ||
| 2107 |   } | ||
| 2108 | bool isLegalMaskedExpandLoad(Type *DataType) override { | ||
| 2109 | return Impl.isLegalMaskedExpandLoad(DataType); | ||
| 2110 |   } | ||
| 2111 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, | ||
| 2112 | const SmallBitVector &OpcodeMask) const override { | ||
| 2113 | return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); | ||
| 2114 |   } | ||
| 2115 | bool enableOrderedReductions() override { | ||
| 2116 | return Impl.enableOrderedReductions(); | ||
| 2117 |   } | ||
| 2118 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { | ||
| 2119 | return Impl.hasDivRemOp(DataType, IsSigned); | ||
| 2120 |   } | ||
| 2121 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { | ||
| 2122 | return Impl.hasVolatileVariant(I, AddrSpace); | ||
| 2123 |   } | ||
| 2124 | bool prefersVectorizedAddressing() override { | ||
| 2125 | return Impl.prefersVectorizedAddressing(); | ||
| 2126 |   } | ||
| 2127 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, | ||
| 2128 | int64_t BaseOffset, bool HasBaseReg, | ||
| 2129 |                                        int64_t Scale, | ||
| 2130 | unsigned AddrSpace) override { | ||
| 2131 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, | ||
| 2132 | AddrSpace); | ||
| 2133 |   } | ||
| 2134 | bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); } | ||
| 2135 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { | ||
| 2136 | return Impl.isTruncateFree(Ty1, Ty2); | ||
| 2137 |   } | ||
| 2138 | bool isProfitableToHoist(Instruction *I) override { | ||
| 2139 | return Impl.isProfitableToHoist(I); | ||
| 2140 |   } | ||
| 2141 | bool useAA() override { return Impl.useAA(); } | ||
| 2142 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } | ||
| 2143 | unsigned getRegUsageForType(Type *Ty) override { | ||
| 2144 | return Impl.getRegUsageForType(Ty); | ||
| 2145 |   } | ||
| 2146 | bool shouldBuildLookupTables() override { | ||
| 2147 | return Impl.shouldBuildLookupTables(); | ||
| 2148 |   } | ||
| 2149 | bool shouldBuildLookupTablesForConstant(Constant *C) override { | ||
| 2150 | return Impl.shouldBuildLookupTablesForConstant(C); | ||
| 2151 |   } | ||
| 2152 | bool shouldBuildRelLookupTables() override { | ||
| 2153 | return Impl.shouldBuildRelLookupTables(); | ||
| 2154 |   } | ||
| 2155 | bool useColdCCForColdCall(Function &F) override { | ||
| 2156 | return Impl.useColdCCForColdCall(F); | ||
| 2157 |   } | ||
| 2158 | |||
| 2159 | InstructionCost getScalarizationOverhead(VectorType *Ty, | ||
| 2160 | const APInt &DemandedElts, | ||
| 2161 | bool Insert, bool Extract, | ||
| 2162 | TargetCostKind CostKind) override { | ||
| 2163 | return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, | ||
| 2164 | CostKind); | ||
| 2165 |   } | ||
| 2166 | InstructionCost | ||
| 2167 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||
| 2168 | ArrayRef<Type *> Tys, | ||
| 2169 | TargetCostKind CostKind) override { | ||
| 2170 | return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind); | ||
| 2171 |   } | ||
| 2172 | |||
| 2173 | bool supportsEfficientVectorElementLoadStore() override { | ||
| 2174 | return Impl.supportsEfficientVectorElementLoadStore(); | ||
| 2175 |   } | ||
| 2176 | |||
| 2177 | bool supportsTailCalls() override { return Impl.supportsTailCalls(); } | ||
| 2178 | bool supportsTailCallFor(const CallBase *CB) override { | ||
| 2179 | return Impl.supportsTailCallFor(CB); | ||
| 2180 |   } | ||
| 2181 | |||
| 2182 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { | ||
| 2183 | return Impl.enableAggressiveInterleaving(LoopHasReductions); | ||
| 2184 |   } | ||
| 2185 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, | ||
| 2186 | bool IsZeroCmp) const override { | ||
| 2187 | return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); | ||
| 2188 |   } | ||
| 2189 | bool enableInterleavedAccessVectorization() override { | ||
| 2190 | return Impl.enableInterleavedAccessVectorization(); | ||
| 2191 |   } | ||
| 2192 | bool enableSelectOptimize() override { | ||
| 2193 | return Impl.enableSelectOptimize(); | ||
| 2194 |   } | ||
| 2195 | bool enableMaskedInterleavedAccessVectorization() override { | ||
| 2196 | return Impl.enableMaskedInterleavedAccessVectorization(); | ||
| 2197 |   } | ||
| 2198 | bool isFPVectorizationPotentiallyUnsafe() override { | ||
| 2199 | return Impl.isFPVectorizationPotentiallyUnsafe(); | ||
| 2200 |   } | ||
| 2201 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, | ||
| 2202 |                                       unsigned AddressSpace, Align Alignment, | ||
| 2203 | unsigned *Fast) override { | ||
| 2204 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, | ||
| 2205 | Alignment, Fast); | ||
| 2206 |   } | ||
| 2207 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { | ||
| 2208 | return Impl.getPopcntSupport(IntTyWidthInBit); | ||
| 2209 |   } | ||
| 2210 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } | ||
| 2211 | |||
| 2212 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) override { | ||
| 2213 | return Impl.isExpensiveToSpeculativelyExecute(I); | ||
| 2214 |   } | ||
| 2215 | |||
| 2216 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { | ||
| 2217 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); | ||
| 2218 |   } | ||
| 2219 | |||
| 2220 | InstructionCost getFPOpCost(Type *Ty) override { | ||
| 2221 | return Impl.getFPOpCost(Ty); | ||
| 2222 |   } | ||
| 2223 | |||
| 2224 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, | ||
| 2225 | const APInt &Imm, Type *Ty) override { | ||
| 2226 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); | ||
| 2227 |   } | ||
| 2228 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, | ||
| 2229 | TargetCostKind CostKind) override { | ||
| 2230 | return Impl.getIntImmCost(Imm, Ty, CostKind); | ||
| 2231 |   } | ||
| 2232 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, | ||
| 2233 | const APInt &Imm, Type *Ty, | ||
| 2234 | TargetCostKind CostKind, | ||
| 2235 | Instruction *Inst = nullptr) override { | ||
| 2236 | return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); | ||
| 2237 |   } | ||
| 2238 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||
| 2239 | const APInt &Imm, Type *Ty, | ||
| 2240 | TargetCostKind CostKind) override { | ||
| 2241 | return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); | ||
| 2242 |   } | ||
| 2243 | unsigned getNumberOfRegisters(unsigned ClassID) const override { | ||
| 2244 | return Impl.getNumberOfRegisters(ClassID); | ||
| 2245 |   } | ||
| 2246 | unsigned getRegisterClassForType(bool Vector, | ||
| 2247 | Type *Ty = nullptr) const override { | ||
| 2248 | return Impl.getRegisterClassForType(Vector, Ty); | ||
| 2249 |   } | ||
| 2250 | const char *getRegisterClassName(unsigned ClassID) const override { | ||
| 2251 | return Impl.getRegisterClassName(ClassID); | ||
| 2252 |   } | ||
| 2253 | TypeSize getRegisterBitWidth(RegisterKind K) const override { | ||
| 2254 | return Impl.getRegisterBitWidth(K); | ||
| 2255 |   } | ||
| 2256 | unsigned getMinVectorRegisterBitWidth() const override { | ||
| 2257 | return Impl.getMinVectorRegisterBitWidth(); | ||
| 2258 |   } | ||
| 2259 | std::optional<unsigned> getMaxVScale() const override { | ||
| 2260 | return Impl.getMaxVScale(); | ||
| 2261 |   } | ||
| 2262 | std::optional<unsigned> getVScaleForTuning() const override { | ||
| 2263 | return Impl.getVScaleForTuning(); | ||
| 2264 |   } | ||
| 2265 | bool shouldMaximizeVectorBandwidth( | ||
| 2266 | TargetTransformInfo::RegisterKind K) const override { | ||
| 2267 | return Impl.shouldMaximizeVectorBandwidth(K); | ||
| 2268 |   } | ||
| 2269 | ElementCount getMinimumVF(unsigned ElemWidth, | ||
| 2270 | bool IsScalable) const override { | ||
| 2271 | return Impl.getMinimumVF(ElemWidth, IsScalable); | ||
| 2272 |   } | ||
| 2273 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { | ||
| 2274 | return Impl.getMaximumVF(ElemWidth, Opcode); | ||
| 2275 |   } | ||
| 2276 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, | ||
| 2277 | Type *ScalarValTy) const override { | ||
| 2278 | return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); | ||
| 2279 |   } | ||
| 2280 | bool shouldConsiderAddressTypePromotion( | ||
| 2281 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { | ||
| 2282 | return Impl.shouldConsiderAddressTypePromotion( | ||
| 2283 | I, AllowPromotionWithoutCommonHeader); | ||
| 2284 |   } | ||
| 2285 | unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } | ||
| 2286 | std::optional<unsigned> getCacheSize(CacheLevel Level) const override { | ||
| 2287 | return Impl.getCacheSize(Level); | ||
| 2288 |   } | ||
| 2289 | std::optional<unsigned> | ||
| 2290 | getCacheAssociativity(CacheLevel Level) const override { | ||
| 2291 | return Impl.getCacheAssociativity(Level); | ||
| 2292 |   } | ||
| 2293 | |||
| 2294 |   /// Return the preferred prefetch distance in terms of instructions. | ||
| 2295 |   /// | ||
| 2296 | unsigned getPrefetchDistance() const override { | ||
| 2297 | return Impl.getPrefetchDistance(); | ||
| 2298 |   } | ||
| 2299 | |||
| 2300 |   /// Return the minimum stride necessary to trigger software | ||
| 2301 |   /// prefetching. | ||
| 2302 |   /// | ||
| 2303 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||
| 2304 |                                 unsigned NumStridedMemAccesses, | ||
| 2305 |                                 unsigned NumPrefetches, | ||
| 2306 | bool HasCall) const override { | ||
| 2307 | return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, | ||
| 2308 | NumPrefetches, HasCall); | ||
| 2309 |   } | ||
| 2310 | |||
| 2311 |   /// Return the maximum prefetch distance in terms of loop | ||
| 2312 |   /// iterations. | ||
| 2313 |   /// | ||
| 2314 | unsigned getMaxPrefetchIterationsAhead() const override { | ||
| 2315 | return Impl.getMaxPrefetchIterationsAhead(); | ||
| 2316 |   } | ||
| 2317 | |||
| 2318 |   /// \return True if prefetching should also be done for writes. | ||
| 2319 | bool enableWritePrefetching() const override { | ||
| 2320 | return Impl.enableWritePrefetching(); | ||
| 2321 |   } | ||
| 2322 | |||
| 2323 |   /// \return true if the target wants to issue a prefetch in address space \p AS. | ||
| 2324 | bool shouldPrefetchAddressSpace(unsigned AS) const override { | ||
| 2325 | return Impl.shouldPrefetchAddressSpace(AS); | ||
| 2326 |   } | ||
| 2327 | |||
| 2328 | unsigned getMaxInterleaveFactor(unsigned VF) override { | ||
| 2329 | return Impl.getMaxInterleaveFactor(VF); | ||
| 2330 |   } | ||
| 2331 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||
| 2332 | unsigned &JTSize, | ||
| 2333 |                                             ProfileSummaryInfo *PSI, | ||
| 2334 | BlockFrequencyInfo *BFI) override { | ||
| 2335 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); | ||
| 2336 |   } | ||
| 2337 |   InstructionCost getArithmeticInstrCost( | ||
| 2338 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | ||
| 2339 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, | ||
| 2340 | ArrayRef<const Value *> Args, | ||
| 2341 | const Instruction *CxtI = nullptr) override { | ||
| 2342 | return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, | ||
| 2343 | Args, CxtI); | ||
| 2344 |   } | ||
| 2345 | |||
| 2346 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, | ||
| 2347 | ArrayRef<int> Mask, | ||
| 2348 | TTI::TargetCostKind CostKind, int Index, | ||
| 2349 |                                  VectorType *SubTp, | ||
| 2350 | ArrayRef<const Value *> Args) override { | ||
| 2351 | return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); | ||
| 2352 |   } | ||
| 2353 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||
| 2354 | CastContextHint CCH, | ||
| 2355 | TTI::TargetCostKind CostKind, | ||
| 2356 | const Instruction *I) override { | ||
| 2357 | return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); | ||
| 2358 |   } | ||
| 2359 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||
| 2360 |                                            VectorType *VecTy, | ||
| 2361 | unsigned Index) override { | ||
| 2362 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); | ||
| 2363 |   } | ||
| 2364 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, | ||
| 2365 | const Instruction *I = nullptr) override { | ||
| 2366 | return Impl.getCFInstrCost(Opcode, CostKind, I); | ||
| 2367 |   } | ||
| 2368 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||
| 2369 | CmpInst::Predicate VecPred, | ||
| 2370 | TTI::TargetCostKind CostKind, | ||
| 2371 | const Instruction *I) override { | ||
| 2372 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); | ||
| 2373 |   } | ||
| 2374 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, | ||
| 2375 | TTI::TargetCostKind CostKind, | ||
| 2376 | unsigned Index, Value *Op0, | ||
| 2377 | Value *Op1) override { | ||
| 2378 | return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1); | ||
| 2379 |   } | ||
| 2380 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, | ||
| 2381 | TTI::TargetCostKind CostKind, | ||
| 2382 | unsigned Index) override { | ||
| 2383 | return Impl.getVectorInstrCost(I, Val, CostKind, Index); | ||
| 2384 |   } | ||
| 2385 | InstructionCost | ||
| 2386 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, | ||
| 2387 | const APInt &DemandedDstElts, | ||
| 2388 | TTI::TargetCostKind CostKind) override { | ||
| 2389 | return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, | ||
| 2390 | DemandedDstElts, CostKind); | ||
| 2391 |   } | ||
| 2392 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 2393 |                                   unsigned AddressSpace, | ||
| 2394 | TTI::TargetCostKind CostKind, | ||
| 2395 | OperandValueInfo OpInfo, | ||
| 2396 | const Instruction *I) override { | ||
| 2397 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, | ||
| 2398 | OpInfo, I); | ||
| 2399 |   } | ||
| 2400 | InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, | ||
| 2401 |                                     unsigned AddressSpace, | ||
| 2402 | TTI::TargetCostKind CostKind, | ||
| 2403 | const Instruction *I) override { | ||
| 2404 | return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace, | ||
| 2405 | CostKind, I); | ||
| 2406 |   } | ||
| 2407 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, | ||
| 2408 |                                         Align Alignment, unsigned AddressSpace, | ||
| 2409 | TTI::TargetCostKind CostKind) override { | ||
| 2410 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, | ||
| 2411 | CostKind); | ||
| 2412 |   } | ||
| 2413 | InstructionCost | ||
| 2414 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, | ||
| 2415 |                          bool VariableMask, Align Alignment, | ||
| 2416 | TTI::TargetCostKind CostKind, | ||
| 2417 | const Instruction *I = nullptr) override { | ||
| 2418 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, | ||
| 2419 | Alignment, CostKind, I); | ||
| 2420 |   } | ||
| 2421 |   InstructionCost getInterleavedMemoryOpCost( | ||
| 2422 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||
| 2423 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||
| 2424 | bool UseMaskForCond, bool UseMaskForGaps) override { | ||
| 2425 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | ||
| 2426 | Alignment, AddressSpace, CostKind, | ||
| 2427 | UseMaskForCond, UseMaskForGaps); | ||
| 2428 |   } | ||
| 2429 | InstructionCost | ||
| 2430 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||
| 2431 | std::optional<FastMathFlags> FMF, | ||
| 2432 | TTI::TargetCostKind CostKind) override { | ||
| 2433 | return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); | ||
| 2434 |   } | ||
| 2435 | InstructionCost | ||
| 2436 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, | ||
| 2437 | TTI::TargetCostKind CostKind) override { | ||
| 2438 | return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); | ||
| 2439 |   } | ||
| 2440 |   InstructionCost getExtendedReductionCost( | ||
| 2441 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 2442 | std::optional<FastMathFlags> FMF, | ||
| 2443 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { | ||
| 2444 | return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF, | ||
| 2445 | CostKind); | ||
| 2446 |   } | ||
| 2447 |   InstructionCost getMulAccReductionCost( | ||
| 2448 | bool IsUnsigned, Type *ResTy, VectorType *Ty, | ||
| 2449 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { | ||
| 2450 | return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind); | ||
| 2451 |   } | ||
| 2452 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||
| 2453 | TTI::TargetCostKind CostKind) override { | ||
| 2454 | return Impl.getIntrinsicInstrCost(ICA, CostKind); | ||
| 2455 |   } | ||
| 2456 | InstructionCost getCallInstrCost(Function *F, Type *RetTy, | ||
| 2457 | ArrayRef<Type *> Tys, | ||
| 2458 | TTI::TargetCostKind CostKind) override { | ||
| 2459 | return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); | ||
| 2460 |   } | ||
| 2461 | unsigned getNumberOfParts(Type *Tp) override { | ||
| 2462 | return Impl.getNumberOfParts(Tp); | ||
| 2463 |   } | ||
| 2464 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, | ||
| 2465 | const SCEV *Ptr) override { | ||
| 2466 | return Impl.getAddressComputationCost(Ty, SE, Ptr); | ||
| 2467 |   } | ||
| 2468 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { | ||
| 2469 | return Impl.getCostOfKeepingLiveOverCall(Tys); | ||
| 2470 |   } | ||
| 2471 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, | ||
| 2472 | MemIntrinsicInfo &Info) override { | ||
| 2473 | return Impl.getTgtMemIntrinsic(Inst, Info); | ||
| 2474 |   } | ||
| 2475 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { | ||
| 2476 | return Impl.getAtomicMemIntrinsicMaxElementSize(); | ||
| 2477 |   } | ||
| 2478 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||
| 2479 | Type *ExpectedType) override { | ||
| 2480 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); | ||
| 2481 |   } | ||
| 2482 | Type *getMemcpyLoopLoweringType( | ||
| 2483 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, | ||
| 2484 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, | ||
| 2485 | std::optional<uint32_t> AtomicElementSize) const override { | ||
| 2486 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, | ||
| 2487 | DestAddrSpace, SrcAlign, DestAlign, | ||
| 2488 | AtomicElementSize); | ||
| 2489 |   } | ||
| 2490 | void getMemcpyLoopResidualLoweringType( | ||
| 2491 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, | ||
| 2492 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, | ||
| 2493 | unsigned SrcAlign, unsigned DestAlign, | ||
| 2494 | std::optional<uint32_t> AtomicCpySize) const override { | ||
| 2495 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, | ||
| 2496 | SrcAddrSpace, DestAddrSpace, | ||
| 2497 | SrcAlign, DestAlign, AtomicCpySize); | ||
| 2498 |   } | ||
| 2499 | bool areInlineCompatible(const Function *Caller, | ||
| 2500 | const Function *Callee) const override { | ||
| 2501 | return Impl.areInlineCompatible(Caller, Callee); | ||
| 2502 |   } | ||
| 2503 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, | ||
| 2504 | const ArrayRef<Type *> &Types) const override { | ||
| 2505 | return Impl.areTypesABICompatible(Caller, Callee, Types); | ||
| 2506 |   } | ||
| 2507 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { | ||
| 2508 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); | ||
| 2509 |   } | ||
| 2510 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { | ||
| 2511 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); | ||
| 2512 |   } | ||
| 2513 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { | ||
| 2514 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); | ||
| 2515 |   } | ||
| 2516 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { | ||
| 2517 | return Impl.isLegalToVectorizeLoad(LI); | ||
| 2518 |   } | ||
| 2519 | bool isLegalToVectorizeStore(StoreInst *SI) const override { | ||
| 2520 | return Impl.isLegalToVectorizeStore(SI); | ||
| 2521 |   } | ||
| 2522 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 2523 | unsigned AddrSpace) const override { | ||
| 2524 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, | ||
| 2525 | AddrSpace); | ||
| 2526 |   } | ||
| 2527 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, | ||
| 2528 | unsigned AddrSpace) const override { | ||
| 2529 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, | ||
| 2530 | AddrSpace); | ||
| 2531 |   } | ||
| 2532 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, | ||
| 2533 | ElementCount VF) const override { | ||
| 2534 | return Impl.isLegalToVectorizeReduction(RdxDesc, VF); | ||
| 2535 |   } | ||
| 2536 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { | ||
| 2537 | return Impl.isElementTypeLegalForScalableVector(Ty); | ||
| 2538 |   } | ||
| 2539 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, | ||
| 2540 |                                unsigned ChainSizeInBytes, | ||
| 2541 | VectorType *VecTy) const override { | ||
| 2542 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); | ||
| 2543 |   } | ||
| 2544 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, | ||
| 2545 |                                 unsigned ChainSizeInBytes, | ||
| 2546 | VectorType *VecTy) const override { | ||
| 2547 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); | ||
| 2548 |   } | ||
| 2549 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, | ||
| 2550 | ReductionFlags Flags) const override { | ||
| 2551 | return Impl.preferInLoopReduction(Opcode, Ty, Flags); | ||
| 2552 |   } | ||
| 2553 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, | ||
| 2554 | ReductionFlags Flags) const override { | ||
| 2555 | return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); | ||
| 2556 |   } | ||
| 2557 | bool preferEpilogueVectorization() const override { | ||
| 2558 | return Impl.preferEpilogueVectorization(); | ||
| 2559 |   } | ||
| 2560 | |||
| 2561 | bool shouldExpandReduction(const IntrinsicInst *II) const override { | ||
| 2562 | return Impl.shouldExpandReduction(II); | ||
| 2563 |   } | ||
| 2564 | |||
| 2565 | unsigned getGISelRematGlobalCost() const override { | ||
| 2566 | return Impl.getGISelRematGlobalCost(); | ||
| 2567 |   } | ||
| 2568 | |||
| 2569 | unsigned getMinTripCountTailFoldingThreshold() const override { | ||
| 2570 | return Impl.getMinTripCountTailFoldingThreshold(); | ||
| 2571 |   } | ||
| 2572 | |||
| 2573 | bool supportsScalableVectors() const override { | ||
| 2574 | return Impl.supportsScalableVectors(); | ||
| 2575 |   } | ||
| 2576 | |||
| 2577 | bool enableScalableVectorization() const override { | ||
| 2578 | return Impl.enableScalableVectorization(); | ||
| 2579 |   } | ||
| 2580 | |||
| 2581 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, | ||
| 2582 | Align Alignment) const override { | ||
| 2583 | return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); | ||
| 2584 |   } | ||
| 2585 | |||
| 2586 | VPLegalization | ||
| 2587 | getVPLegalizationStrategy(const VPIntrinsic &PI) const override { | ||
| 2588 | return Impl.getVPLegalizationStrategy(PI); | ||
| 2589 |   } | ||
| 2590 | }; | ||
| 2591 | |||
| 2592 | template <typename T> | ||
| 2593 | TargetTransformInfo::TargetTransformInfo(T Impl) | ||
| 2594 | : TTIImpl(new Model<T>(Impl)) {} | ||
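| | // The constructor template above performs the type erasure: the concrete | ||
| | // implementation object is stored in a heap-allocated Model<T>. A minimal | ||
| | // sketch, assuming a hypothetical MyTTIImpl type that provides the expected | ||
| | // member functions: | ||
| | // | ||
| | //   MyTTIImpl ImplForF(/*target-specific arguments*/); | ||
| | //   TargetTransformInfo TTI(std::move(ImplForF)); // held as Model<MyTTIImpl> | ||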
| 2595 | |||
| 2596 | /// Analysis pass providing the \c TargetTransformInfo. | ||
| 2597 | /// | ||
| 2598 | /// The core idea of the TargetIRAnalysis is to expose an interface through | ||
| 2599 | /// which LLVM targets can analyze and provide information about the middle | ||
| 2600 | /// end's target-independent IR. This supports use cases such as target-aware | ||
| 2601 | /// cost modeling of IR constructs. | ||
| 2602 | /// | ||
| 2603 | /// This is a function analysis because much of the cost modeling for targets | ||
| 2604 | /// is done in a subtarget-specific way, and LLVM supports compiling different | ||
| 2605 | /// functions for different subtargets in order to support runtime dispatch | ||
| 2606 | /// according to the observed subtarget. | ||
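| | /// | ||
| | /// A minimal usage sketch under the new pass manager, assuming \c FAM is a | ||
| | /// FunctionAnalysisManager with this analysis registered and \c F and \c I | ||
| | /// are a Function and one of its Instructions: | ||
| | /// \code | ||
| | ///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); | ||
| | ///   InstructionCost Cost = | ||
| | ///       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency); | ||
| | /// \endcode | ||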
| 2607 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { | ||
| 2608 | public: | ||
| 2609 | typedef TargetTransformInfo Result; | ||
| 2610 | |||
| 2611 |   /// Default construct a target IR analysis. | ||
| 2612 |   /// | ||
| 2613 |   /// This will use the module's datalayout to construct a baseline | ||
| 2614 |   /// conservative TTI result. | ||
| 2615 | TargetIRAnalysis(); | ||
| 2616 | |||
| 2617 |   /// Construct an IR analysis pass around a target-provided callback. | ||
| 2618 |   /// | ||
| 2619 |   /// The callback will be called with a particular function for which the TTI | ||
| 2620 |   /// is needed and must return a TTI object for that function. | ||
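| |   /// | ||
| |   /// For illustration only, a sketch assuming \c TM points at a valid | ||
| |   /// TargetMachine: | ||
| |   /// \code | ||
| |   ///   TargetIRAnalysis TIRA([TM](const Function &F) { | ||
| |   ///     return TM->getTargetTransformInfo(F); | ||
| |   ///   }); | ||
| |   /// \endcode | ||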
| 2621 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); | ||
| 2622 | |||
| 2623 |   // Value semantics. We spell out the constructors for MSVC. | ||
| 2624 | TargetIRAnalysis(const TargetIRAnalysis &Arg) | ||
| 2625 | : TTICallback(Arg.TTICallback) {} | ||
| 2626 | TargetIRAnalysis(TargetIRAnalysis &&Arg) | ||
| 2627 | : TTICallback(std::move(Arg.TTICallback)) {} | ||
| 2628 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { | ||
| 2629 | TTICallback = RHS.TTICallback; | ||
| 2630 | return *this; | ||
| 2631 |   } | ||
| 2632 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { | ||
| 2633 | TTICallback = std::move(RHS.TTICallback); | ||
| 2634 | return *this; | ||
| 2635 |   } | ||
| 2636 | |||
| 2637 | Result run(const Function &F, FunctionAnalysisManager &); | ||
| 2638 | |||
| 2639 | private: | ||
| 2640 | friend AnalysisInfoMixin<TargetIRAnalysis>; | ||
| 2641 | static AnalysisKey Key; | ||
| 2642 | |||
| 2643 |   /// The callback used to produce a result. | ||
| 2644 |   /// | ||
| 2645 |   /// We use a completely opaque callback so that targets can provide whatever | ||
| 2646 |   /// mechanism they desire for constructing the TTI for a given function. | ||
| 2647 |   /// | ||
| 2648 |   /// FIXME: Should we really use std::function? It's relatively inefficient. | ||
| 2649 |   /// It might be possible to arrange for even stateful callbacks to outlive | ||
| 2650 |   /// the analysis and thus use a function_ref which would be lighter weight. | ||
| 2651 |   /// This may also be less error prone as the callback is likely to reference | ||
| 2652 |   /// the external TargetMachine, and that reference needs to never dangle. | ||
| 2653 | std::function<Result(const Function &)> TTICallback; | ||
| 2654 | |||
| 2655 |   /// Helper function used as the callback in the default constructor. | ||
| 2656 | static Result getDefaultTTI(const Function &F); | ||
| 2657 | }; | ||
| 2658 | |||
| 2659 | /// Wrapper pass for TargetTransformInfo. | ||
| 2660 | /// | ||
| 2661 | /// This pass can be constructed from a TTI object, which it stores internally | ||
| 2662 | /// and makes available to the passes that query it. | ||
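| | /// | ||
| | /// A sketch of the typical legacy-pass-manager query for a Function \c F, | ||
| | /// assuming the pass has declared its dependency on this wrapper via | ||
| | /// getAnalysisUsage: | ||
| | /// \code | ||
| | ///   TargetTransformInfo &TTI = | ||
| | ///       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); | ||
| | /// \endcode | ||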
| 2663 | class TargetTransformInfoWrapperPass : public ImmutablePass { | ||
| 2664 |   TargetIRAnalysis TIRA; | ||
| 2665 | std::optional<TargetTransformInfo> TTI; | ||
| 2666 | |||
| 2667 | virtual void anchor(); | ||
| 2668 | |||
| 2669 | public: | ||
| 2670 | static char ID; | ||
| 2671 | |||
| 2672 |   /// We must provide a default constructor for the pass, but it should | ||
| 2673 |   /// never be used. | ||
| 2674 |   /// | ||
| 2675 |   /// Use the constructor below or call one of the creation routines. | ||
| 2676 | TargetTransformInfoWrapperPass(); | ||
| 2677 | |||
| 2678 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); | ||
| 2679 | |||
| 2680 | TargetTransformInfo &getTTI(const Function &F); | ||
| 2681 | }; | ||
| 2682 | |||
| 2683 | /// Create an analysis pass wrapper around a TTI object. | ||
| 2684 | /// | ||
| 2685 | /// This analysis pass just holds the TTI instance and makes it available to | ||
| 2686 | /// clients. | ||
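| | /// | ||
| | /// A typical construction site, sketched assuming a legacy PassManager \c PM | ||
| | /// and a TargetMachine pointer \c TM: | ||
| | /// \code | ||
| | ///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); | ||
| | /// \endcode | ||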
| 2687 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); | ||
| 2688 | |||
| 2689 | } // namespace llvm | ||
| 2690 | |||
| 2691 | #endif |