//===------------------------- LSUnit.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H
#define LLVM_MCA_HARDWAREUNITS_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"

namespace llvm {
namespace mca {
/// A node of a memory dependency graph. A MemoryGroup describes a set of
/// instructions with the same memory dependencies.
///
/// By construction, instructions of a MemoryGroup don't depend on each other.
/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
/// A MemoryGroup identifier is then stored as a "token" in field
/// Instruction::LSUTokenID of each dispatched instruction. That token is used
/// internally by the LSUnit to track memory dependencies.
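///
/// A minimal sketch of how the token is used to query memory dependencies
/// after dispatch (`LSU` and `IR` are assumed names, not part of this class):
///
/// \code
///   unsigned Token = IR.getInstruction()->getLSUTokenID();
///   const MemoryGroup &Group = LSU.getGroup(Token);
///   if (Group.isReady()) {
///     // IR has no unresolved memory dependencies left.
///   }
/// \endcode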
class MemoryGroup {
  unsigned NumPredecessors;
  unsigned NumExecutingPredecessors;
  unsigned NumExecutedPredecessors;

  unsigned NumInstructions;
  unsigned NumExecuting;
  unsigned NumExecuted;
  // Successors that are in an order dependency with this group.
  SmallVector<MemoryGroup *, 4> OrderSucc;
  // Successors that are in a data dependency with this group.
  SmallVector<MemoryGroup *, 4> DataSucc;

  CriticalDependency CriticalPredecessor;
  InstRef CriticalMemoryInstruction;

  MemoryGroup(const MemoryGroup &) = delete;
  MemoryGroup &operator=(const MemoryGroup &) = delete;
public:
  MemoryGroup()
      : NumPredecessors(0), NumExecutingPredecessors(0),
        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
        NumExecuted(0), CriticalPredecessor() {}
  MemoryGroup(MemoryGroup &&) = default;

  size_t getNumSuccessors() const {
    return OrderSucc.size() + DataSucc.size();
  }
  unsigned getNumPredecessors() const { return NumPredecessors; }
  unsigned getNumExecutingPredecessors() const {
    return NumExecutingPredecessors;
  }
  unsigned getNumExecutedPredecessors() const {
    return NumExecutedPredecessors;
  }
  unsigned getNumInstructions() const { return NumInstructions; }
  unsigned getNumExecuting() const { return NumExecuting; }
  unsigned getNumExecuted() const { return NumExecuted; }

  const InstRef &getCriticalMemoryInstruction() const {
    return CriticalMemoryInstruction;
  }
  const CriticalDependency &getCriticalPredecessor() const {
    return CriticalPredecessor;
  }

  void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
    // There is no need to add an order dependency if all instructions from
    // this group have already been issued.
    if (!IsDataDependent && isExecuting())
      return;

    Group->NumPredecessors++;
    assert(!isExecuted() && "Should have been removed!");
    if (isExecuting())
      Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

    if (IsDataDependent)
      DataSucc.emplace_back(Group);
    else
      OrderSucc.emplace_back(Group);
  }

  bool isWaiting() const {
    return NumPredecessors >
           (NumExecutingPredecessors + NumExecutedPredecessors);
  }
  bool isPending() const {
    return NumExecutingPredecessors &&
           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
            NumPredecessors);
  }
  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
  bool isExecuting() const {
    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
  }
  bool isExecuted() const { return NumInstructions == NumExecuted; }
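
  // A sketch of the lifecycle implied by the predicates above. A group moves
  // monotonically through these states (informal names, not an enum):
  //
  //   isWaiting()   - some predecessors have not started executing yet.
  //   isPending()   - every predecessor has at least started executing, and
  //                   some are still in flight.
  //   isReady()     - all predecessors have finished executing.
  //   isExecuting() - all non-executed instructions of this group are issued.
  //   isExecuted()  - every instruction of this group has finished.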

  void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
    assert(!isReady() && "Unexpected group-start event!");
    NumExecutingPredecessors++;

    if (!ShouldUpdateCriticalDep)
      return;

    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
    if (CriticalPredecessor.Cycles < Cycles) {
      CriticalPredecessor.IID = IR.getSourceIndex();
      CriticalPredecessor.Cycles = Cycles;
    }
  }

  void onGroupExecuted() {
    assert(!isReady() && "Inconsistent state found!");
    NumExecutingPredecessors--;
    NumExecutedPredecessors++;
  }

  void onInstructionIssued(const InstRef &IR) {
    assert(!isExecuting() && "Invalid internal state!");
    ++NumExecuting;

    // Update the critical memory instruction: keep the instruction with the
    // largest number of cycles left to execute.
    const Instruction &IS = *IR.getInstruction();
    if ((bool)CriticalMemoryInstruction) {
      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
        CriticalMemoryInstruction = IR;
    } else {
      CriticalMemoryInstruction = IR;
    }

    if (!isExecuting())
      return;

    // Notify successors that this group started execution.
    for (MemoryGroup *MG : OrderSucc) {
      MG->onGroupIssued(CriticalMemoryInstruction, false);
      // Release the order dependency with this group.
      MG->onGroupExecuted();
    }

    for (MemoryGroup *MG : DataSucc)
      MG->onGroupIssued(CriticalMemoryInstruction, true);
  }

  void onInstructionExecuted(const InstRef &IR) {
    assert(isReady() && !isExecuted() && "Invalid internal state!");
    --NumExecuting;
    ++NumExecuted;

    if (CriticalMemoryInstruction &&
        CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
      CriticalMemoryInstruction.invalidate();
    }

    if (!isExecuted())
      return;

    // Notify data dependent successors that this group has finished execution.
    for (MemoryGroup *MG : DataSucc)
      MG->onGroupExecuted();
  }

  void addInstruction() {
    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
    ++NumInstructions;
  }

  void cycleEvent() {
    if (isWaiting() && CriticalPredecessor.Cycles)
      CriticalPredecessor.Cycles--;
  }
};
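
// A minimal sketch (hypothetical names) of how an ordering edge between two
// groups behaves, given the methods above:
//
//   MemoryGroup Older, Younger;
//   Older.addInstruction();
//   Older.addSuccessor(&Younger, /* IsDataDependent */ false);
//   // Younger.isWaiting() is now true; the order dependency is released as
//   // soon as the instruction in Older issues.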

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;

  /// Store queue size.
  ///
  /// A value of zero for this field means that the store queue is unbounded.
  /// Processor models can declare the size of a store queue via tablegen (see
  /// the definition of tablegen class StoreQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned SQSize;

  unsigned UsedLQEntries;
  unsigned UsedSQEntries;

  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  virtual ~LSUnitBase();

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with an LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
  virtual unsigned dispatch(const InstRef &IR) = 0;
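
  // A minimal sketch of the expected call sequence at dispatch stage (`LSU`
  // and `IR` are assumed names, not part of this interface):
  //
  //   if (LSU.isAvailable(IR) == LSUnitBase::LSU_AVAILABLE) {
  //     unsigned TokenID = LSU.dispatch(IR);
  //     IR.getInstruction()->setLSUTokenID(TokenID);
  //   }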

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  bool isValidGroupID(unsigned Index) const {
    return Index && (Groups.find(Index) != Groups.end());
  }

  /// Check if a previously dispatched instruction IR is now ready for
  /// execution.
  bool isReady(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  bool isPending(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  bool isWaiting(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(
        std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }

  virtual void onInstructionExecuted(const InstRef &IR);

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until commitment.
  // By default we conservatively assume that the LDQ receives a load at
  // dispatch. Loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR);

  virtual void onInstructionIssued(const InstRef &IR) {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  virtual void cycleEvent();

#ifndef NDEBUG
  void dump() const;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
/// identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining, etc.).
/// Derived classes are responsible for implementing that extra knowledge, and
/// provide different sets of rules for loads and stores by overriding method
/// `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer. This
/// class doesn't know how to identify cases where store-to-load forwarding may
/// occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which usually
/// matches the load-to-use latency for when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
/// memory-barrier like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory
/// barrier is a `mayLoad` and `mayStore` instruction with unmodeled side
/// effects. This is obviously inaccurate, but this is the best that we can do
/// at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
///  - A younger load cannot pass a load barrier.
///  - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
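///
/// For illustration, a hypothetical three-instruction sequence under the
/// default rules (the mnemonics are made up; only mayLoad/mayStore matters):
/// \code
///   load  A     ; joins the current load group
///   store B     ; may not pass the older load A (rule 3)
///   load  C     ; may pass store B only when AssumeNoAlias is true (rule 4)
/// \endcode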
class LSUnit : public LSUnitBase {
  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to
  // be a better predictor of load latency than instruction latency. This is
  // particularly true when simulating code with temporal/spatial locality of
  // memory accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.
  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}
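
  // A construction sketch (queue sizes are made-up values; `SM` is assumed to
  // be a valid MCSchedModel):
  //
  //   LSUnit LSU(SM, /* LQSize */ 16, /* SQSize */ 16, /* NoAlias */ true);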

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
  unsigned dispatch(const InstRef &IR) override;

  void onInstructionExecuted(const InstRef &IR) override;
};

} // namespace mca
} // namespace llvm

#endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H