WebSVN – QNX 8.QNX8 LLVM/Clang compiler suite – Blame – /llvm-build/x86_64/include/llvm/CodeGen/ModuloSchedule.h

Rev	Author	Line No.	Line
14	pmbaty	1	//===- ModuloSchedule.h - Software pipeline schedule expansion ------------===//
		2	//
		3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
		4	// See https://llvm.org/LICENSE.txt for license information.
		5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
		6	//
		7	//===----------------------------------------------------------------------===//
		8	//
		9	// Software pipelining (SWP) is an instruction scheduling technique for loops
		10	// that overlaps loop iterations and exploits ILP via compiler transformations.
		11	//
		12	// There are multiple methods for analyzing a loop and creating a schedule.
		13	// An example algorithm is Swing Modulo Scheduling (implemented by the
		14	// MachinePipeliner). The details of how a schedule is arrived at are irrelevant
		15	// for the task of actually rewriting a loop to adhere to the schedule, which
		16	// is what this file does.
		17	//
		18	// A schedule is, for every instruction in a block, a Cycle and a Stage. Note
		19	// that we only support single-block loops, so "block" and "loop" can be used
		20	// interchangably.
		21	//
		22	// The Cycle of an instruction defines a partial order of the instructions in
		23	// the remapped loop. Instructions within a cycle must not consume the output
		24	// of any instruction in the same cycle. Cycle information is assumed to have
		25	// been calculated such that the processor will execute instructions in
		26	// lock-step (for example in a VLIW ISA).
		27	//
		28	// The Stage of an instruction defines the mapping between logical loop
		29	// iterations and pipelined loop iterations. An example (unrolled) pipeline
		30	// may look something like:
		31	//
		32	// I0[0] Execute instruction I0 of iteration 0
		33	// I1[0], I0[1] Execute I0 of iteration 1 and I1 of iteration 1
		34	// I1[1], I0[2]
		35	// I1[2], I0[3]
		36	//
		37	// In the schedule for this unrolled sequence we would say that I0 was scheduled
		38	// in stage 0 and I1 in stage 1:
		39	//
		40	// loop:
		41	// [stage 0] x = I0
		42	// [stage 1] I1 x (from stage 0)
		43	//
		44	// And to actually generate valid code we must insert a phi:
		45	//
		46	// loop:
		47	// x' = phi(x)
		48	// x = I0
		49	// I1 x'
		50	//
		51	// This is a simple example; the rules for how to generate correct code given
		52	// an arbitrary schedule containing loop-carried values are complex.
		53	//
		54	// Note that these examples only mention the steady-state kernel of the
		55	// generated loop; prologs and epilogs must be generated also that prime and
		56	// flush the pipeline. Doing so is nontrivial.
		57	//
		58	//===----------------------------------------------------------------------===//
		59
		60	#ifndef LLVM_CODEGEN_MODULOSCHEDULE_H
		61	#define LLVM_CODEGEN_MODULOSCHEDULE_H
		62
		63	#include "llvm/CodeGen/MachineFunction.h"
		64	#include "llvm/CodeGen/MachineLoopUtils.h"
		65	#include "llvm/CodeGen/TargetInstrInfo.h"
		66	#include "llvm/CodeGen/TargetSubtargetInfo.h"
		67	#include <deque>
		68	#include <vector>
		69
		70	namespace llvm {
		71	class MachineBasicBlock;
		72	class MachineLoop;
		73	class MachineRegisterInfo;
		74	class MachineInstr;
		75	class LiveIntervals;
		76
		77	/// Represents a schedule for a single-block loop. For every instruction we
		78	/// maintain a Cycle and Stage.
		79	class ModuloSchedule {
		80	private:
		81	/// The block containing the loop instructions.
		82	MachineLoop *Loop;
		83
		84	/// The instructions to be generated, in total order. Cycle provides a partial
		85	/// order; the total order within cycles has been decided by the schedule
		86	/// producer.
		87	std::vector<MachineInstr *> ScheduledInstrs;
		88
		89	/// The cycle for each instruction.
		90	DenseMap<MachineInstr *, int> Cycle;
		91
		92	/// The stage for each instruction.
		93	DenseMap<MachineInstr *, int> Stage;
		94
		95	/// The number of stages in this schedule (Max(Stage) + 1).
		96	int NumStages;
		97
		98	public:
		99	/// Create a new ModuloSchedule.
		100	/// \arg ScheduledInstrs The new loop instructions, in total resequenced
		101	/// order.
		102	/// \arg Cycle Cycle index for all instructions in ScheduledInstrs. Cycle does
		103	/// not need to start at zero. ScheduledInstrs must be partially ordered by
		104	/// Cycle.
		105	/// \arg Stage Stage index for all instructions in ScheduleInstrs.
		106	ModuloSchedule(MachineFunction &MF, MachineLoop *Loop,
		107	std::vector<MachineInstr *> ScheduledInstrs,
		108	DenseMap<MachineInstr *, int> Cycle,
		109	DenseMap<MachineInstr *, int> Stage)
		110	: Loop(Loop), ScheduledInstrs(ScheduledInstrs), Cycle(std::move(Cycle)),
		111	Stage(std::move(Stage)) {
		112	NumStages = 0;
		113	for (auto &KV : this->Stage)
		114	NumStages = std::max(NumStages, KV.second);
		115	++NumStages;
		116	}
		117
		118	/// Return the single-block loop being scheduled.
		119	MachineLoop *getLoop() const { return Loop; }
		120
		121	/// Return the number of stages contained in this schedule, which is the
		122	/// largest stage index + 1.
		123	int getNumStages() const { return NumStages; }
		124
		125	/// Return the first cycle in the schedule, which is the cycle index of the
		126	/// first instruction.
		127	int getFirstCycle() { return Cycle[ScheduledInstrs.front()]; }
		128
		129	/// Return the final cycle in the schedule, which is the cycle index of the
		130	/// last instruction.
		131	int getFinalCycle() { return Cycle[ScheduledInstrs.back()]; }
		132
		133	/// Return the stage that MI is scheduled in, or -1.
		134	int getStage(MachineInstr *MI) {
		135	auto I = Stage.find(MI);
		136	return I == Stage.end() ? -1 : I->second;
		137	}
		138
		139	/// Return the cycle that MI is scheduled at, or -1.
		140	int getCycle(MachineInstr *MI) {
		141	auto I = Cycle.find(MI);
		142	return I == Cycle.end() ? -1 : I->second;
		143	}
		144
		145	/// Set the stage of a newly created instruction.
		146	void setStage(MachineInstr *MI, int MIStage) {
		147	assert(Stage.count(MI) == 0);
		148	Stage[MI] = MIStage;
		149	}
		150
		151	/// Return the rescheduled instructions in order.
		152	ArrayRef<MachineInstr *> getInstructions() { return ScheduledInstrs; }
		153
		154	void dump() { print(dbgs()); }
		155	void print(raw_ostream &OS);
		156	};
		157
		158	/// The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place,
		159	/// rewriting the old loop and inserting prologs and epilogs as required.
		160	class ModuloScheduleExpander {
		161	public:
		162	using InstrChangesTy = DenseMap<MachineInstr *, std::pair<unsigned, int64_t>>;
		163
		164	private:
		165	using ValueMapTy = DenseMap<unsigned, unsigned>;
		166	using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
		167	using InstrMapTy = DenseMap<MachineInstr , MachineInstr >;
		168
		169	ModuloSchedule &Schedule;
		170	MachineFunction &MF;
		171	const TargetSubtargetInfo &ST;
		172	MachineRegisterInfo &MRI;
		173	const TargetInstrInfo *TII;
		174	LiveIntervals &LIS;
		175
		176	MachineBasicBlock *BB;
		177	MachineBasicBlock *Preheader;
		178	MachineBasicBlock *NewKernel = nullptr;
		179	std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
		180
		181	/// Map for each register and the max difference between its uses and def.
		182	/// The first element in the pair is the max difference in stages. The
		183	/// second is true if the register defines a Phi value and loop value is
		184	/// scheduled before the Phi.
		185	std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
		186
		187	/// Instructions to change when emitting the final schedule.
		188	InstrChangesTy InstrChanges;
		189
		190	void generatePipelinedLoop();
		191	void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
		192	ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
		193	void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
		194	MachineBasicBlock OrigBB, ValueMapTy VRMap,
		195	ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
		196	MBBVectorTy &PrologBBs);
		197	void generateExistingPhis(MachineBasicBlock NewBB, MachineBasicBlock BB1,
		198	MachineBasicBlock BB2, MachineBasicBlock KernelBB,
		199	ValueMapTy *VRMap, InstrMapTy &InstrMap,
		200	unsigned LastStageNum, unsigned CurStageNum,
		201	bool IsLast);
		202	void generatePhis(MachineBasicBlock NewBB, MachineBasicBlock BB1,
		203	MachineBasicBlock BB2, MachineBasicBlock KernelBB,
		204	ValueMapTy VRMap, ValueMapTy VRMapPhi,
		205	InstrMapTy &InstrMap, unsigned LastStageNum,
		206	unsigned CurStageNum, bool IsLast);
		207	void removeDeadInstructions(MachineBasicBlock *KernelBB,
		208	MBBVectorTy &EpilogBBs);
		209	void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);
		210	void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
		211	MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
		212	ValueMapTy *VRMap);
		213	bool computeDelta(MachineInstr &MI, unsigned &Delta);
		214	void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
		215	unsigned Num);
		216	MachineInstr cloneInstr(MachineInstr OldMI, unsigned CurStageNum,
		217	unsigned InstStageNum);
		218	MachineInstr cloneAndChangeInstr(MachineInstr OldMI, unsigned CurStageNum,
		219	unsigned InstStageNum);
		220	void updateInstruction(MachineInstr *NewMI, bool LastDef,
		221	unsigned CurStageNum, unsigned InstrStageNum,
		222	ValueMapTy *VRMap);
		223	MachineInstr *findDefInLoop(unsigned Reg);
		224	unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
		225	unsigned LoopStage, ValueMapTy *VRMap,
		226	MachineBasicBlock *BB);
		227	void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
		228	ValueMapTy *VRMap, InstrMapTy &InstrMap);
		229	void rewriteScheduledInstr(MachineBasicBlock *BB, InstrMapTy &InstrMap,
		230	unsigned CurStageNum, unsigned PhiNum,
		231	MachineInstr *Phi, unsigned OldReg,
		232	unsigned NewReg, unsigned PrevReg = 0);
		233	bool isLoopCarried(MachineInstr &Phi);
		234
		235	/// Return the max. number of stages/iterations that can occur between a
		236	/// register definition and its uses.
		237	unsigned getStagesForReg(int Reg, unsigned CurStage) {
		238	std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
		239	if ((int)CurStage > Schedule.getNumStages() - 1 && Stages.first == 0 &&
		240	Stages.second)
		241	return 1;
		242	return Stages.first;
		243	}
		244
		245	/// The number of stages for a Phi is a little different than other
		246	/// instructions. The minimum value computed in RegToStageDiff is 1
		247	/// because we assume the Phi is needed for at least 1 iteration.
		248	/// This is not the case if the loop value is scheduled prior to the
		249	/// Phi in the same stage. This function returns the number of stages
		250	/// or iterations needed between the Phi definition and any uses.
		251	unsigned getStagesForPhi(int Reg) {
		252	std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
		253	if (Stages.second)
		254	return Stages.first;
		255	return Stages.first - 1;
		256	}
		257
		258	public:
		259	/// Create a new ModuloScheduleExpander.
		260	/// \arg InstrChanges Modifications to make to instructions with memory
		261	/// operands.
		262	/// FIXME: InstrChanges is opaque and is an implementation detail of an
		263	/// optimization in MachinePipeliner that crosses abstraction boundaries.
		264	ModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
		265	LiveIntervals &LIS, InstrChangesTy InstrChanges)
		266	: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
		267	TII(ST.getInstrInfo()), LIS(LIS),
		268	InstrChanges(std::move(InstrChanges)) {}
		269
		270	/// Performs the actual expansion.
		271	void expand();
		272	/// Performs final cleanup after expansion.
		273	void cleanup();
		274
		275	/// Returns the newly rewritten kernel block, or nullptr if this was
		276	/// optimized away.
		277	MachineBasicBlock *getRewrittenKernel() { return NewKernel; }
		278	};
		279
		280	/// A reimplementation of ModuloScheduleExpander. It works by generating a
		281	/// standalone kernel loop and peeling out the prologs and epilogs.
		282	class PeelingModuloScheduleExpander {
		283	public:
		284	PeelingModuloScheduleExpander(MachineFunction &MF, ModuloSchedule &S,
		285	LiveIntervals *LIS)
		286	: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
		287	TII(ST.getInstrInfo()), LIS(LIS) {}
		288
		289	void expand();
		290
		291	/// Runs ModuloScheduleExpander and treats it as a golden input to validate
		292	/// aspects of the code generated by PeelingModuloScheduleExpander.
		293	void validateAgainstModuloScheduleExpander();
		294
		295	protected:
		296	ModuloSchedule &Schedule;
		297	MachineFunction &MF;
		298	const TargetSubtargetInfo &ST;
		299	MachineRegisterInfo &MRI;
		300	const TargetInstrInfo *TII;
		301	LiveIntervals *LIS;
		302
		303	/// The original loop block that gets rewritten in-place.
		304	MachineBasicBlock *BB;
		305	/// The original loop preheader.
		306	MachineBasicBlock *Preheader;
		307	/// All prolog and epilog blocks.
		308	SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
		309	/// For every block, the stages that are produced.
		310	DenseMap<MachineBasicBlock *, BitVector> LiveStages;
		311	/// For every block, the stages that are available. A stage can be available
		312	/// but not produced (in the epilog) or produced but not available (in the
		313	/// prolog).
		314	DenseMap<MachineBasicBlock *, BitVector> AvailableStages;
		315	/// When peeling the epilogue keep track of the distance between the phi
		316	/// nodes and the kernel.
		317	DenseMap<MachineInstr *, unsigned> PhiNodeLoopIteration;
		318
		319	/// CanonicalMIs and BlockMIs form a bidirectional map between any of the
		320	/// loop kernel clones.
		321	DenseMap<MachineInstr , MachineInstr > CanonicalMIs;
		322	DenseMap<std::pair<MachineBasicBlock , MachineInstr >, MachineInstr *>
		323	BlockMIs;
		324
		325	/// State passed from peelKernel to peelPrologAndEpilogs().
		326	std::deque<MachineBasicBlock *> PeeledFront, PeeledBack;
		327	/// Illegal phis that need to be deleted once we re-link stages.
		328	SmallVector<MachineInstr *, 4> IllegalPhisToDelete;
		329
		330	/// Converts BB from the original loop body to the rewritten, pipelined
		331	/// steady-state.
		332	void rewriteKernel();
		333
		334	/// Peels one iteration of the rewritten kernel (BB) in the specified
		335	/// direction.
		336	MachineBasicBlock *peelKernel(LoopPeelDirection LPD);
		337	// Delete instructions whose stage is less than MinStage in the given basic
		338	// block.
		339	void filterInstructions(MachineBasicBlock *MB, int MinStage);
		340	// Move instructions of the given stage from sourceBB to DestBB. Remap the phi
		341	// instructions to keep a valid IR.
		342	void moveStageBetweenBlocks(MachineBasicBlock *DestBB,
		343	MachineBasicBlock *SourceBB, unsigned Stage);
		344	/// Peel the kernel forwards and backwards to produce prologs and epilogs,
		345	/// and stitch them together.
		346	void peelPrologAndEpilogs();
		347	/// All prolog and epilog blocks are clones of the kernel, so any produced
		348	/// register in one block has an corollary in all other blocks.
		349	Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB);
		350	/// Change all users of MI, if MI is predicated out
		351	/// (LiveStages[MI->getParent()] == false).
		352	void rewriteUsesOf(MachineInstr *MI);
		353	/// Insert branches between prologs, kernel and epilogs.
		354	void fixupBranches();
		355	/// Create a poor-man's LCSSA by cloning only the PHIs from the kernel block
		356	/// to a block dominated by all prologs and epilogs. This allows us to treat
		357	/// the loop exiting block as any other kernel clone.
		358	MachineBasicBlock *CreateLCSSAExitingBlock();
		359	/// Helper to get the stage of an instruction in the schedule.
		360	unsigned getStage(MachineInstr *MI) {
		361	if (CanonicalMIs.count(MI))
		362	MI = CanonicalMIs[MI];
		363	return Schedule.getStage(MI);
		364	}
		365	/// Helper function to find the right canonical register for a phi instruction
		366	/// coming from a peeled out prologue.
		367	Register getPhiCanonicalReg(MachineInstr* CanonicalPhi, MachineInstr* Phi);
		368	/// Target loop info before kernel peeling.
		369	std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
		370	};
		371
		372	/// Expander that simply annotates each scheduled instruction with a post-instr
		373	/// symbol that can be consumed by the ModuloScheduleTest pass.
		374	///
		375	/// The post-instr symbol is a way of annotating an instruction that can be
		376	/// roundtripped in MIR. The syntax is:
		377	/// MYINST %0, post-instr-symbol <mcsymbol Stage-1_Cycle-5>
		378	class ModuloScheduleTestAnnotater {
		379	MachineFunction &MF;
		380	ModuloSchedule &S;
		381
		382	public:
		383	ModuloScheduleTestAnnotater(MachineFunction &MF, ModuloSchedule &S)
		384	: MF(MF), S(S) {}
		385
		386	/// Performs the annotation.
		387	void annotate();
		388	};
		389
		390	} // end namespace llvm
		391
		392	#endif // LLVM_CODEGEN_MODULOSCHEDULE_H

Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

QNX 8.QNX8 LLVM/Clang compiler suite/llvm-build/x86_64/include/llvm/CodeGen/ModuloSchedule.h – Rev 14