Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // This file contains some helper functions which try to cleanup artifacts |
||
9 | // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make |
||
10 | // the types match. This file also contains some combines of merges that happens |
||
11 | // at the end of the legalization. |
||
12 | //===----------------------------------------------------------------------===// |
||
13 | |||
14 | #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
||
15 | #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
||
16 | |||
17 | #include "llvm/ADT/SmallBitVector.h" |
||
18 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
||
19 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
||
20 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
||
21 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
||
22 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
||
23 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
||
24 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
||
25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
||
26 | #include "llvm/CodeGen/Register.h" |
||
27 | #include "llvm/IR/Constants.h" |
||
28 | #include "llvm/Support/Debug.h" |
||
29 | |||
30 | #define DEBUG_TYPE "legalizer" |
||
31 | |||
32 | namespace llvm { |
||
33 | class LegalizationArtifactCombiner { |
||
34 | MachineIRBuilder &Builder; |
||
35 | MachineRegisterInfo &MRI; |
||
36 | const LegalizerInfo &LI; |
||
37 | |||
38 | static bool isArtifactCast(unsigned Opc) { |
||
39 | switch (Opc) { |
||
40 | case TargetOpcode::G_TRUNC: |
||
41 | case TargetOpcode::G_SEXT: |
||
42 | case TargetOpcode::G_ZEXT: |
||
43 | case TargetOpcode::G_ANYEXT: |
||
44 | return true; |
||
45 | default: |
||
46 | return false; |
||
47 | } |
||
48 | } |
||
49 | |||
50 | public: |
||
/// Construct a combiner that emits replacement instructions through \p B,
/// reads/writes virtual registers via \p MRI, and consults \p LI to decide
/// whether candidate replacement artifacts are legal. All three references
/// must outlive this object.
LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                             const LegalizerInfo &LI)
    : Builder(B), MRI(MRI), LI(LI) {}
||
54 | |||
/// Try to combine away a G_ANYEXT artifact \p MI.
///
/// Handled patterns (tried in order):
///   aext(trunc x)      -> aext/copy/trunc of x
///   aext([asz]ext x)   -> [asz]ext of x at the wider type
///   aext(G_CONSTANT)   -> wider G_CONSTANT, if legal
///   aext(implicit_def) -> delegated to tryFoldImplicitDef
///
/// \param MI          the G_ANYEXT to combine.
/// \param DeadInsts   receives instructions made dead by the combine.
/// \param UpdatedDefs receives defs whose uses may now be combinable.
/// \param Observer    notified when registers are replaced in place.
/// \returns true if \p MI was combined away.
bool tryCombineAnyExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
  using namespace llvm::MIPatternMatch;
  assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

  Builder.setInstrAndDebugLoc(MI);
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

  // aext(trunc x) - > aext/copy/trunc x
  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
    // If the types round-trip exactly, reuse the pre-trunc value directly;
    // otherwise emit whichever of anyext/trunc re-sizes it.
    if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
      replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                            Observer);
    else
      Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }

  // aext([asz]ext x) -> [asz]ext x
  Register ExtSrc;
  MachineInstr *ExtMI;
  if (mi_match(SrcReg, MRI,
               m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                  m_GSExt(m_Reg(ExtSrc)),
                                                  m_GZExt(m_Reg(ExtSrc)))))) {
    // The inner extend already defines the extra bits; re-emit that same
    // extend opcode directly at the wider destination type.
    Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *ExtMI, DeadInsts);
    return true;
  }

  // Try to fold aext(g_constant) when the larger constant type is legal.
  auto *SrcMI = MRI.getVRegDef(SrcReg);
  if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
    const LLT DstTy = MRI.getType(DstReg);
    if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
      auto &CstVal = SrcMI->getOperand(1);
      // The high bits of an anyext result are unspecified, so
      // sign-extending the constant is an arbitrary but valid choice.
      Builder.buildConstant(
          DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *SrcMI, DeadInsts);
      return true;
    }
  }
  return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
||
108 | |||
/// Try to combine away a G_ZEXT artifact \p MI.
///
/// Handled patterns (tried in order):
///   zext(trunc x)      -> and (aext/copy/trunc x), mask
///   zext(sext x)       -> and (sext x), mask
///   zext(zext x)       -> zext x (inner source forwarded into MI)
///   zext(G_CONSTANT)   -> wider G_CONSTANT, if legal
///   zext(implicit_def) -> delegated to tryFoldImplicitDef
///
/// \param MI          the G_ZEXT to combine.
/// \param DeadInsts   receives instructions made dead by the combine.
/// \param UpdatedDefs receives defs whose uses may now be combinable.
/// \param Observer    notified when MI is mutated in place.
/// \returns true if \p MI was combined away or rewritten.
bool tryCombineZExt(MachineInstr &MI,
                    SmallVectorImpl<MachineInstr *> &DeadInsts,
                    SmallVectorImpl<Register> &UpdatedDefs,
                    GISelObserverWrapper &Observer) {
  using namespace llvm::MIPatternMatch;
  assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

  Builder.setInstrAndDebugLoc(MI);
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

  // zext(trunc x) - > and (aext/copy/trunc x), mask
  // zext(sext x) -> and (sext x), mask
  // At most one of TruncSrc/SextSrc is set by the short-circuited match
  // below; the other stays the default (invalid) Register and tests false.
  Register TruncSrc;
  Register SextSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
      mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
    LLT DstTy = MRI.getType(DstReg);
    // Bail out unless both the G_AND and the mask constant are available
    // at the destination type.
    if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
        isConstantUnsupported(DstTy))
      return false;
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
    LLT SrcTy = MRI.getType(SrcReg);
    // Mask keeps exactly the bits that were valid in the narrow source.
    APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
    auto Mask = Builder.buildConstant(
        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
    if (SextSrc && (DstTy != MRI.getType(SextSrc)))
      SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
    if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
      TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
    Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
    markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }

  // zext(zext x) -> (zext x)
  Register ZextSrc;
  if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
    // Rewrite MI in place to extend directly from the inner source; the
    // inner zext may then be dead.
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(ZextSrc);
    Observer.changedInstr(MI);
    UpdatedDefs.push_back(DstReg);
    markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }

  // Try to fold zext(g_constant) when the larger constant type is legal.
  auto *SrcMI = MRI.getVRegDef(SrcReg);
  if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
    const LLT DstTy = MRI.getType(DstReg);
    if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
      auto &CstVal = SrcMI->getOperand(1);
      Builder.buildConstant(
          DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *SrcMI, DeadInsts);
      return true;
    }
  }
  return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
||
171 | |||
/// Try to combine away a G_SEXT artifact \p MI.
///
/// Handled patterns (tried in order):
///   sext(trunc x)      -> sext_inreg (aext/copy/trunc x), c
///   sext(zext x)       -> zext x
///   sext(sext x)       -> sext x
///   sext(G_CONSTANT)   -> wider G_CONSTANT, if legal
///   sext(implicit_def) -> delegated to tryFoldImplicitDef
///
/// \param MI          the G_SEXT to combine.
/// \param DeadInsts   receives instructions made dead by the combine.
/// \param UpdatedDefs receives defs whose uses may now be combinable.
/// \returns true if \p MI was combined away.
bool tryCombineSExt(MachineInstr &MI,
                    SmallVectorImpl<MachineInstr *> &DeadInsts,
                    SmallVectorImpl<Register> &UpdatedDefs) {
  using namespace llvm::MIPatternMatch;
  assert(MI.getOpcode() == TargetOpcode::G_SEXT);

  Builder.setInstrAndDebugLoc(MI);
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

  // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
    LLT DstTy = MRI.getType(DstReg);
    if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
      return false;
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
    LLT SrcTy = MRI.getType(SrcReg);
    // sext_inreg sign-extends from the width of the truncated value.
    uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
    if (DstTy != MRI.getType(TruncSrc))
      TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
    Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
    markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }

  // sext(zext x) -> (zext x)
  // sext(sext x) -> (sext x)
  Register ExtSrc;
  MachineInstr *ExtMI;
  if (mi_match(SrcReg, MRI,
               m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                  m_GSExt(m_Reg(ExtSrc)))))) {
    LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
    // The inner extend already fixed the sign/zero bits; re-emit the same
    // extend opcode directly at the wider type.
    Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
    return true;
  }

  // Try to fold sext(g_constant) when the larger constant type is legal.
  auto *SrcMI = MRI.getVRegDef(SrcReg);
  if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
    const LLT DstTy = MRI.getType(DstReg);
    if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
      auto &CstVal = SrcMI->getOperand(1);
      Builder.buildConstant(
          DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *SrcMI, DeadInsts);
      return true;
    }
  }

  return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
||
228 | |||
229 | bool tryCombineTrunc(MachineInstr &MI, |
||
230 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
231 | SmallVectorImpl<Register> &UpdatedDefs, |
||
232 | GISelObserverWrapper &Observer) { |
||
233 | using namespace llvm::MIPatternMatch; |
||
234 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
||
235 | |||
236 | Builder.setInstr(MI); |
||
237 | Register DstReg = MI.getOperand(0).getReg(); |
||
238 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
239 | |||
240 | // Try to fold trunc(g_constant) when the smaller constant type is legal. |
||
241 | auto *SrcMI = MRI.getVRegDef(SrcReg); |
||
242 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
||
243 | const LLT DstTy = MRI.getType(DstReg); |
||
244 | if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
||
245 | auto &CstVal = SrcMI->getOperand(1); |
||
246 | Builder.buildConstant( |
||
247 | DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits())); |
||
248 | UpdatedDefs.push_back(DstReg); |
||
249 | markInstAndDefDead(MI, *SrcMI, DeadInsts); |
||
250 | return true; |
||
251 | } |
||
252 | } |
||
253 | |||
254 | // Try to fold trunc(merge) to directly use the source of the merge. |
||
255 | // This gets rid of large, difficult to legalize, merges |
||
256 | if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) { |
||
257 | const Register MergeSrcReg = SrcMerge->getSourceReg(0); |
||
258 | const LLT MergeSrcTy = MRI.getType(MergeSrcReg); |
||
259 | const LLT DstTy = MRI.getType(DstReg); |
||
260 | |||
261 | // We can only fold if the types are scalar |
||
262 | const unsigned DstSize = DstTy.getSizeInBits(); |
||
263 | const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits(); |
||
264 | if (!DstTy.isScalar() || !MergeSrcTy.isScalar()) |
||
265 | return false; |
||
266 | |||
267 | if (DstSize < MergeSrcSize) { |
||
268 | // When the merge source is larger than the destination, we can just |
||
269 | // truncate the merge source directly |
||
270 | if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}})) |
||
271 | return false; |
||
272 | |||
273 | LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: " |
||
274 | << MI); |
||
275 | |||
276 | Builder.buildTrunc(DstReg, MergeSrcReg); |
||
277 | UpdatedDefs.push_back(DstReg); |
||
278 | } else if (DstSize == MergeSrcSize) { |
||
279 | // If the sizes match we can simply try to replace the register |
||
280 | LLVM_DEBUG( |
||
281 | dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: " |
||
282 | << MI); |
||
283 | replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs, |
||
284 | Observer); |
||
285 | } else if (DstSize % MergeSrcSize == 0) { |
||
286 | // If the trunc size is a multiple of the merge source size we can use |
||
287 | // a smaller merge instead |
||
288 | if (isInstUnsupported( |
||
289 | {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}})) |
||
290 | return false; |
||
291 | |||
292 | LLVM_DEBUG( |
||
293 | dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: " |
||
294 | << MI); |
||
295 | |||
296 | const unsigned NumSrcs = DstSize / MergeSrcSize; |
||
297 | assert(NumSrcs < SrcMI->getNumOperands() - 1 && |
||
298 | "trunc(merge) should require less inputs than merge"); |
||
299 | SmallVector<Register, 8> SrcRegs(NumSrcs); |
||
300 | for (unsigned i = 0; i < NumSrcs; ++i) |
||
301 | SrcRegs[i] = SrcMerge->getSourceReg(i); |
||
302 | |||
303 | Builder.buildMergeValues(DstReg, SrcRegs); |
||
304 | UpdatedDefs.push_back(DstReg); |
||
305 | } else { |
||
306 | // Unable to combine |
||
307 | return false; |
||
308 | } |
||
309 | |||
310 | markInstAndDefDead(MI, *SrcMerge, DeadInsts); |
||
311 | return true; |
||
312 | } |
||
313 | |||
314 | // trunc(trunc) -> trunc |
||
315 | Register TruncSrc; |
||
316 | if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
||
317 | // Always combine trunc(trunc) since the eventual resulting trunc must be |
||
318 | // legal anyway as it must be legal for all outputs of the consumer type |
||
319 | // set. |
||
320 | LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI); |
||
321 | |||
322 | Builder.buildTrunc(DstReg, TruncSrc); |
||
323 | UpdatedDefs.push_back(DstReg); |
||
324 | markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts); |
||
325 | return true; |
||
326 | } |
||
327 | |||
328 | return false; |
||
329 | } |
||
330 | |||
/// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
///
/// G_ANYEXT of an implicit_def is itself fully undefined, so it becomes a
/// wider G_IMPLICIT_DEF. G_ZEXT/G_SEXT of an implicit_def have known-equal
/// high bits, so a constant 0 is a valid materialization.
///
/// \param MI          a G_ANYEXT, G_ZEXT or G_SEXT (asserted below).
/// \param DeadInsts   receives instructions made dead by the fold.
/// \param UpdatedDefs receives defs whose uses may now be combinable.
/// \returns true if \p MI was folded away.
bool tryFoldImplicitDef(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs) {
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
         Opcode == TargetOpcode::G_SEXT);

  if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                         MI.getOperand(1).getReg(), MRI)) {
    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    if (Opcode == TargetOpcode::G_ANYEXT) {
      // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
      if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
      Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
      UpdatedDefs.push_back(DstReg);
    } else {
      // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
      // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
      if (isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
      Builder.buildConstant(DstReg, 0);
      UpdatedDefs.push_back(DstReg);
    }

    markInstAndDefDead(MI, *DefMI, DeadInsts);
    return true;
  }
  return false;
}
||
367 | |||
/// Try to fold a G_UNMERGE_VALUES \p MI whose source is the artifact cast
/// \p CastMI, by unmerging the cast's own source instead.
///
/// Currently only G_TRUNC sources are handled, in two shapes:
///  - vector trunc whose scalar type already matches the unmerge result:
///    unmerge the pre-trunc vector and truncate each piece;
///  - scalar trunc: unmerge the wider pre-trunc scalar directly, creating
///    extra (dead) defs for the bits the trunc discarded.
///
/// \param MI          the G_UNMERGE_VALUES (asserted below).
/// \param CastMI      the instruction defining MI's source operand.
/// \param DeadInsts   receives instructions made dead by the fold.
/// \param UpdatedDefs receives defs whose uses may now be combinable.
/// \returns true if \p MI was folded away.
bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs) {

  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const unsigned CastOpc = CastMI.getOpcode();

  if (!isArtifactCast(CastOpc))
    return false;

  // All operands except the last are defs.
  const unsigned NumDefs = MI.getNumOperands() - 1;

  const Register CastSrcReg = CastMI.getOperand(1).getReg();
  const LLT CastSrcTy = MRI.getType(CastSrcReg);
  const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

  const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
  const unsigned DestSize = DestTy.getSizeInBits();

  if (CastOpc == TargetOpcode::G_TRUNC) {
    if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
      // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
      // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
      // =>
      // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
      // %2:_(s8) = G_TRUNC %6
      // %3:_(s8) = G_TRUNC %7
      // %4:_(s8) = G_TRUNC %8
      // %5:_(s8) = G_TRUNC %9

      // Each new unmerge result carries the elements one old result covers
      // (1 when the old results are scalars).
      unsigned UnmergeNumElts =
          DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
      LLT UnmergeTy = CastSrcTy.changeElementCount(
          ElementCount::getFixed(UnmergeNumElts));

      if (isInstUnsupported(
              {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
        return false;

      Builder.setInstr(MI);
      auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

      // Re-define each original result as a trunc of the matching new
      // unmerge result, keeping the original registers (and users) intact.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register DefReg = MI.getOperand(I).getReg();
        UpdatedDefs.push_back(DefReg);
        Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
      }

      markInstAndDefDead(MI, CastMI, DeadInsts);
      return true;
    }

    if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
      // %1:_(s16) = G_TRUNC %0(s32)
      // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
      // =>
      // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

      // Unmerge(trunc) can be combined if the trunc source size is a multiple
      // of the unmerge destination size
      if (CastSrcSize % DestSize != 0)
        return false;

      // Check if the new unmerge is supported
      if (isInstUnsupported(
              {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
        return false;

      // Gather the original destination registers and create new ones for the
      // unused bits
      const unsigned NewNumDefs = CastSrcSize / DestSize;
      SmallVector<Register, 8> DstRegs(NewNumDefs);
      for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
        if (Idx < NumDefs)
          DstRegs[Idx] = MI.getOperand(Idx).getReg();
        else
          DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
      }

      // Build new unmerge
      Builder.setInstr(MI);
      Builder.buildUnmerge(DstRegs, CastSrcReg);
      UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
      markInstAndDefDead(MI, CastMI, DeadInsts);
      return true;
    }
  }

  // TODO: support combines with other casts as well
  return false;
}
||
461 | |||
462 | static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, |
||
463 | LLT OpTy, LLT DestTy) { |
||
464 | // Check if we found a definition that is like G_MERGE_VALUES. |
||
465 | switch (MergeOp) { |
||
466 | default: |
||
467 | return false; |
||
468 | case TargetOpcode::G_BUILD_VECTOR: |
||
469 | case TargetOpcode::G_MERGE_VALUES: |
||
470 | // The convert operation that we will need to insert is |
||
471 | // going to convert the input of that type of instruction (scalar) |
||
472 | // to the destination type (DestTy). |
||
473 | // The conversion needs to stay in the same domain (scalar to scalar |
||
474 | // and vector to vector), so if we were to allow to fold the merge |
||
475 | // we would need to insert some bitcasts. |
||
476 | // E.g., |
||
477 | // <2 x s16> = build_vector s16, s16 |
||
478 | // <2 x s32> = zext <2 x s16> |
||
479 | // <2 x s16>, <2 x s16> = unmerge <2 x s32> |
||
480 | // |
||
481 | // As is the folding would produce: |
||
482 | // <2 x s16> = zext s16 <-- scalar to vector |
||
483 | // <2 x s16> = zext s16 <-- scalar to vector |
||
484 | // Which is invalid. |
||
485 | // Instead we would want to generate: |
||
486 | // s32 = zext s16 |
||
487 | // <2 x s16> = bitcast s32 |
||
488 | // s32 = zext s16 |
||
489 | // <2 x s16> = bitcast s32 |
||
490 | // |
||
491 | // That is not done yet. |
||
492 | if (ConvertOp == 0) |
||
493 | return true; |
||
494 | return !DestTy.isVector() && OpTy.isVector() && |
||
495 | DestTy == OpTy.getElementType(); |
||
496 | case TargetOpcode::G_CONCAT_VECTORS: { |
||
497 | if (ConvertOp == 0) |
||
498 | return true; |
||
499 | if (!DestTy.isVector()) |
||
500 | return false; |
||
501 | |||
502 | const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); |
||
503 | |||
504 | // Don't handle scalarization with a cast that isn't in the same |
||
505 | // direction as the vector cast. This could be handled, but it would |
||
506 | // require more intermediate unmerges. |
||
507 | if (ConvertOp == TargetOpcode::G_TRUNC) |
||
508 | return DestTy.getSizeInBits() <= OpEltSize; |
||
509 | return DestTy.getSizeInBits() >= OpEltSize; |
||
510 | } |
||
511 | } |
||
512 | } |
||
513 | |||
514 | /// Try to replace DstReg with SrcReg or build a COPY instruction |
||
515 | /// depending on the register constraints. |
||
516 | static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg, |
||
517 | MachineRegisterInfo &MRI, |
||
518 | MachineIRBuilder &Builder, |
||
519 | SmallVectorImpl<Register> &UpdatedDefs, |
||
520 | GISelChangeObserver &Observer) { |
||
521 | if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) { |
||
522 | Builder.buildCopy(DstReg, SrcReg); |
||
523 | UpdatedDefs.push_back(DstReg); |
||
524 | return; |
||
525 | } |
||
526 | SmallVector<MachineInstr *, 4> UseMIs; |
||
527 | // Get the users and notify the observer before replacing. |
||
528 | for (auto &UseMI : MRI.use_instructions(DstReg)) { |
||
529 | UseMIs.push_back(&UseMI); |
||
530 | Observer.changingInstr(UseMI); |
||
531 | } |
||
532 | // Replace the registers. |
||
533 | MRI.replaceRegWith(DstReg, SrcReg); |
||
534 | UpdatedDefs.push_back(SrcReg); |
||
535 | // Notify the observer that we changed the instructions. |
||
536 | for (auto *UseMI : UseMIs) |
||
537 | Observer.changedInstr(*UseMI); |
||
538 | } |
||
539 | |||
540 | /// Return the operand index in \p MI that defines \p Def |
||
541 | static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { |
||
542 | unsigned DefIdx = 0; |
||
543 | for (const MachineOperand &Def : MI.defs()) { |
||
544 | if (Def.getReg() == SearchDef) |
||
545 | break; |
||
546 | ++DefIdx; |
||
547 | } |
||
548 | |||
549 | return DefIdx; |
||
550 | } |
||
551 | |||
552 | /// This class provides utilities for finding source registers of specific |
||
553 | /// bit ranges in an artifact. The routines can look through the source |
||
554 | /// registers if they're other artifacts to try to find a non-artifact source |
||
555 | /// of a value. |
||
556 | class ArtifactValueFinder { |
||
557 | MachineRegisterInfo &MRI; |
||
558 | MachineIRBuilder &MIB; |
||
559 | const LegalizerInfo &LI; |
||
560 | |||
561 | // Stores the best register found in the current query so far. |
||
562 | Register CurrentBest = Register(); |
||
563 | |||
/// Given a concat_vectors op \p Concat and a start bit and size, try to
/// find the origin of the value defined by that start position and size.
///
/// \returns a register with the requested size, or the current best
/// register found during the current query.
Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                             unsigned Size) {
  assert(Size > 0);

  // Find the source operand that provides the bits requested. All concat
  // sources share one type, so the first source gives the common width.
  Register Src1Reg = Concat.getSourceReg(0);
  unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

  // Operand index of the source that provides the start of the bit range
  // (+1 skips the def operand).
  unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
  // Offset into the source at which the bit range starts.
  unsigned InRegOffset = StartBit % SrcSize;
  // Check that the bits don't span multiple sources.
  // FIXME: we might be able return multiple sources? Or create an
  // appropriate concat to make it fit.
  if (InRegOffset + Size > SrcSize)
    return CurrentBest;

  Register SrcReg = Concat.getReg(StartSrcIdx);
  if (InRegOffset == 0 && Size == SrcSize) {
    // The whole source is requested: it is an acceptable answer on its
    // own, so record it before trying to look through it further.
    CurrentBest = SrcReg;
    return findValueFromDefImpl(SrcReg, 0, Size);
  }

  return findValueFromDefImpl(SrcReg, InRegOffset, Size);
}
||
595 | |||
/// Given a build_vector op \p BV and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
/// \returns a register with the requested size, or the current best
/// register found during the current query.
Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                  unsigned Size) {
  assert(Size > 0);

  // Find the source operand that provides the bits requested. All
  // build_vector sources are scalars of one common type.
  Register Src1Reg = BV.getSourceReg(0);
  unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

  // Operand index of the source that provides the start of the bit range
  // (+1 skips the def operand).
  unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
  // Offset into the source at which the bit range starts.
  unsigned InRegOffset = StartBit % SrcSize;

  if (InRegOffset != 0)
    return CurrentBest; // Give up, bits don't start at a scalar source.
  if (Size < SrcSize)
    return CurrentBest; // Scalar source is too large for requested bits.

  // If the bits cover multiple sources evenly, then create a new
  // build_vector to synthesize the required size, if that's been requested.
  if (Size > SrcSize) {
    if (Size % SrcSize > 0)
      return CurrentBest; // Isn't covered exactly by sources.

    unsigned NumSrcsUsed = Size / SrcSize;
    // If we're requesting all of the sources, just return this def.
    if (NumSrcsUsed == BV.getNumSources())
      return BV.getReg(0);

    LLT SrcTy = MRI.getType(Src1Reg);
    LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

    // Check if the resulting build vector would be legal.
    LegalizeActionStep ActionStep =
        LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
    if (ActionStep.Action != LegalizeActions::Legal)
      return CurrentBest;

    // Build a smaller vector from the consecutive sources that cover the
    // requested range, inserted at (and with the debug loc of) BV.
    SmallVector<Register> NewSrcs;
    for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
         ++SrcIdx)
      NewSrcs.push_back(BV.getReg(SrcIdx));
    MIB.setInstrAndDebugLoc(BV);
    return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
  }
  // A single source is requested, just return it.
  return BV.getReg(StartSrcIdx);
}
||
649 | |||
/// Given a G_INSERT op \p MI and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
/// \returns a register with the requested size, or the current best
/// register found during the current query.
Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                             unsigned Size) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT);
  assert(Size > 0);

  Register ContainerSrcReg = MI.getOperand(1).getReg();
  Register InsertedReg = MI.getOperand(2).getReg();
  LLT InsertedRegTy = MRI.getType(InsertedReg);
  unsigned InsertOffset = MI.getOperand(3).getImm();

  // For %_ = G_INSERT %CONTAINER, %INS, InsOff and a queried bit range
  // [StartBit, StartBit + Size), three layouts are possible:
  //  - the range is disjoint from the inserted bits
  //    [InsOff, InsOff + size(INS)): the value comes purely from the
  //    container operand;
  //  - the range lies entirely within the inserted bits: the value comes
  //    purely from the inserted operand (rebased to its own bit 0);
  //  - the range straddles a boundary between the two regions: the value
  //    would have to be stitched together, which we don't attempt.

  unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
  unsigned EndBit = StartBit + Size;
  unsigned NewStartBit;
  Register SrcRegToUse;
  if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
    // Entirely outside the inserted region: recurse into the container.
    SrcRegToUse = ContainerSrcReg;
    NewStartBit = StartBit;
    return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
  }
  if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
    // Entirely inside the inserted region: recurse into the inserted
    // value with the offset rebased to its start.
    SrcRegToUse = InsertedReg;
    NewStartBit = StartBit - InsertOffset;
    // If the query covers the inserted value exactly, that register is
    // itself an acceptable fallback answer.
    if (NewStartBit == 0 &&
        Size == MRI.getType(SrcRegToUse).getSizeInBits())
      CurrentBest = SrcRegToUse;
    return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
  }
  // The bit range spans both the inserted and container regions.
  return Register();
}
||
721 | |||
/// Internal implementation for findValueFromDef(). findValueFromDef()
/// initializes some data like the CurrentBest register, which this method
/// and its callees rely upon.
///
/// \returns the origin register for bits [StartBit, StartBit + Size) of
/// \p DefReg, or CurrentBest when no better source is found.
Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                              unsigned Size) {
  std::optional<DefinitionAndSourceRegister> DefSrcReg =
      getDefSrcRegIgnoringCopies(DefReg, MRI);
  // NOTE(review): the optional is dereferenced without a check —
  // presumably getDefSrcRegIgnoringCopies always succeeds for a register
  // with a def in this context; confirm against its documentation.
  MachineInstr *Def = DefSrcReg->MI;
  DefReg = DefSrcReg->Reg;
  // If the instruction has a single def, then simply delegate the search.
  // For unmerge however with multiple defs, we need to compute the offset
  // into the source of the unmerge.
  switch (Def->getOpcode()) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
  case TargetOpcode::G_UNMERGE_VALUES: {
    // Compute how far into the unmerge's input DefReg's bits start, by
    // counting the equally-sized defs that precede it.
    unsigned DefStartBit = 0;
    unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
    for (const auto &MO : Def->defs()) {
      if (MO.getReg() == DefReg)
        break;
      DefStartBit += DefSize;
    }
    // The unmerge's single source is its last operand.
    Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
    Register SrcOriginReg =
        findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
    if (SrcOriginReg)
      return SrcOriginReg;
    // Failed to find a further value. If the StartBit and Size perfectly
    // covered the requested DefReg, return that since it's better than
    // nothing.
    if (StartBit == 0 && Size == DefSize)
      return DefReg;
    return CurrentBest;
  }
  case TargetOpcode::G_BUILD_VECTOR:
    return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                    Size);
  case TargetOpcode::G_INSERT:
    return findValueFromInsert(*Def, StartBit, Size);
  default:
    return CurrentBest;
  }
}
||
766 | |||
  public:
    /// Construct a finder over the given register info, IR builder and
    /// legalizer info. The references are borrowed, not owned; they must
    /// outlive this object.
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}
||
771 | |||
772 | /// Try to find a source of the value defined in the def \p DefReg, starting |
||
773 | /// at position \p StartBit with size \p Size. |
||
774 | /// \returns a register with the requested size, or an empty Register if no |
||
775 | /// better value could be found. |
||
776 | Register findValueFromDef(Register DefReg, unsigned StartBit, |
||
777 | unsigned Size) { |
||
778 | CurrentBest = Register(); |
||
779 | Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); |
||
780 | return FoundReg != DefReg ? FoundReg : Register(); |
||
781 | } |
||
782 | |||
783 | /// Try to combine the defs of an unmerge \p MI by attempting to find |
||
784 | /// values that provides the bits for each def reg. |
||
785 | /// \returns true if all the defs of the unmerge have been made dead. |
||
786 | bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, |
||
787 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
788 | unsigned NumDefs = MI.getNumDefs(); |
||
789 | LLT DestTy = MRI.getType(MI.getReg(0)); |
||
790 | |||
791 | SmallBitVector DeadDefs(NumDefs); |
||
792 | for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
||
793 | Register DefReg = MI.getReg(DefIdx); |
||
794 | if (MRI.use_nodbg_empty(DefReg)) { |
||
795 | DeadDefs[DefIdx] = true; |
||
796 | continue; |
||
797 | } |
||
798 | Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); |
||
799 | if (!FoundVal) |
||
800 | continue; |
||
801 | if (MRI.getType(FoundVal) != DestTy) |
||
802 | continue; |
||
803 | |||
804 | replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs, |
||
805 | Observer); |
||
806 | // We only want to replace the uses, not the def of the old reg. |
||
807 | Observer.changingInstr(MI); |
||
808 | MI.getOperand(DefIdx).setReg(DefReg); |
||
809 | Observer.changedInstr(MI); |
||
810 | DeadDefs[DefIdx] = true; |
||
811 | } |
||
812 | return DeadDefs.all(); |
||
813 | } |
||
814 | |||
815 | GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, |
||
816 | unsigned &DefOperandIdx) { |
||
817 | if (Register Def = findValueFromDefImpl(Reg, 0, Size)) { |
||
818 | if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) { |
||
819 | DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def); |
||
820 | return Unmerge; |
||
821 | } |
||
822 | } |
||
823 | return nullptr; |
||
824 | } |
||
825 | |||
826 | // Check if sequence of elements from merge-like instruction is defined by |
||
827 | // another sequence of elements defined by unmerge. Most often this is the |
||
828 | // same sequence. Search for elements using findValueFromDefImpl. |
||
829 | bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, |
||
830 | GUnmerge *Unmerge, unsigned UnmergeIdxStart, |
||
831 | unsigned NumElts, unsigned EltSize) { |
||
832 | assert(MergeStartIdx + NumElts <= MI.getNumSources()); |
||
833 | for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { |
||
834 | unsigned EltUnmergeIdx; |
||
835 | GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( |
||
836 | MI.getSourceReg(i), EltSize, EltUnmergeIdx); |
||
837 | // Check if source i comes from the same Unmerge. |
||
838 | if (!EltUnmerge || EltUnmerge != Unmerge) |
||
839 | return false; |
||
840 | // Check that source i's def has same index in sequence in Unmerge. |
||
841 | if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) |
||
842 | return false; |
||
843 | } |
||
844 | return true; |
||
845 | } |
||
846 | |||
    /// Try to fold the merge-like instruction \p MI when all of its sources
    /// trace back to the same G_UNMERGE_VALUES. Three patterns are handled
    /// (see the inline diagrams): a full unmerge/re-merge round trip becomes
    /// a copy; a merge of a contiguous slice becomes a direct unmerge to
    /// DstTy; a merge of several whole unmerge results becomes one merge of
    /// their sources. On success, \p MI is appended to \p DeadInsts and any
    /// redefined registers to \p UpdatedDefs.
    /// \returns true if a combine was performed.
    bool tryCombineMergeLike(GMergeLikeInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelChangeObserver &Observer) {
      Register Elt0 = MI.getSourceReg(0);
      LLT EltTy = MRI.getType(Elt0);
      unsigned EltSize = EltTy.getSizeInBits();

      unsigned Elt0UnmergeIdx;
      // Search for unmerge that will be candidate for combine.
      auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
      if (!Unmerge)
        return false;

      unsigned NumMIElts = MI.getNumSources();
      Register Dst = MI.getReg(0);
      LLT DstTy = MRI.getType(Dst);
      Register UnmergeSrc = Unmerge->getSourceReg();
      LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);

      // Recognize copy of UnmergeSrc to Dst.
      // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
      //
      // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
      // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
      //
      // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
      if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
          return false;
        replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize UnmergeSrc that can be unmerged to DstTy directly.
      // Types have to be either both vector or both non-vector types.
      // Merge-like opcodes are combined one at the time. First one creates new
      // unmerge, following should use the same unmerge (builder performs CSE).
      //
      // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
      // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
      //
      // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          (Elt0UnmergeIdx % NumMIElts == 0) &&
          getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
                                   EltSize))
          return false;
        MIB.setInstrAndDebugLoc(MI);
        auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
        // Which def of the new unmerge corresponds to this merge's result.
        unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
        replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
                              UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize when multiple unmerged sources with UnmergeSrcTy type
      // can be merged into Dst with DstTy type directly.
      // Types have to be either both vector or both non-vector types.

      // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
      //
      // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc

      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
        SmallVector<Register, 4> ConcatSources;
        unsigned NumElts = Unmerge->getNumDefs();
        // Walk MI's sources one unmerge-sized group at a time; each group must
        // be a complete in-order result sequence of some equally-sized unmerge.
        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
          unsigned EltUnmergeIdx;
          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
                                                     EltSize, EltUnmergeIdx);
          // All unmerges have to be the same size.
          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
              (EltUnmergeIdx != 0))
            return false;
          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
            return false;
          ConcatSources.push_back(UnmergeI->getSourceReg());
        }

        MIB.setInstrAndDebugLoc(MI);
        MIB.buildMergeLikeInstr(Dst, ConcatSources);
        DeadInsts.push_back(&MI);
        return true;
      }

      return false;
    }
||
942 | }; |
||
943 | |||
  /// Try to combine away the unmerge \p MI by matching it against the
  /// instruction that defines its source: a per-def value search via
  /// ArtifactValueFinder, an unmerge-of-unmerge fold, or an
  /// unmerge-of-merge fold (with an optional artifact cast in between).
  /// Dead instructions are appended to \p DeadInsts and redefined registers
  /// to \p UpdatedDefs.
  /// \returns true if a combine was performed.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    // Which def of SrcDef produces our source (non-zero only for multi-def
    // instructions like unmerge).
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    // First attempt: find a providing value for each def individually.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        // TypeIdx 1 is the (combined) source type; bail rather than create an
        // unmerge the legalizer would have to split again on that type.
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a separate
      // unmerge for each one.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        // Our defs map onto the slice of the new, wider unmerge that
        // corresponds to the SrcDefIdx-th result of the source unmerge.
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      // The merge sources are wider than our defs: each source must cover an
      // integral number of defs.
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      // %1 = G_MERGE_VALUES %4, %5
      // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      // %9, %10 = G_UNMERGE_VALUES %4
      // %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>)= G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      // Our defs are wider than the merge sources: each def is assembled from
      // an integral number of sources. Conversions are not handled here.
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      // %6 = G_MERGE_VALUES %17, %18, %19, %20
      // %7, %8 = G_UNMERGE_VALUES %6
      // to
      // %7 = G_MERGE_VALUES %17, %18
      // %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMergeLikeInstr(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      // Same number of defs and merge sources: defs map 1:1 onto sources,
      // possibly through a conversion.
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          // Only materialize conversions for defs that are actually used.
          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
||
1145 | |||
1146 | bool tryCombineExtract(MachineInstr &MI, |
||
1147 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
1148 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
1149 | assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); |
||
1150 | |||
1151 | // Try to use the source registers from a G_MERGE_VALUES |
||
1152 | // |
||
1153 | // %2 = G_MERGE_VALUES %0, %1 |
||
1154 | // %3 = G_EXTRACT %2, N |
||
1155 | // => |
||
1156 | // |
||
1157 | // for N < %2.getSizeInBits() / 2 |
||
1158 | // %3 = G_EXTRACT %0, N |
||
1159 | // |
||
1160 | // for N >= %2.getSizeInBits() / 2 |
||
1161 | // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() |
||
1162 | |||
1163 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
1164 | MachineInstr *MergeI = MRI.getVRegDef(SrcReg); |
||
1165 | if (!MergeI || !isa<GMergeLikeInstr>(MergeI)) |
||
1166 | return false; |
||
1167 | |||
1168 | Register DstReg = MI.getOperand(0).getReg(); |
||
1169 | LLT DstTy = MRI.getType(DstReg); |
||
1170 | LLT SrcTy = MRI.getType(SrcReg); |
||
1171 | |||
1172 | // TODO: Do we need to check if the resulting extract is supported? |
||
1173 | unsigned ExtractDstSize = DstTy.getSizeInBits(); |
||
1174 | unsigned Offset = MI.getOperand(2).getImm(); |
||
1175 | unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; |
||
1176 | unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; |
||
1177 | unsigned MergeSrcIdx = Offset / MergeSrcSize; |
||
1178 | |||
1179 | // Compute the offset of the last bit the extract needs. |
||
1180 | unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; |
||
1181 | |||
1182 | // Can't handle the case where the extract spans multiple inputs. |
||
1183 | if (MergeSrcIdx != EndMergeSrcIdx) |
||
1184 | return false; |
||
1185 | |||
1186 | // TODO: We could modify MI in place in most cases. |
||
1187 | Builder.setInstr(MI); |
||
1188 | Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(), |
||
1189 | Offset - MergeSrcIdx * MergeSrcSize); |
||
1190 | UpdatedDefs.push_back(DstReg); |
||
1191 | markInstAndDefDead(MI, *MergeI, DeadInsts); |
||
1192 | return true; |
||
1193 | } |
||
1194 | |||
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  /// \p WrapperObserver is notified of every changed/erased instruction so
  /// the legalizer's worklists stay in sync.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    ArtifactValueFinder Finder(MRI, Builder, LI);

    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    // Dispatch to the per-artifact combine for this opcode.
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts,
                                           UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_BUILD_VECTOR:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          // Look through copies: requeue the copied-to vreg instead.
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
||
1297 | |||
1298 | private: |
||
1299 | static Register getArtifactSrcReg(const MachineInstr &MI) { |
||
1300 | switch (MI.getOpcode()) { |
||
1301 | case TargetOpcode::COPY: |
||
1302 | case TargetOpcode::G_TRUNC: |
||
1303 | case TargetOpcode::G_ZEXT: |
||
1304 | case TargetOpcode::G_ANYEXT: |
||
1305 | case TargetOpcode::G_SEXT: |
||
1306 | case TargetOpcode::G_EXTRACT: |
||
1307 | return MI.getOperand(1).getReg(); |
||
1308 | case TargetOpcode::G_UNMERGE_VALUES: |
||
1309 | return MI.getOperand(MI.getNumOperands() - 1).getReg(); |
||
1310 | default: |
||
1311 | llvm_unreachable("Not a legalization artifact happen"); |
||
1312 | } |
||
1313 | } |
||
1314 | |||
  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
  /// (either by killing it or changing operands) results in DefMI being dead
  /// too. In-between COPYs or artifact-casts are also collected if they are
  /// dead.
  /// MI is not marked dead.
  /// \p DefIdx identifies which def of \p DefMI feeds \p MI (relevant for
  /// multi-def instructions such as unmerge).
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      // Only walk further up if PrevMI was the sole user of the source; a
      // second user keeps the chain alive, so stop collecting.
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    // Reached DefMI through a chain of single-use copies/casts: DefMI itself
    // is dead if none of its other defs have remaining uses.
    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          // NOTE(review): when the DefIdx def has more than one use we break
          // out with IsDead still true, so DefMI is marked dead without
          // checking the remaining defs. That looks suspicious — presumably
          // callers guarantee the extra users are themselves already in
          // DeadInsts; confirm before relying on it.
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }
||
1371 | |||
  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)), try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to collect
  /// the in between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    // MI is pushed first so it is erased before the defs it uses; the
    // intermediate copies/casts and DefMI follow via markDefDead.
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }
||
1383 | |||
1384 | /// Erase the dead instructions in the list and call the observer hooks. |
||
1385 | /// Normally the Legalizer will deal with erasing instructions that have been |
||
1386 | /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to |
||
1387 | /// process instructions which have been marked dead, but otherwise break the |
||
1388 | /// MIR by introducing multiple vreg defs. For those cases, allow the combines |
||
1389 | /// to explicitly delete the instructions before we run into trouble. |
||
1390 | void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
1391 | GISelObserverWrapper &WrapperObserver) { |
||
1392 | for (auto *DeadMI : DeadInsts) { |
||
1393 | LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n"); |
||
1394 | WrapperObserver.erasingInstr(*DeadMI); |
||
1395 | DeadMI->eraseFromParent(); |
||
1396 | } |
||
1397 | DeadInsts.clear(); |
||
1398 | } |
||
1399 | |||
1400 | /// Checks if the target legalizer info has specified anything about the |
||
1401 | /// instruction, or if unsupported. |
||
1402 | bool isInstUnsupported(const LegalityQuery &Query) const { |
||
1403 | using namespace LegalizeActions; |
||
1404 | auto Step = LI.getAction(Query); |
||
1405 | return Step.Action == Unsupported || Step.Action == NotFound; |
||
1406 | } |
||
1407 | |||
1408 | bool isInstLegal(const LegalityQuery &Query) const { |
||
1409 | return LI.getAction(Query).Action == LegalizeActions::Legal; |
||
1410 | } |
||
1411 | |||
1412 | bool isConstantUnsupported(LLT Ty) const { |
||
1413 | if (!Ty.isVector()) |
||
1414 | return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}}); |
||
1415 | |||
1416 | LLT EltTy = Ty.getElementType(); |
||
1417 | return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) || |
||
1418 | isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); |
||
1419 | } |
||
1420 | |||
1421 | /// Looks through copy instructions and returns the actual |
||
1422 | /// source register. |
||
1423 | Register lookThroughCopyInstrs(Register Reg) { |
||
1424 | using namespace llvm::MIPatternMatch; |
||
1425 | |||
1426 | Register TmpReg; |
||
1427 | while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { |
||
1428 | if (MRI.getType(TmpReg).isValid()) |
||
1429 | Reg = TmpReg; |
||
1430 | else |
||
1431 | break; |
||
1432 | } |
||
1433 | return Reg; |
||
1434 | } |
||
1435 | }; |
||
1436 | |||
1437 | } // namespace llvm |
||
1438 | |||
1439 | #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |