Subversion Repositories QNX 8.QNX8 LLVM/Clang compiler suite

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
14 pmbaty 1
//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
// This file contains some helper functions which try to cleanup artifacts
9
// such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
10
// the types match. This file also contains some combines of merges that happens
11
// at the end of the legalization.
12
//===----------------------------------------------------------------------===//
13
 
14
#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
15
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
16
 
17
#include "llvm/ADT/SmallBitVector.h"
18
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
21
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
22
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24
#include "llvm/CodeGen/GlobalISel/Utils.h"
25
#include "llvm/CodeGen/MachineRegisterInfo.h"
26
#include "llvm/CodeGen/Register.h"
27
#include "llvm/IR/Constants.h"
28
#include "llvm/Support/Debug.h"
29
 
30
#define DEBUG_TYPE "legalizer"
31
 
32
namespace llvm {
33
class LegalizationArtifactCombiner {
34
  MachineIRBuilder &Builder;
35
  MachineRegisterInfo &MRI;
36
  const LegalizerInfo &LI;
37
 
38
  static bool isArtifactCast(unsigned Opc) {
39
    switch (Opc) {
40
    case TargetOpcode::G_TRUNC:
41
    case TargetOpcode::G_SEXT:
42
    case TargetOpcode::G_ZEXT:
43
    case TargetOpcode::G_ANYEXT:
44
      return true;
45
    default:
46
      return false;
47
    }
48
  }
49
 
50
public:
  /// \p B is the builder used to create replacement instructions; \p MRI and
  /// \p LI are the function's register info and the target's legalizer rules.
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                    const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}
54
 
55
  /// Try to combine away a G_ANYEXT artifact \p MI. Patterns handled:
  ///   aext(trunc x)    -> aext/copy/trunc of x
  ///   aext([asz]ext x) -> [asz]ext x
  ///   aext(G_CONSTANT) -> wider G_CONSTANT (when legal for the dest type)
  ///   aext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  /// \returns true if a combine fired. Instructions made dead are appended to
  /// \p DeadInsts; registers whose definition changed go to \p UpdatedDefs.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) -> aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // Same type: the trunc/aext pair is a no-op overall; replace or copy.
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      // The inner extension's semantics carry over to the wider dest type.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        // G_ANYEXT leaves the high bits unspecified, so sign-extending the
        // constant is one valid choice of materialization.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
108
 
109
  /// Try to combine away a G_ZEXT artifact \p MI. Patterns handled:
  ///   zext(trunc x)    -> and (aext/copy/trunc x), mask
  ///   zext(sext x)     -> and (sext x), mask
  ///   zext(zext x)     -> zext x
  ///   zext(G_CONSTANT) -> wider G_CONSTANT (when legal)
  ///   zext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // The rewrite needs both G_AND and a G_CONSTANT mask at the dest type.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Mask keeps only the bits the original zext source provided.
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Rewrite MI in place to extend directly from the inner source; the
      // observer must be notified around the operand mutation.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
171
 
172
  /// Try to combine away a G_SEXT artifact \p MI. Patterns handled:
  ///   sext(trunc x)    -> sext_inreg (aext/copy/trunc x), c
  ///   sext(zext x)     -> zext x
  ///   sext(sext x)     -> sext x
  ///   sext(G_CONSTANT) -> wider G_CONSTANT (when legal)
  ///   sext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Sign-extend from the width the trunc narrowed to.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // The inner extension already fixed the high bits, so sext of it is
      // equivalent to the inner extension at the wider type.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
228
 
229
  /// Try to combine away a G_TRUNC artifact \p MI. Patterns handled:
  ///   trunc(G_CONSTANT)     -> narrower G_CONSTANT (when legal)
  ///   trunc(merge-like op)  -> trunc/copy/smaller merge of the merge sources
  ///   trunc(trunc x)        -> trunc x
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    // NOTE(review): this uses setInstr while the other combines use
    // setInstrAndDebugLoc, so rewrites here may drop the debug location —
    // confirm whether that is intentional.
    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult to legalize, merges
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
      const LLT DstTy = MRI.getType(DstReg);

      // We can only fold if the types are scalar
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        // The trunc keeps only the low bits, i.e. the first NumSrcs merge
        // sources; re-merge just those into the narrower destination.
        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require less inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMergeValues(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    return false;
  }
330
 
331
  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  ///   G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF (undef stays undef)
  ///   G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 (high bits are defined)
  /// \returns true if the fold fired; \p MI and the implicit def are added to
  /// \p DeadInsts and the rewritten destination to \p UpdatedDefs.
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }
367
 
368
  /// Try to fold a G_UNMERGE_VALUES \p MI whose source is produced by the
  /// artifact cast \p CastMI. Currently only G_TRUNC sources are handled:
  /// either unmerge the wide value first and truncate each piece, or (for
  /// scalars) unmerge the pre-trunc value directly into more pieces.
  /// \returns true if a combine fired; dead instructions are appended to
  /// \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    // All operands except the last (the source) are defs.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        // Each new unmerge def covers the same element span as the old one,
        // but at the wide (pre-trunc) element type.
        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
461
 
462
  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
463
                                 LLT OpTy, LLT DestTy) {
464
    // Check if we found a definition that is like G_MERGE_VALUES.
465
    switch (MergeOp) {
466
    default:
467
      return false;
468
    case TargetOpcode::G_BUILD_VECTOR:
469
    case TargetOpcode::G_MERGE_VALUES:
470
      // The convert operation that we will need to insert is
471
      // going to convert the input of that type of instruction (scalar)
472
      // to the destination type (DestTy).
473
      // The conversion needs to stay in the same domain (scalar to scalar
474
      // and vector to vector), so if we were to allow to fold the merge
475
      // we would need to insert some bitcasts.
476
      // E.g.,
477
      // <2 x s16> = build_vector s16, s16
478
      // <2 x s32> = zext <2 x s16>
479
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
480
      //
481
      // As is the folding would produce:
482
      // <2 x s16> = zext s16  <-- scalar to vector
483
      // <2 x s16> = zext s16  <-- scalar to vector
484
      // Which is invalid.
485
      // Instead we would want to generate:
486
      // s32 = zext s16
487
      // <2 x s16> = bitcast s32
488
      // s32 = zext s16
489
      // <2 x s16> = bitcast s32
490
      //
491
      // That is not done yet.
492
      if (ConvertOp == 0)
493
        return true;
494
      return !DestTy.isVector() && OpTy.isVector() &&
495
             DestTy == OpTy.getElementType();
496
    case TargetOpcode::G_CONCAT_VECTORS: {
497
      if (ConvertOp == 0)
498
        return true;
499
      if (!DestTy.isVector())
500
        return false;
501
 
502
      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
503
 
504
      // Don't handle scalarization with a cast that isn't in the same
505
      // direction as the vector cast. This could be handled, but it would
506
      // require more intermediate unmerges.
507
      if (ConvertOp == TargetOpcode::G_TRUNC)
508
        return DestTy.getSizeInBits() <= OpEltSize;
509
      return DestTy.getSizeInBits() >= OpEltSize;
510
    }
511
    }
512
  }
513
 
514
  /// Try to replace DstReg with SrcReg or build a COPY instruction
515
  /// depending on the register constraints.
516
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
517
                                    MachineRegisterInfo &MRI,
518
                                    MachineIRBuilder &Builder,
519
                                    SmallVectorImpl<Register> &UpdatedDefs,
520
                                    GISelChangeObserver &Observer) {
521
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
522
      Builder.buildCopy(DstReg, SrcReg);
523
      UpdatedDefs.push_back(DstReg);
524
      return;
525
    }
526
    SmallVector<MachineInstr *, 4> UseMIs;
527
    // Get the users and notify the observer before replacing.
528
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
529
      UseMIs.push_back(&UseMI);
530
      Observer.changingInstr(UseMI);
531
    }
532
    // Replace the registers.
533
    MRI.replaceRegWith(DstReg, SrcReg);
534
    UpdatedDefs.push_back(SrcReg);
535
    // Notify the observer that we changed the instructions.
536
    for (auto *UseMI : UseMIs)
537
      Observer.changedInstr(*UseMI);
538
  }
539
 
540
  /// Return the operand index in \p MI that defines \p Def
541
  static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
542
    unsigned DefIdx = 0;
543
    for (const MachineOperand &Def : MI.defs()) {
544
      if (Def.getReg() == SearchDef)
545
        break;
546
      ++DefIdx;
547
    }
548
 
549
    return DefIdx;
550
  }
551
 
552
  /// This class provides utilities for finding source registers of specific
553
  /// bit ranges in an artifact. The routines can look through the source
554
  /// registers if they're other artifacts to try to find a non-artifact source
555
  /// of a value.
556
  class ArtifactValueFinder {
557
    MachineRegisterInfo &MRI;
558
    MachineIRBuilder &MIB;
559
    const LegalizerInfo &LI;
560
 
561
    // Stores the best register found in the current query so far.
562
    Register CurrentBest = Register();
563
 
564
    /// Given a concat_vectors op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All concat sources have the same type, so the first one fixes the
      // per-source bit width.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // Exact cover of one source: remember it as a fallback, then keep
        // searching through its own definition for something even better.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }
595
 
596
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All build_vector sources are the same scalar type.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        // Build a smaller vector from the covered subset of sources.
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }
649
 
650
    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      // Range lies entirely outside the inserted bits: recurse into the
      // container operand at the same offset.
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // Range lies entirely inside the inserted bits: recurse into the
      // inserted operand, rebased to its local bit offset.
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
721
 
722
    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      std::optional<DefinitionAndSourceRegister> DefSrcReg =
          getDefSrcRegIgnoringCopies(DefReg, MRI);
      // NOTE(review): the optional is dereferenced without a check;
      // presumably getDefSrcRegIgnoringCopies always succeeds for a virtual
      // register with a def — confirm.
      MachineInstr *Def = DefSrcReg->MI;
      DefReg = DefSrcReg->Reg;
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // Accumulate the bit offset of DefReg within the unmerge source by
        // counting the defs that precede it (all defs share one type/size).
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        // The unmerge source is the last operand.
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        // Not an instruction we know how to look through.
        return CurrentBest;
      }
    }
766
 
767
  public:
768
    /// Construct a finder operating on \p Mri, building any new instructions
    /// with \p Builder, and consulting \p Info for legality decisions.
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}
771
 
772
    /// Try to find a source of the value defined in the def \p DefReg, starting
773
    /// at position \p StartBit with size \p Size.
774
    /// \returns a register with the requested size, or an empty Register if no
775
    /// better value could be found.
776
    Register findValueFromDef(Register DefReg, unsigned StartBit,
777
                              unsigned Size) {
778
      CurrentBest = Register();
779
      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
780
      return FoundReg != DefReg ? FoundReg : Register();
781
    }
782
 
783
    /// Try to combine the defs of an unmerge \p MI by attempting to find
784
    /// values that provides the bits for each def reg.
785
    /// \returns true if all the defs of the unmerge have been made dead.
786
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
787
                               SmallVectorImpl<Register> &UpdatedDefs) {
788
      unsigned NumDefs = MI.getNumDefs();
789
      LLT DestTy = MRI.getType(MI.getReg(0));
790
 
791
      SmallBitVector DeadDefs(NumDefs);
792
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
793
        Register DefReg = MI.getReg(DefIdx);
794
        if (MRI.use_nodbg_empty(DefReg)) {
795
          DeadDefs[DefIdx] = true;
796
          continue;
797
        }
798
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
799
        if (!FoundVal)
800
          continue;
801
        if (MRI.getType(FoundVal) != DestTy)
802
          continue;
803
 
804
        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
805
                              Observer);
806
        // We only want to replace the uses, not the def of the old reg.
807
        Observer.changingInstr(MI);
808
        MI.getOperand(DefIdx).setReg(DefReg);
809
        Observer.changedInstr(MI);
810
        DeadDefs[DefIdx] = true;
811
      }
812
      return DeadDefs.all();
813
    }
814
 
815
    GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
816
                                        unsigned &DefOperandIdx) {
817
      if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
818
        if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
819
          DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def);
820
          return Unmerge;
821
        }
822
      }
823
      return nullptr;
824
    }
825
 
826
    // Check if sequence of elements from merge-like instruction is defined by
827
    // another sequence of elements defined by unmerge. Most often this is the
828
    // same sequence. Search for elements using findValueFromDefImpl.
829
    bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
830
                               GUnmerge *Unmerge, unsigned UnmergeIdxStart,
831
                               unsigned NumElts, unsigned EltSize) {
832
      assert(MergeStartIdx + NumElts <= MI.getNumSources());
833
      for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
834
        unsigned EltUnmergeIdx;
835
        GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
836
            MI.getSourceReg(i), EltSize, EltUnmergeIdx);
837
        // Check if source i comes from the same Unmerge.
838
        if (!EltUnmerge || EltUnmerge != Unmerge)
839
          return false;
840
        // Check that source i's def has same index in sequence in Unmerge.
841
        if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
842
          return false;
843
      }
844
      return true;
845
    }
846
 
847
    /// Try to fold the merge-like instruction \p MI with the unmerge(s)
    /// feeding its sources. Three shapes are recognized, in order:
    ///  1. MI reassembles exactly one full unmerge source -> plain copy.
    ///  2. MI rebuilds a contiguous slice of a wider unmerge source ->
    ///     unmerge the wide source straight to DstTy and use the matching
    ///     piece.
    ///  3. MI concatenates several complete unmerge sources -> merge those
    ///     sources directly.
    /// Dead instructions are appended to \p DeadInsts; redefined vregs go
    /// into \p UpdatedDefs for further combining.
    /// \returns true if a combine fired.
    bool tryCombineMergeLike(GMergeLikeInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelChangeObserver &Observer) {
      Register Elt0 = MI.getSourceReg(0);
      LLT EltTy = MRI.getType(Elt0);
      unsigned EltSize = EltTy.getSizeInBits();

      unsigned Elt0UnmergeIdx;
      // Search for unmerge that will be candidate for combine.
      auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
      if (!Unmerge)
        return false;

      unsigned NumMIElts = MI.getNumSources();
      Register Dst = MI.getReg(0);
      LLT DstTy = MRI.getType(Dst);
      Register UnmergeSrc = Unmerge->getSourceReg();
      LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);

      // Recognize copy of UnmergeSrc to Dst.
      // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
      //
      // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
      // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
      //
      // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
      if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
          return false;
        replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize UnmergeSrc that can be unmerged to DstTy directly.
      // Types have to be either both vector or both non-vector types.
      // Merge-like opcodes are combined one at the time. First one creates new
      // unmerge, following should use the same unmerge (builder performs CSE).
      //
      // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
      // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
      //
      // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          (Elt0UnmergeIdx % NumMIElts == 0) &&
          getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
                                   EltSize))
          return false;
        MIB.setInstrAndDebugLoc(MI);
        auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
        // Index of the new unmerge def that corresponds to Dst's slice.
        unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
        replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
                              UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize when multiple unmerged sources with UnmergeSrcTy type
      // can be merged into Dst with DstTy type directly.
      // Types have to be either both vector or both non-vector types.

      // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
      //
      // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc

      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
        SmallVector<Register, 4> ConcatSources;
        unsigned NumElts = Unmerge->getNumDefs();
        // Walk MI's sources in NumElts-sized groups; each group must be a
        // complete, in-order unmerge of a single source register.
        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
          unsigned EltUnmergeIdx;
          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
                                                     EltSize, EltUnmergeIdx);
          // All unmerges have to be the same size.
          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
              (EltUnmergeIdx != 0))
            return false;
          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
            return false;
          ConcatSources.push_back(UnmergeI->getSourceReg());
        }

        MIB.setInstrAndDebugLoc(MI);
        MIB.buildMergeLikeInstr(Dst, ConcatSources);
        DeadInsts.push_back(&MI);
        return true;
      }

      return false;
    }
942
  };
943
 
944
  /// Try to fold the unmerge \p MI with its source definition:
  ///  - rewrite each def via ArtifactValueFinder,
  ///  - unmerge(unmerge) -> a single unmerge of the outer source,
  ///  - unmerge(merge-like), possibly through an artifact cast -> forward or
  ///    re-split the merge operands directly.
  /// Dead instructions are appended to \p DeadInsts; redefined vregs go into
  /// \p UpdatedDefs.
  /// \returns true if \p MI was combined away.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    // First try the generic bit-range finder on each def.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    // unmerge(unmerge): unmerge the outer source directly.
    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a separate
      // unmerge for each one.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      // The merge has fewer, wider sources than the unmerge has defs: split
      // each merge source with its own smaller unmerge.
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      //   %1 = G_MERGE_VALUES %4, %5
      //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      //   %9, %10 = G_UNMERGE_VALUES %4
      //   %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>)= G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      // The merge has more, narrower sources than the unmerge has defs:
      // rebuild each def as a smaller merge. Not done through a cast.
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      //   %6 = G_MERGE_VALUES %17, %18, %19, %20
      //   %7, %8 = G_UNMERGE_VALUES %6
      // to
      //   %7 = G_MERGE_VALUES %17, %18
      //   %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMergeLikeInstr(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      // Same number of sources and defs: forward each merge source to the
      // matching unmerge def, converting (or bitcasting) when needed.
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
1145
 
1146
  bool tryCombineExtract(MachineInstr &MI,
1147
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
1148
                         SmallVectorImpl<Register> &UpdatedDefs) {
1149
    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
1150
 
1151
    // Try to use the source registers from a G_MERGE_VALUES
1152
    //
1153
    // %2 = G_MERGE_VALUES %0, %1
1154
    // %3 = G_EXTRACT %2, N
1155
    // =>
1156
    //
1157
    // for N < %2.getSizeInBits() / 2
1158
    //     %3 = G_EXTRACT %0, N
1159
    //
1160
    // for N >= %2.getSizeInBits() / 2
1161
    //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
1162
 
1163
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
1164
    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
1165
    if (!MergeI || !isa<GMergeLikeInstr>(MergeI))
1166
      return false;
1167
 
1168
    Register DstReg = MI.getOperand(0).getReg();
1169
    LLT DstTy = MRI.getType(DstReg);
1170
    LLT SrcTy = MRI.getType(SrcReg);
1171
 
1172
    // TODO: Do we need to check if the resulting extract is supported?
1173
    unsigned ExtractDstSize = DstTy.getSizeInBits();
1174
    unsigned Offset = MI.getOperand(2).getImm();
1175
    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
1176
    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
1177
    unsigned MergeSrcIdx = Offset / MergeSrcSize;
1178
 
1179
    // Compute the offset of the last bit the extract needs.
1180
    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
1181
 
1182
    // Can't handle the case where the extract spans multiple inputs.
1183
    if (MergeSrcIdx != EndMergeSrcIdx)
1184
      return false;
1185
 
1186
    // TODO: We could modify MI in place in most cases.
1187
    Builder.setInstr(MI);
1188
    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
1189
                         Offset - MergeSrcIdx * MergeSrcSize);
1190
    UpdatedDefs.push_back(DstReg);
1191
    markInstAndDefDead(MI, *MergeI, DeadInsts);
1192
    return true;
1193
  }
1194
 
1195
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  ///
  /// Dispatches on MI's opcode to the per-artifact combine, then drains the
  /// UpdatedDefs worklist, re-queueing (via \p WrapperObserver) any user that
  /// could itself become combinable.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    ArtifactValueFinder Finder(MRI, Builder, LI);

    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts,
                                           UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_BUILD_VECTOR:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          // Look through copies: requeue the copy's def instead.
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
1297
 
1298
private:
1299
  static Register getArtifactSrcReg(const MachineInstr &MI) {
1300
    switch (MI.getOpcode()) {
1301
    case TargetOpcode::COPY:
1302
    case TargetOpcode::G_TRUNC:
1303
    case TargetOpcode::G_ZEXT:
1304
    case TargetOpcode::G_ANYEXT:
1305
    case TargetOpcode::G_SEXT:
1306
    case TargetOpcode::G_EXTRACT:
1307
      return MI.getOperand(1).getReg();
1308
    case TargetOpcode::G_UNMERGE_VALUES:
1309
      return MI.getOperand(MI.getNumOperands() - 1).getReg();
1310
    default:
1311
      llvm_unreachable("Not a legalization artifact happen");
1312
    }
1313
  }
1314
 
1315
  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
  /// (either by killing it or changing operands) results in DefMI being dead
  /// too. In-between COPYs or artifact-casts are also collected if they are
  /// dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      // Only keep walking while each link of the chain is single-use; a
      // second user would keep the intermediate instruction alive.
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    // If the walk reached DefMI, decide whether DefMI itself dies: every def
    // other than DefIdx must be unused, and the DefIdx def's single use is
    // the chain being removed.
    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          // NOTE(review): this break leaves IsDead == true even though the
          // DefIdx def still has additional users — presumably the remaining
          // checks make that safe here; confirm before relying on it.
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }
1371
 
1372
  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
1373
  /// dead due to MI being killed, then mark DefMI as dead too.
1374
  /// Some of the combines (extends(trunc)), try to walk through redundant
1375
  /// copies in between the extends and the truncs, and this attempts to collect
1376
  /// the in between copies if they're dead.
1377
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
1378
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
1379
                          unsigned DefIdx = 0) {
1380
    DeadInsts.push_back(&MI);
1381
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
1382
  }
1383
 
1384
  /// Erase the dead instructions in the list and call the observer hooks.
1385
  /// Normally the Legalizer will deal with erasing instructions that have been
1386
  /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
1387
  /// process instructions which have been marked dead, but otherwise break the
1388
  /// MIR by introducing multiple vreg defs. For those cases, allow the combines
1389
  /// to explicitly delete the instructions before we run into trouble.
1390
  void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
1391
                             GISelObserverWrapper &WrapperObserver) {
1392
    for (auto *DeadMI : DeadInsts) {
1393
      LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
1394
      WrapperObserver.erasingInstr(*DeadMI);
1395
      DeadMI->eraseFromParent();
1396
    }
1397
    DeadInsts.clear();
1398
  }
1399
 
1400
  /// Checks if the target legalizer info has specified anything about the
1401
  /// instruction, or if unsupported.
1402
  bool isInstUnsupported(const LegalityQuery &Query) const {
1403
    using namespace LegalizeActions;
1404
    auto Step = LI.getAction(Query);
1405
    return Step.Action == Unsupported || Step.Action == NotFound;
1406
  }
1407
 
1408
  bool isInstLegal(const LegalityQuery &Query) const {
1409
    return LI.getAction(Query).Action == LegalizeActions::Legal;
1410
  }
1411
 
1412
  bool isConstantUnsupported(LLT Ty) const {
1413
    if (!Ty.isVector())
1414
      return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
1415
 
1416
    LLT EltTy = Ty.getElementType();
1417
    return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
1418
           isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
1419
  }
1420
 
1421
  /// Looks through copy instructions and returns the actual
1422
  /// source register.
1423
  Register lookThroughCopyInstrs(Register Reg) {
1424
    using namespace llvm::MIPatternMatch;
1425
 
1426
    Register TmpReg;
1427
    while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
1428
      if (MRI.getType(TmpReg).isValid())
1429
        Reg = TmpReg;
1430
      else
1431
        break;
1432
    }
1433
    return Reg;
1434
  }
1435
};
1436
 
1437
} // namespace llvm
1438
 
1439
#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H