| Line No. | Line |
|---|---|
| 1 | //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-// |
| 2 | // |
||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
| 6 | // |
||
| 7 | //===----------------------------------------------------------------------===// |
||
| 8 | // This file contains some helper functions which try to clean up artifacts |
||
| 9 | // such as G_TRUNCs/G_[ZSA]EXTs that were created during legalization to make |
||
| 10 | // the types match. This file also contains some combines of merges that happen |
||
| 11 | // at the end of the legalization. |
||
| 12 | //===----------------------------------------------------------------------===// |
||
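As an illustrative sketch of what "artifact" means here (hypothetical virtual registers and types, not taken from the file): a G_TRUNC introduced by legalization that is immediately re-widened by a G_ANYEXT of the original width folds back to the original value, which is what `tryCombineAnyExt` below does.

```
; Before: legalization left a trunc/anyext pair behind (hypothetical vregs).
%1:_(s16) = G_TRUNC %0:_(s32)
%2:_(s32) = G_ANYEXT %1:_(s16)
; After tryCombineAnyExt: the pair collapses to the original value (or a copy of it).
%2:_(s32) = COPY %0:_(s32)
```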
| 13 | |||
| 14 | #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
||
| 15 | #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |
||
| 16 | |||
| 17 | #include "llvm/ADT/SmallBitVector.h" |
||
| 18 | #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" |
||
| 19 | #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" |
||
| 20 | #include "llvm/CodeGen/GlobalISel/Legalizer.h" |
||
| 21 | #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" |
||
| 22 | #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
||
| 23 | #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" |
||
| 24 | #include "llvm/CodeGen/GlobalISel/Utils.h" |
||
| 25 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
||
| 26 | #include "llvm/CodeGen/Register.h" |
||
| 27 | #include "llvm/IR/Constants.h" |
||
| 28 | #include "llvm/Support/Debug.h" |
||
| 29 | |||
| 30 | #define DEBUG_TYPE "legalizer" |
||
| 31 | |||
| 32 | namespace llvm { |
||
| 33 | class LegalizationArtifactCombiner { |
||
| 34 | MachineIRBuilder &Builder; |
||
| 35 | MachineRegisterInfo &MRI; |
||
| 36 | const LegalizerInfo &LI; |
||
| 37 | |||
| 38 | static bool isArtifactCast(unsigned Opc) { |
||
| 39 | switch (Opc) { |
||
| 40 | case TargetOpcode::G_TRUNC: |
||
| 41 | case TargetOpcode::G_SEXT: |
||
| 42 | case TargetOpcode::G_ZEXT: |
||
| 43 | case TargetOpcode::G_ANYEXT: |
||
| 44 | return true; |
||
| 45 | default: |
||
| 46 | return false; |
||
| 47 | } |
||
| 48 | } |
||
| 49 | |||
| 50 | public: |
||
| 51 | LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, |
||
| 52 | const LegalizerInfo &LI) |
||
| 53 | : Builder(B), MRI(MRI), LI(LI) {} |
||
| 54 | |||
| 55 | bool tryCombineAnyExt(MachineInstr &MI, |
||
| 56 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 57 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 58 | GISelObserverWrapper &Observer) { |
||
| 59 | using namespace llvm::MIPatternMatch; |
||
| 60 | assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); |
||
| 61 | |||
| 62 | Builder.setInstrAndDebugLoc(MI); |
||
| 63 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 64 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
| 65 | |||
| 66 | // aext(trunc x) -> aext/copy/trunc x |
||
| 67 | Register TruncSrc; |
||
| 68 | if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
||
| 69 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
||
| 70 | if (MRI.getType(DstReg) == MRI.getType(TruncSrc)) |
||
| 71 | replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs, |
||
| 72 | Observer); |
||
| 73 | else |
||
| 74 | Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); |
||
| 75 | UpdatedDefs.push_back(DstReg); |
||
| 76 | markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
||
| 77 | return true; |
||
| 78 | } |
||
| 79 | |||
| 80 | // aext([asz]ext x) -> [asz]ext x |
||
| 81 | Register ExtSrc; |
||
| 82 | MachineInstr *ExtMI; |
||
| 83 | if (mi_match(SrcReg, MRI, |
||
| 84 | m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)), |
||
| 85 | m_GSExt(m_Reg(ExtSrc)), |
||
| 86 | m_GZExt(m_Reg(ExtSrc)))))) { |
||
| 87 | Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); |
||
| 88 | UpdatedDefs.push_back(DstReg); |
||
| 89 | markInstAndDefDead(MI, *ExtMI, DeadInsts); |
||
| 90 | return true; |
||
| 91 | } |
||
| 92 | |||
| 93 | // Try to fold aext(g_constant) when the larger constant type is legal. |
||
| 94 | auto *SrcMI = MRI.getVRegDef(SrcReg); |
||
| 95 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
||
| 96 | const LLT DstTy = MRI.getType(DstReg); |
||
| 97 | if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
||
| 98 | auto &CstVal = SrcMI->getOperand(1); |
||
| 99 | Builder.buildConstant( |
||
| 100 | DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits())); |
||
| 101 | UpdatedDefs.push_back(DstReg); |
||
| 102 | markInstAndDefDead(MI, *SrcMI, DeadInsts); |
||
| 103 | return true; |
||
| 104 | } |
||
| 105 | } |
||
| 106 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
||
| 107 | } |
||
| 108 | |||
| 109 | bool tryCombineZExt(MachineInstr &MI, |
||
| 110 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 111 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 112 | GISelObserverWrapper &Observer) { |
||
| 113 | using namespace llvm::MIPatternMatch; |
||
| 114 | assert(MI.getOpcode() == TargetOpcode::G_ZEXT); |
||
| 115 | |||
| 116 | Builder.setInstrAndDebugLoc(MI); |
||
| 117 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 118 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
| 119 | |||
| 120 | // zext(trunc x) -> and (aext/copy/trunc x), mask |
||
| 121 | // zext(sext x) -> and (sext x), mask |
||
| 122 | Register TruncSrc; |
||
| 123 | Register SextSrc; |
||
| 124 | if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) || |
||
| 125 | mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) { |
||
| 126 | LLT DstTy = MRI.getType(DstReg); |
||
| 127 | if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || |
||
| 128 | isConstantUnsupported(DstTy)) |
||
| 129 | return false; |
||
| 130 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
||
| 131 | LLT SrcTy = MRI.getType(SrcReg); |
||
| 132 | APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); |
||
| 133 | auto Mask = Builder.buildConstant( |
||
| 134 | DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); |
||
| 135 | if (SextSrc && (DstTy != MRI.getType(SextSrc))) |
||
| 136 | SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0); |
||
| 137 | if (TruncSrc && (DstTy != MRI.getType(TruncSrc))) |
||
| 138 | TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); |
||
| 139 | Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask); |
||
| 140 | markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
||
| 141 | return true; |
||
| 142 | } |
||
| 143 | |||
| 144 | // zext(zext x) -> (zext x) |
||
| 145 | Register ZextSrc; |
||
| 146 | if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) { |
||
| 147 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
||
| 148 | Observer.changingInstr(MI); |
||
| 149 | MI.getOperand(1).setReg(ZextSrc); |
||
| 150 | Observer.changedInstr(MI); |
||
| 151 | UpdatedDefs.push_back(DstReg); |
||
| 152 | markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
||
| 153 | return true; |
||
| 154 | } |
||
| 155 | |||
| 156 | // Try to fold zext(g_constant) when the larger constant type is legal. |
||
| 157 | auto *SrcMI = MRI.getVRegDef(SrcReg); |
||
| 158 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
||
| 159 | const LLT DstTy = MRI.getType(DstReg); |
||
| 160 | if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
||
| 161 | auto &CstVal = SrcMI->getOperand(1); |
||
| 162 | Builder.buildConstant( |
||
| 163 | DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits())); |
||
| 164 | UpdatedDefs.push_back(DstReg); |
||
| 165 | markInstAndDefDead(MI, *SrcMI, DeadInsts); |
||
| 166 | return true; |
||
| 167 | } |
||
| 168 | } |
||
| 169 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
||
| 170 | } |
||
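A hedged sketch of the `zext(trunc x)` case above, assuming G_AND and G_CONSTANT are legal for the destination type (register numbers and widths are hypothetical): the combiner masks the original value instead of keeping the trunc/zext pair.

```
; Before (hypothetical vregs): a zext of a trunc of a same-width value.
%1:_(s8)  = G_TRUNC %0:_(s32)
%2:_(s32) = G_ZEXT %1:_(s8)
; After tryCombineZExt: AND the original value with an all-ones mask of the narrow width.
%3:_(s32) = G_CONSTANT i32 255
%2:_(s32) = G_AND %0:_(s32), %3:_(s32)
```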
| 171 | |||
| 172 | bool tryCombineSExt(MachineInstr &MI, |
||
| 173 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 174 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
| 175 | using namespace llvm::MIPatternMatch; |
||
| 176 | assert(MI.getOpcode() == TargetOpcode::G_SEXT); |
||
| 177 | |||
| 178 | Builder.setInstrAndDebugLoc(MI); |
||
| 179 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 180 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
| 181 | |||
| 182 | // sext(trunc x) -> (sext_inreg (aext/copy/trunc x), c) |
||
| 183 | Register TruncSrc; |
||
| 184 | if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
||
| 185 | LLT DstTy = MRI.getType(DstReg); |
||
| 186 | if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}})) |
||
| 187 | return false; |
||
| 188 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); |
||
| 189 | LLT SrcTy = MRI.getType(SrcReg); |
||
| 190 | uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); |
||
| 191 | if (DstTy != MRI.getType(TruncSrc)) |
||
| 192 | TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); |
||
| 193 | Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits); |
||
| 194 | markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
||
| 195 | return true; |
||
| 196 | } |
||
| 197 | |||
| 198 | // sext(zext x) -> (zext x) |
||
| 199 | // sext(sext x) -> (sext x) |
||
| 200 | Register ExtSrc; |
||
| 201 | MachineInstr *ExtMI; |
||
| 202 | if (mi_match(SrcReg, MRI, |
||
| 203 | m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)), |
||
| 204 | m_GSExt(m_Reg(ExtSrc)))))) { |
||
| 205 | LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI); |
||
| 206 | Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc}); |
||
| 207 | UpdatedDefs.push_back(DstReg); |
||
| 208 | markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); |
||
| 209 | return true; |
||
| 210 | } |
||
| 211 | |||
| 212 | // Try to fold sext(g_constant) when the larger constant type is legal. |
||
| 213 | auto *SrcMI = MRI.getVRegDef(SrcReg); |
||
| 214 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
||
| 215 | const LLT DstTy = MRI.getType(DstReg); |
||
| 216 | if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
||
| 217 | auto &CstVal = SrcMI->getOperand(1); |
||
| 218 | Builder.buildConstant( |
||
| 219 | DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits())); |
||
| 220 | UpdatedDefs.push_back(DstReg); |
||
| 221 | markInstAndDefDead(MI, *SrcMI, DeadInsts); |
||
| 222 | return true; |
||
| 223 | } |
||
| 224 | } |
||
| 225 | |||
| 226 | return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs); |
||
| 227 | } |
||
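A minimal sketch of the `sext(trunc x)` rewrite, assuming G_SEXT_INREG is supported for the destination type (hypothetical vregs): the pair becomes a single in-register sign extension from the narrow width.

```
; Before (hypothetical vregs): a sext of a trunc.
%1:_(s16) = G_TRUNC %0:_(s32)
%2:_(s32) = G_SEXT %1:_(s16)
; After tryCombineSExt: sign-extend in place from bit 16 of the original value.
%2:_(s32) = G_SEXT_INREG %0:_(s32), 16
```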
| 228 | |||
| 229 | bool tryCombineTrunc(MachineInstr &MI, |
||
| 230 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 231 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 232 | GISelObserverWrapper &Observer) { |
||
| 233 | using namespace llvm::MIPatternMatch; |
||
| 234 | assert(MI.getOpcode() == TargetOpcode::G_TRUNC); |
||
| 235 | |||
| 236 | Builder.setInstr(MI); |
||
| 237 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 238 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
| 239 | |||
| 240 | // Try to fold trunc(g_constant) when the smaller constant type is legal. |
||
| 241 | auto *SrcMI = MRI.getVRegDef(SrcReg); |
||
| 242 | if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) { |
||
| 243 | const LLT DstTy = MRI.getType(DstReg); |
||
| 244 | if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) { |
||
| 245 | auto &CstVal = SrcMI->getOperand(1); |
||
| 246 | Builder.buildConstant( |
||
| 247 | DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits())); |
||
| 248 | UpdatedDefs.push_back(DstReg); |
||
| 249 | markInstAndDefDead(MI, *SrcMI, DeadInsts); |
||
| 250 | return true; |
||
| 251 | } |
||
| 252 | } |
||
| 253 | |||
| 254 | // Try to fold trunc(merge) to directly use the source of the merge. |
||
| 255 | // This gets rid of large, difficult-to-legalize merges. |
||
| 256 | if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) { |
||
| 257 | const Register MergeSrcReg = SrcMerge->getSourceReg(0); |
||
| 258 | const LLT MergeSrcTy = MRI.getType(MergeSrcReg); |
||
| 259 | const LLT DstTy = MRI.getType(DstReg); |
||
| 260 | |||
| 261 | // We can only fold if the types are scalar |
||
| 262 | const unsigned DstSize = DstTy.getSizeInBits(); |
||
| 263 | const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits(); |
||
| 264 | if (!DstTy.isScalar() || !MergeSrcTy.isScalar()) |
||
| 265 | return false; |
||
| 266 | |||
| 267 | if (DstSize < MergeSrcSize) { |
||
| 268 | // When the merge source is larger than the destination, we can just |
||
| 269 | // truncate the merge source directly |
||
| 270 | if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}})) |
||
| 271 | return false; |
||
| 272 | |||
| 273 | LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: " |
||
| 274 | << MI); |
||
| 275 | |||
| 276 | Builder.buildTrunc(DstReg, MergeSrcReg); |
||
| 277 | UpdatedDefs.push_back(DstReg); |
||
| 278 | } else if (DstSize == MergeSrcSize) { |
||
| 279 | // If the sizes match we can simply try to replace the register |
||
| 280 | LLVM_DEBUG( |
||
| 281 | dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: " |
||
| 282 | << MI); |
||
| 283 | replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs, |
||
| 284 | Observer); |
||
| 285 | } else if (DstSize % MergeSrcSize == 0) { |
||
| 286 | // If the trunc size is a multiple of the merge source size we can use |
||
| 287 | // a smaller merge instead |
||
| 288 | if (isInstUnsupported( |
||
| 289 | {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}})) |
||
| 290 | return false; |
||
| 291 | |||
| 292 | LLVM_DEBUG( |
||
| 293 | dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: " |
||
| 294 | << MI); |
||
| 295 | |||
| 296 | const unsigned NumSrcs = DstSize / MergeSrcSize; |
||
| 297 | assert(NumSrcs < SrcMI->getNumOperands() - 1 && |
||
| 298 | "trunc(merge) should require less inputs than merge"); |
||
| 299 | SmallVector<Register, 8> SrcRegs(NumSrcs); |
||
| 300 | for (unsigned i = 0; i < NumSrcs; ++i) |
||
| 301 | SrcRegs[i] = SrcMerge->getSourceReg(i); |
||
| 302 | |||
| 303 | Builder.buildMergeValues(DstReg, SrcRegs); |
||
| 304 | UpdatedDefs.push_back(DstReg); |
||
| 305 | } else { |
||
| 306 | // Unable to combine |
||
| 307 | return false; |
||
| 308 | } |
||
| 309 | |||
| 310 | markInstAndDefDead(MI, *SrcMerge, DeadInsts); |
||
| 311 | return true; |
||
| 312 | } |
||
| 313 | |||
| 314 | // trunc(trunc) -> trunc |
||
| 315 | Register TruncSrc; |
||
| 316 | if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { |
||
| 317 | // Always combine trunc(trunc) since the eventual resulting trunc must be |
||
| 318 | // legal anyway as it must be legal for all outputs of the consumer type |
||
| 319 | // set. |
||
| 320 | LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI); |
||
| 321 | |||
| 322 | Builder.buildTrunc(DstReg, TruncSrc); |
||
| 323 | UpdatedDefs.push_back(DstReg); |
||
| 324 | markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts); |
||
| 325 | return true; |
||
| 326 | } |
||
| 327 | |||
| 328 | return false; |
||
| 329 | } |
||
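A sketch of one of the `trunc(merge)` cases handled above, the one where the truncated size is a larger multiple of the merge source size (hypothetical vregs and widths, assuming the smaller G_MERGE_VALUES is supported): only the low sources are re-merged.

```
; Before (hypothetical vregs): a trunc of a merge wider than needed.
%4:_(s64) = G_MERGE_VALUES %0:_(s16), %1:_(s16), %2:_(s16), %3:_(s16)
%5:_(s32) = G_TRUNC %4:_(s64)
; After tryCombineTrunc (DstSize % MergeSrcSize == 0): merge only the sources that are kept.
%5:_(s32) = G_MERGE_VALUES %0:_(s16), %1:_(s16)
```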
| 330 | |||
| 331 | /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF). |
||
| 332 | bool tryFoldImplicitDef(MachineInstr &MI, |
||
| 333 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 334 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
| 335 | unsigned Opcode = MI.getOpcode(); |
||
| 336 | assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT || |
||
| 337 | Opcode == TargetOpcode::G_SEXT); |
||
| 338 | |||
| 339 | if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, |
||
| 340 | MI.getOperand(1).getReg(), MRI)) { |
||
| 341 | Builder.setInstr(MI); |
||
| 342 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 343 | LLT DstTy = MRI.getType(DstReg); |
||
| 344 | |||
| 345 | if (Opcode == TargetOpcode::G_ANYEXT) { |
||
| 346 | // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF |
||
| 347 | if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) |
||
| 348 | return false; |
||
| 349 | LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;); |
||
| 350 | Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {}); |
||
| 351 | UpdatedDefs.push_back(DstReg); |
||
| 352 | } else { |
||
| 353 | // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top |
||
| 354 | // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT. |
||
| 355 | if (isConstantUnsupported(DstTy)) |
||
| 356 | return false; |
||
| 357 | LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;); |
||
| 358 | Builder.buildConstant(DstReg, 0); |
||
| 359 | UpdatedDefs.push_back(DstReg); |
||
| 360 | } |
||
| 361 | |||
| 362 | markInstAndDefDead(MI, *DefMI, DeadInsts); |
||
| 363 | return true; |
||
| 364 | } |
||
| 365 | return false; |
||
| 366 | } |
||
| 367 | |||
| 368 | bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI, |
||
| 369 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 370 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
| 371 | |||
| 372 | assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); |
||
| 373 | |||
| 374 | const unsigned CastOpc = CastMI.getOpcode(); |
||
| 375 | |||
| 376 | if (!isArtifactCast(CastOpc)) |
||
| 377 | return false; |
||
| 378 | |||
| 379 | const unsigned NumDefs = MI.getNumOperands() - 1; |
||
| 380 | |||
| 381 | const Register CastSrcReg = CastMI.getOperand(1).getReg(); |
||
| 382 | const LLT CastSrcTy = MRI.getType(CastSrcReg); |
||
| 383 | const LLT DestTy = MRI.getType(MI.getOperand(0).getReg()); |
||
| 384 | const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg()); |
||
| 385 | |||
| 386 | const unsigned CastSrcSize = CastSrcTy.getSizeInBits(); |
||
| 387 | const unsigned DestSize = DestTy.getSizeInBits(); |
||
| 388 | |||
| 389 | if (CastOpc == TargetOpcode::G_TRUNC) { |
||
| 390 | if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) { |
||
| 391 | // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>) |
||
| 392 | // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1 |
||
| 393 | // => |
||
| 394 | // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0 |
||
| 395 | // %2:_(s8) = G_TRUNC %6 |
||
| 396 | // %3:_(s8) = G_TRUNC %7 |
||
| 397 | // %4:_(s8) = G_TRUNC %8 |
||
| 398 | // %5:_(s8) = G_TRUNC %9 |
||
| 399 | |||
| 400 | unsigned UnmergeNumElts = |
||
| 401 | DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1; |
||
| 402 | LLT UnmergeTy = CastSrcTy.changeElementCount( |
||
| 403 | ElementCount::getFixed(UnmergeNumElts)); |
||
| 404 | |||
| 405 | if (isInstUnsupported( |
||
| 406 | {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}})) |
||
| 407 | return false; |
||
| 408 | |||
| 409 | Builder.setInstr(MI); |
||
| 410 | auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg); |
||
| 411 | |||
| 412 | for (unsigned I = 0; I != NumDefs; ++I) { |
||
| 413 | Register DefReg = MI.getOperand(I).getReg(); |
||
| 414 | UpdatedDefs.push_back(DefReg); |
||
| 415 | Builder.buildTrunc(DefReg, NewUnmerge.getReg(I)); |
||
| 416 | } |
||
| 417 | |||
| 418 | markInstAndDefDead(MI, CastMI, DeadInsts); |
||
| 419 | return true; |
||
| 420 | } |
||
| 421 | |||
| 422 | if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) { |
||
| 423 | // %1:_(s16) = G_TRUNC %0(s32) |
||
| 424 | // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1 |
||
| 425 | // => |
||
| 426 | // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 |
||
| 427 | |||
| 428 | // Unmerge(trunc) can be combined if the trunc source size is a multiple |
||
| 429 | // of the unmerge destination size |
||
| 430 | if (CastSrcSize % DestSize != 0) |
||
| 431 | return false; |
||
| 432 | |||
| 433 | // Check if the new unmerge is supported |
||
| 434 | if (isInstUnsupported( |
||
| 435 | {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}})) |
||
| 436 | return false; |
||
| 437 | |||
| 438 | // Gather the original destination registers and create new ones for the |
||
| 439 | // unused bits |
||
| 440 | const unsigned NewNumDefs = CastSrcSize / DestSize; |
||
| 441 | SmallVector<Register, 8> DstRegs(NewNumDefs); |
||
| 442 | for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) { |
||
| 443 | if (Idx < NumDefs) |
||
| 444 | DstRegs[Idx] = MI.getOperand(Idx).getReg(); |
||
| 445 | else |
||
| 446 | DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy); |
||
| 447 | } |
||
| 448 | |||
| 449 | // Build new unmerge |
||
| 450 | Builder.setInstr(MI); |
||
| 451 | Builder.buildUnmerge(DstRegs, CastSrcReg); |
||
| 452 | UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs); |
||
| 453 | markInstAndDefDead(MI, CastMI, DeadInsts); |
||
| 454 | return true; |
||
| 455 | } |
||
| 456 | } |
||
| 457 | |||
| 458 | // TODO: support combines with other casts as well |
||
| 459 | return false; |
||
| 460 | } |
||
| 461 | |||
| 462 | static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp, |
||
| 463 | LLT OpTy, LLT DestTy) { |
||
| 464 | // Check if we found a definition that is like G_MERGE_VALUES. |
||
| 465 | switch (MergeOp) { |
||
| 466 | default: |
||
| 467 | return false; |
||
| 468 | case TargetOpcode::G_BUILD_VECTOR: |
||
| 469 | case TargetOpcode::G_MERGE_VALUES: |
||
| 470 | // The convert operation that we will need to insert is |
||
| 471 | // going to convert the input of that type of instruction (scalar) |
||
| 472 | // to the destination type (DestTy). |
||
| 473 | // The conversion needs to stay in the same domain (scalar to scalar |
||
| 474 | // and vector to vector), so if we were to allow to fold the merge |
||
| 475 | // we would need to insert some bitcasts. |
||
| 476 | // E.g., |
||
| 477 | // <2 x s16> = build_vector s16, s16 |
||
| 478 | // <2 x s32> = zext <2 x s16> |
||
| 479 | // <2 x s16>, <2 x s16> = unmerge <2 x s32> |
||
| 480 | // |
||
| 481 | // As is the folding would produce: |
||
| 482 | // <2 x s16> = zext s16 <-- scalar to vector |
||
| 483 | // <2 x s16> = zext s16 <-- scalar to vector |
||
| 484 | // Which is invalid. |
||
| 485 | // Instead we would want to generate: |
||
| 486 | // s32 = zext s16 |
||
| 487 | // <2 x s16> = bitcast s32 |
||
| 488 | // s32 = zext s16 |
||
| 489 | // <2 x s16> = bitcast s32 |
||
| 490 | // |
||
| 491 | // That is not done yet. |
||
| 492 | if (ConvertOp == 0) |
||
| 493 | return true; |
||
| 494 | return !DestTy.isVector() && OpTy.isVector() && |
||
| 495 | DestTy == OpTy.getElementType(); |
||
| 496 | case TargetOpcode::G_CONCAT_VECTORS: { |
||
| 497 | if (ConvertOp == 0) |
||
| 498 | return true; |
||
| 499 | if (!DestTy.isVector()) |
||
| 500 | return false; |
||
| 501 | |||
| 502 | const unsigned OpEltSize = OpTy.getElementType().getSizeInBits(); |
||
| 503 | |||
| 504 | // Don't handle scalarization with a cast that isn't in the same |
||
| 505 | // direction as the vector cast. This could be handled, but it would |
||
| 506 | // require more intermediate unmerges. |
||
| 507 | if (ConvertOp == TargetOpcode::G_TRUNC) |
||
| 508 | return DestTy.getSizeInBits() <= OpEltSize; |
||
| 509 | return DestTy.getSizeInBits() >= OpEltSize; |
||
| 510 | } |
||
| 511 | } |
||
| 512 | } |
||
| 513 | |||
| 514 | /// Try to replace DstReg with SrcReg or build a COPY instruction |
||
| 515 | /// depending on the register constraints. |
||
| 516 | static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg, |
||
| 517 | MachineRegisterInfo &MRI, |
||
| 518 | MachineIRBuilder &Builder, |
||
| 519 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 520 | GISelChangeObserver &Observer) { |
||
| 521 | if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) { |
||
| 522 | Builder.buildCopy(DstReg, SrcReg); |
||
| 523 | UpdatedDefs.push_back(DstReg); |
||
| 524 | return; |
||
| 525 | } |
||
| 526 | SmallVector<MachineInstr *, 4> UseMIs; |
||
| 527 | // Get the users and notify the observer before replacing. |
||
| 528 | for (auto &UseMI : MRI.use_instructions(DstReg)) { |
||
| 529 | UseMIs.push_back(&UseMI); |
||
| 530 | Observer.changingInstr(UseMI); |
||
| 531 | } |
||
| 532 | // Replace the registers. |
||
| 533 | MRI.replaceRegWith(DstReg, SrcReg); |
||
| 534 | UpdatedDefs.push_back(SrcReg); |
||
| 535 | // Notify the observer that we changed the instructions. |
||
| 536 | for (auto *UseMI : UseMIs) |
||
| 537 | Observer.changedInstr(*UseMI); |
||
| 538 | } |
||
| 539 | |||
| 540 | /// Return the operand index in \p MI that defines \p Def |
||
| 541 | static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { |
||
| 542 | unsigned DefIdx = 0; |
||
| 543 | for (const MachineOperand &Def : MI.defs()) { |
||
| 544 | if (Def.getReg() == SearchDef) |
||
| 545 | break; |
||
| 546 | ++DefIdx; |
||
| 547 | } |
||
| 548 | |||
| 549 | return DefIdx; |
||
| 550 | } |
||
| 551 | |||
| 552 | /// This class provides utilities for finding source registers of specific |
||
| 553 | /// bit ranges in an artifact. The routines can look through the source |
||
| 554 | /// registers if they're other artifacts to try to find a non-artifact source |
||
| 555 | /// of a value. |
||
| 556 | class ArtifactValueFinder { |
||
| 557 | MachineRegisterInfo &MRI; |
||
| 558 | MachineIRBuilder &MIB; |
||
| 559 | const LegalizerInfo &LI; |
||
| 560 | |||
| 561 | // Stores the best register found in the current query so far. |
||
| 562 | Register CurrentBest = Register(); |
||
| 563 | |||
| 564 | /// Given a concat_vectors op \p Concat and a start bit and size, try to |
||
| 565 | /// find the origin of the value defined by that start position and size. |
||
| 566 | /// |
||
| 567 | /// \returns a register with the requested size, or the current best |
||
| 568 | /// register found during the current query. |
||
| 569 | Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit, |
||
| 570 | unsigned Size) { |
||
| 571 | assert(Size > 0); |
||
| 572 | |||
| 573 | // Find the source operand that provides the bits requested. |
||
| 574 | Register Src1Reg = Concat.getSourceReg(0); |
||
| 575 | unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); |
||
| 576 | |||
| 577 | // Operand index of the source that provides the start of the bit range. |
||
| 578 | unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
||
| 579 | // Offset into the source at which the bit range starts. |
||
| 580 | unsigned InRegOffset = StartBit % SrcSize; |
||
| 581 | // Check that the bits don't span multiple sources. |
||
| 582 | // FIXME: we might be able to return multiple sources? Or create an |
||
| 583 | // appropriate concat to make it fit. |
||
| 584 | if (InRegOffset + Size > SrcSize) |
||
| 585 | return CurrentBest; |
||
| 586 | |||
| 587 | Register SrcReg = Concat.getReg(StartSrcIdx); |
||
| 588 | if (InRegOffset == 0 && Size == SrcSize) { |
||
| 589 | CurrentBest = SrcReg; |
||
| 590 | return findValueFromDefImpl(SrcReg, 0, Size); |
||
| 591 | } |
||
| 592 | |||
| 593 | return findValueFromDefImpl(SrcReg, InRegOffset, Size); |
||
| 594 | } |
||
| 595 | |||
| 596 | /// Given a build_vector op \p BV and a start bit and size, try to find |
||
| 597 | /// the origin of the value defined by that start position and size. |
||
| 598 | /// |
||
| 599 | /// \returns a register with the requested size, or the current best |
||
| 600 | /// register found during the current query. |
||
| 601 | Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit, |
||
| 602 | unsigned Size) { |
||
| 603 | assert(Size > 0); |
||
| 604 | |||
| 605 | // Find the source operand that provides the bits requested. |
||
| 606 | Register Src1Reg = BV.getSourceReg(0); |
||
| 607 | unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits(); |
||
| 608 | |||
| 609 | // Operand index of the source that provides the start of the bit range. |
||
| 610 | unsigned StartSrcIdx = (StartBit / SrcSize) + 1; |
||
| 611 | // Offset into the source at which the bit range starts. |
||
| 612 | unsigned InRegOffset = StartBit % SrcSize; |
||
| 613 | |||
| 614 | if (InRegOffset != 0) |
||
| 615 | return CurrentBest; // Give up, bits don't start at a scalar source. |
||
| 616 | if (Size < SrcSize) |
||
| 617 | return CurrentBest; // Scalar source is too large for requested bits. |
||
| 618 | |||
| 619 | // If the bits cover multiple sources evenly, then create a new |
||
| 620 | // build_vector to synthesize the required size, if that's been requested. |
||
| 621 | if (Size > SrcSize) { |
||
| 622 | if (Size % SrcSize > 0) |
||
| 623 | return CurrentBest; // Isn't covered exactly by sources. |
||
| 624 | |||
| 625 | unsigned NumSrcsUsed = Size / SrcSize; |
||
| 626 | // If we're requesting all of the sources, just return this def. |
||
| 627 | if (NumSrcsUsed == BV.getNumSources()) |
||
| 628 | return BV.getReg(0); |
||
| 629 | |||
| 630 | LLT SrcTy = MRI.getType(Src1Reg); |
||
| 631 | LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy); |
||
| 632 | |||
| 633 | // Check if the resulting build vector would be legal. |
||
| 634 | LegalizeActionStep ActionStep = |
||
| 635 | LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); |
||
| 636 | if (ActionStep.Action != LegalizeActions::Legal) |
||
| 637 | return CurrentBest; |
||
| 638 | |||
| 639 | SmallVector<Register> NewSrcs; |
||
| 640 | for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; |
||
| 641 | ++SrcIdx) |
||
| 642 | NewSrcs.push_back(BV.getReg(SrcIdx)); |
||
| 643 | MIB.setInstrAndDebugLoc(BV); |
||
| 644 | return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0); |
||
| 645 | } |
||
| 646 | // A single source is requested, just return it. |
||
| 647 | return BV.getReg(StartSrcIdx); |
||
| 648 | } |
||
| 649 | |||
| 650 | /// Given a G_INSERT op \p MI and a start bit and size, try to find |
||
| 651 | /// the origin of the value defined by that start position and size. |
||
| 652 | /// |
||
| 653 | /// \returns a register with the requested size, or the current best |
||
| 654 | /// register found during the current query. |
||
| 655 | Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, |
||
| 656 | unsigned Size) { |
||
| 657 | assert(MI.getOpcode() == TargetOpcode::G_INSERT); |
||
| 658 | assert(Size > 0); |
||
| 659 | |||
| 660 | Register ContainerSrcReg = MI.getOperand(1).getReg(); |
||
| 661 | Register InsertedReg = MI.getOperand(2).getReg(); |
||
| 662 | LLT InsertedRegTy = MRI.getType(InsertedReg); |
||
| 663 | unsigned InsertOffset = MI.getOperand(3).getImm(); |
||
| 664 | |||
| 665 | // There are 4 possible container/insertreg + requested bit-range layouts |
||
| 666 | // that the instruction and query could be representing. |
||
| 667 | // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO') |
||
| 668 | // and a start bit 'SB', with size S, giving an end bit 'EB', we could |
||
| 669 | // have... |
||
| 670 | // Scenario A: |
||
| 671 | // -------------------------- |
||
| 672 | // | INS | CONTAINER | |
||
| 673 | // -------------------------- |
||
| 674 | // | | |
||
| 675 | // SB EB |
||
| 676 | // |
||
| 677 | // Scenario B: |
||
| 678 | // -------------------------- |
||
| 679 | // | INS | CONTAINER | |
||
| 680 | // -------------------------- |
||
| 681 | // | | |
||
| 682 | // SB EB |
||
| 683 | // |
||
| 684 | // Scenario C: |
||
| 685 | // -------------------------- |
||
| 686 | // | CONTAINER | INS | |
||
| 687 | // -------------------------- |
||
| 688 | // | | |
||
| 689 | // SB EB |
||
| 690 | // |
||
| 691 | // Scenario D: |
||
| 692 | // -------------------------- |
||
| 693 | // | CONTAINER | INS | |
||
| 694 | // -------------------------- |
||
| 695 | // | | |
||
| 696 | // SB EB |
||
| 697 | // |
||
| 698 | // So therefore, A and D are requesting data from the INS operand, while |
||
| 699 | // B and C are requesting from the container operand. |
||
| 700 | |||
| 701 | unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits(); |
||
| 702 | unsigned EndBit = StartBit + Size; |
||
| 703 | unsigned NewStartBit; |
||
| 704 | Register SrcRegToUse; |
||
| 705 | if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) { |
||
| 706 | SrcRegToUse = ContainerSrcReg; |
||
| 707 | NewStartBit = StartBit; |
||
| 708 | return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); |
||
| 709 | } |
||
| 710 | if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) { |
||
| 711 | SrcRegToUse = InsertedReg; |
||
| 712 | NewStartBit = StartBit - InsertOffset; |
||
| 713 | if (NewStartBit == 0 && |
||
| 714 | Size == MRI.getType(SrcRegToUse).getSizeInBits()) |
||
| 715 | CurrentBest = SrcRegToUse; |
||
| 716 | return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); |
||
| 717 | } |
||
| 718 | // The bit range spans both the inserted and container regions. |
||
| 719 | return Register(); |
||
| 720 | } |
||
| 721 | |||
| 722 | /// Internal implementation for findValueFromDef(). findValueFromDef() |
||
| 723 | /// initializes some data like the CurrentBest register, which this method |
||
| 724 | /// and its callees rely upon. |
||
| 725 | Register findValueFromDefImpl(Register DefReg, unsigned StartBit, |
||
| 726 | unsigned Size) { |
||
| 727 | std::optional<DefinitionAndSourceRegister> DefSrcReg = |
||
| 728 | getDefSrcRegIgnoringCopies(DefReg, MRI); |
||
| 729 | MachineInstr *Def = DefSrcReg->MI; |
||
| 730 | DefReg = DefSrcReg->Reg; |
||
| 731 | // If the instruction has a single def, then simply delegate the search. |
||
| 732 | // For unmerge however with multiple defs, we need to compute the offset |
||
| 733 | // into the source of the unmerge. |
||
| 734 | switch (Def->getOpcode()) { |
||
| 735 | case TargetOpcode::G_CONCAT_VECTORS: |
||
| 736 | return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size); |
||
| 737 | case TargetOpcode::G_UNMERGE_VALUES: { |
||
| 738 | unsigned DefStartBit = 0; |
||
| 739 | unsigned DefSize = MRI.getType(DefReg).getSizeInBits(); |
||
| 740 | for (const auto &MO : Def->defs()) { |
||
| 741 | if (MO.getReg() == DefReg) |
||
| 742 | break; |
||
| 743 | DefStartBit += DefSize; |
||
| 744 | } |
||
| 745 | Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg(); |
||
| 746 | Register SrcOriginReg = |
||
| 747 | findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size); |
||
| 748 | if (SrcOriginReg) |
||
| 749 | return SrcOriginReg; |
||
| 750 | // Failed to find a further value. If the StartBit and Size perfectly |
||
| 751 | // covered the requested DefReg, return that since it's better than |
||
| 752 | // nothing. |
||
| 753 | if (StartBit == 0 && Size == DefSize) |
||
| 754 | return DefReg; |
||
| 755 | return CurrentBest; |
||
| 756 | } |
||
| 757 | case TargetOpcode::G_BUILD_VECTOR: |
||
| 758 | return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit, |
||
| 759 | Size); |
||
| 760 | case TargetOpcode::G_INSERT: |
||
| 761 | return findValueFromInsert(*Def, StartBit, Size); |
||
| 762 | default: |
||
| 763 | return CurrentBest; |
||
| 764 | } |
||
| 765 | } |
||
| 766 | |||
| 767 | public: |
||
| 768 | ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, |
||
| 769 | const LegalizerInfo &Info) |
||
| 770 | : MRI(Mri), MIB(Builder), LI(Info) {} |
||
| 771 | |||
| 772 | /// Try to find a source of the value defined in the def \p DefReg, starting |
||
| 773 | /// at position \p StartBit with size \p Size. |
||
| 774 | /// \returns a register with the requested size, or an empty Register if no |
||
| 775 | /// better value could be found. |
||
| 776 | Register findValueFromDef(Register DefReg, unsigned StartBit, |
||
| 777 | unsigned Size) { |
||
| 778 | CurrentBest = Register(); |
||
| 779 | Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); |
||
| 780 | return FoundReg != DefReg ? FoundReg : Register(); |
||
| 781 | } |
||
| 782 | |||
| 783 | /// Try to combine the defs of an unmerge \p MI by attempting to find |
||
| 784 | /// values that provides the bits for each def reg. |
||
| 785 | /// \returns true if all the defs of the unmerge have been made dead. |
||
| 786 | bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, |
||
| 787 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
| 788 | unsigned NumDefs = MI.getNumDefs(); |
||
| 789 | LLT DestTy = MRI.getType(MI.getReg(0)); |
||
| 790 | |||
| 791 | SmallBitVector DeadDefs(NumDefs); |
||
| 792 | for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
||
| 793 | Register DefReg = MI.getReg(DefIdx); |
||
| 794 | if (MRI.use_nodbg_empty(DefReg)) { |
||
| 795 | DeadDefs[DefIdx] = true; |
||
| 796 | continue; |
||
| 797 | } |
||
| 798 | Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); |
||
| 799 | if (!FoundVal) |
||
| 800 | continue; |
||
| 801 | if (MRI.getType(FoundVal) != DestTy) |
||
| 802 | continue; |
||
| 803 | |||
| 804 | replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs, |
||
| 805 | Observer); |
||
| 806 | // We only want to replace the uses, not the def of the old reg. |
||
| 807 | Observer.changingInstr(MI); |
||
| 808 | MI.getOperand(DefIdx).setReg(DefReg); |
||
| 809 | Observer.changedInstr(MI); |
||
| 810 | DeadDefs[DefIdx] = true; |
||
| 811 | } |
||
| 812 | return DeadDefs.all(); |
||
| 813 | } |
||
| 814 | |||
| 815 | GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size, |
||
| 816 | unsigned &DefOperandIdx) { |
||
| 817 | if (Register Def = findValueFromDefImpl(Reg, 0, Size)) { |
||
| 818 | if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) { |
||
| 819 | DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def); |
||
| 820 | return Unmerge; |
||
| 821 | } |
||
| 822 | } |
||
| 823 | return nullptr; |
||
| 824 | } |
||
| 825 | |||
| 826 | // Check if a sequence of elements from a merge-like instruction is defined |
||
| 827 | // by another sequence of elements defined by an unmerge. Most often this is |
||
| 828 | // the same sequence. Search for elements using findValueFromDefImpl. |
||
| 829 | bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx, |
||
| 830 | GUnmerge *Unmerge, unsigned UnmergeIdxStart, |
||
| 831 | unsigned NumElts, unsigned EltSize) { |
||
| 832 | assert(MergeStartIdx + NumElts <= MI.getNumSources()); |
||
| 833 | for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) { |
||
| 834 | unsigned EltUnmergeIdx; |
||
| 835 | GUnmerge *EltUnmerge = findUnmergeThatDefinesReg( |
||
| 836 | MI.getSourceReg(i), EltSize, EltUnmergeIdx); |
||
| 837 | // Check if source i comes from the same Unmerge. |
||
| 838 | if (!EltUnmerge || EltUnmerge != Unmerge) |
||
| 839 | return false; |
||
| 840 | // Check that source i's def has same index in sequence in Unmerge. |
||
| 841 | if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart) |
||
| 842 | return false; |
||
| 843 | } |
||
| 844 | return true; |
||
| 845 | } |
||
| 846 | |||
| 847 | bool tryCombineMergeLike(GMergeLikeInstr &MI, |
||
| 848 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 849 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 850 | GISelChangeObserver &Observer) { |
||
| 851 | Register Elt0 = MI.getSourceReg(0); |
||
| 852 | LLT EltTy = MRI.getType(Elt0); |
||
| 853 | unsigned EltSize = EltTy.getSizeInBits(); |
||
| 854 | |||
| 855 | unsigned Elt0UnmergeIdx; |
||
| 856 | // Search for unmerge that will be candidate for combine. |
||
| 857 | auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx); |
||
| 858 | if (!Unmerge) |
||
| 859 | return false; |
||
| 860 | |||
| 861 | unsigned NumMIElts = MI.getNumSources(); |
||
| 862 | Register Dst = MI.getReg(0); |
||
| 863 | LLT DstTy = MRI.getType(Dst); |
||
| 864 | Register UnmergeSrc = Unmerge->getSourceReg(); |
||
| 865 | LLT UnmergeSrcTy = MRI.getType(UnmergeSrc); |
||
| 866 | |||
| 867 | // Recognize copy of UnmergeSrc to Dst. |
||
| 868 | // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst. |
||
| 869 | // |
||
| 870 | // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty) |
||
| 871 | // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ... |
||
| 872 | // |
||
| 873 | // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty) |
||
| 874 | if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) { |
||
| 875 | if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize)) |
||
| 876 | return false; |
||
| 877 | replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer); |
||
| 878 | DeadInsts.push_back(&MI); |
||
| 879 | return true; |
||
| 880 | } |
||
| 881 | |||
| 882 | // Recognize UnmergeSrc that can be unmerged to DstTy directly. |
||
| 883 | // Types have to be either both vector or both non-vector types. |
||
| 884 | // Merge-like opcodes are combined one at a time. The first one creates a new |
||
| 885 | // unmerge; the following ones should use the same unmerge (the builder performs CSE). |
||
| 886 | // |
||
| 887 | // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
||
| 888 | // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1 |
||
| 889 | // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3 |
||
| 890 | // |
||
| 891 | // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc |
||
| 892 | if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
||
| 893 | (Elt0UnmergeIdx % NumMIElts == 0) && |
||
| 894 | getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) { |
||
| 895 | if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts, |
||
| 896 | EltSize)) |
||
| 897 | return false; |
||
| 898 | MIB.setInstrAndDebugLoc(MI); |
||
| 899 | auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg()); |
||
| 900 | unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits(); |
||
| 901 | replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB, |
||
| 902 | UpdatedDefs, Observer); |
||
| 903 | DeadInsts.push_back(&MI); |
||
| 904 | return true; |
||
| 905 | } |
||
| 906 | |||
| 907 | // Recognize when multiple unmerged sources with UnmergeSrcTy type |
||
| 908 | // can be merged into Dst with DstTy type directly. |
||
| 909 | // Types have to be either both vector or both non-vector types. |
||
| 910 | |||
| 911 | // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy) |
||
| 912 | // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy) |
||
| 913 | // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3 |
||
| 914 | // |
||
| 915 | // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc |
||
| 916 | |||
| 917 | if ((DstTy.isVector() == UnmergeSrcTy.isVector()) && |
||
| 918 | getCoverTy(DstTy, UnmergeSrcTy) == DstTy) { |
||
| 919 | SmallVector<Register, 4> ConcatSources; |
||
| 920 | unsigned NumElts = Unmerge->getNumDefs(); |
||
| 921 | for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) { |
||
| 922 | unsigned EltUnmergeIdx; |
||
| 923 | auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i), |
||
| 924 | EltSize, EltUnmergeIdx); |
||
| 925 | // All unmerges have to be the same size. |
||
| 926 | if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) || |
||
| 927 | (EltUnmergeIdx != 0)) |
||
| 928 | return false; |
||
| 929 | if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize)) |
||
| 930 | return false; |
||
| 931 | ConcatSources.push_back(UnmergeI->getSourceReg()); |
||
| 932 | } |
||
| 933 | |||
| 934 | MIB.setInstrAndDebugLoc(MI); |
||
| 935 | MIB.buildMergeLikeInstr(Dst, ConcatSources); |
||
| 936 | DeadInsts.push_back(&MI); |
||
| 937 | return true; |
||
| 938 | } |
||
| 939 | |||
| 940 | return false; |
||
| 941 | } |
||
| 942 | }; |
||
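To illustrate what ArtifactValueFinder resolves, a hedged sketch (hypothetical vregs and vector types): when an unmerge re-splits exactly what a concat assembled, `tryCombineUnmergeDefs` traces each def back to the corresponding concat source, so the unmerge's results are replaced by (or copied from) the original registers and the unmerge becomes dead.

```
; Before (hypothetical vregs): an unmerge fed by a concat of the values it re-splits.
%2:_(<4 x s32>) = G_CONCAT_VECTORS %0:_(<2 x s32>), %1:_(<2 x s32>)
%3:_(<2 x s32>), %4:_(<2 x s32>) = G_UNMERGE_VALUES %2:_(<4 x s32>)
; After tryCombineUnmergeDefs: each def resolves to a concat source.
%3:_(<2 x s32>) = COPY %0:_(<2 x s32>)
%4:_(<2 x s32>) = COPY %1:_(<2 x s32>)
```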
| 943 | |||
| 944 | bool tryCombineUnmergeValues(GUnmerge &MI, |
||
| 945 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 946 | SmallVectorImpl<Register> &UpdatedDefs, |
||
| 947 | GISelChangeObserver &Observer) { |
||
| 948 | unsigned NumDefs = MI.getNumDefs(); |
||
| 949 | Register SrcReg = MI.getSourceReg(); |
||
| 950 | MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); |
||
| 951 | if (!SrcDef) |
||
| 952 | return false; |
||
| 953 | |||
| 954 | LLT OpTy = MRI.getType(SrcReg); |
||
| 955 | LLT DestTy = MRI.getType(MI.getReg(0)); |
||
| 956 | unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); |
||
| 957 | |||
| 958 | Builder.setInstrAndDebugLoc(MI); |
||
| 959 | |||
| 960 | ArtifactValueFinder Finder(MRI, Builder, LI); |
||
| 961 | if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) { |
||
| 962 | markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); |
||
| 963 | return true; |
||
| 964 | } |
||
| 965 | |||
| 966 | if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) { |
||
| 967 | // %0:_(<4 x s16>) = G_FOO |
||
| 968 | // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0 |
||
| 969 | // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 |
||
| 970 | // |
||
| 971 | // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 |
||
| 972 | Register SrcUnmergeSrc = SrcUnmerge->getSourceReg(); |
||
| 973 | LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc); |
||
| 974 | |||
| 975 | // If we need to decrease the number of vector elements in the result type |
||
| 976 | // of an unmerge, this would involve the creation of an equivalent unmerge |
||
| 977 | // to copy back to the original result registers. |
||
| 978 | LegalizeActionStep ActionStep = LI.getAction( |
||
| 979 | {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); |
||
| 980 | switch (ActionStep.Action) { |
||
| 981 | case LegalizeActions::Lower: |
||
| 982 | case LegalizeActions::Unsupported: |
||
| 983 | break; |
||
| 984 | case LegalizeActions::FewerElements: |
||
| 985 | case LegalizeActions::NarrowScalar: |
||
| 986 | if (ActionStep.TypeIdx == 1) |
||
| 987 | return false; |
||
| 988 | break; |
||
| 989 | default: |
||
| 990 | return false; |
||
| 991 | } |
||
| 992 | |||
| 993 | auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); |
||
| 994 | |||
| 995 | // TODO: Should we try to process out the other defs now? If the other |
||
| 996 | // defs of the source unmerge are also unmerged, we end up with a separate |
||
| 997 | // unmerge for each one. |
||
| 998 | for (unsigned I = 0; I != NumDefs; ++I) { |
||
| 999 | Register Def = MI.getReg(I); |
||
| 1000 | replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I), |
||
| 1001 | MRI, Builder, UpdatedDefs, Observer); |
||
| 1002 | } |
||
| 1003 | |||
| 1004 | markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx); |
||
| 1005 | return true; |
||
| 1006 | } |
||
| 1007 | |||
| 1008 | MachineInstr *MergeI = SrcDef; |
||
| 1009 | unsigned ConvertOp = 0; |
||
| 1010 | |||
| 1011 | // Handle intermediate conversions |
||
| 1012 | unsigned SrcOp = SrcDef->getOpcode(); |
||
| 1013 | if (isArtifactCast(SrcOp)) { |
||
| 1014 | ConvertOp = SrcOp; |
||
| 1015 | MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); |
||
| 1016 | } |
||
| 1017 | |||
| 1018 | if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), |
||
| 1019 | ConvertOp, OpTy, DestTy)) { |
||
| 1020 | // We might have a chance to combine later by trying to combine |
||
| 1021 | // unmerge(cast) first |
||
| 1022 | return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); |
||
| 1023 | } |
||
| 1024 | |||
| 1025 | const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; |
||
| 1026 | |||
| 1027 | if (NumMergeRegs < NumDefs) { |
||
| 1028 | if (NumDefs % NumMergeRegs != 0) |
||
| 1029 | return false; |
||
| 1030 | |||
| 1031 | Builder.setInstr(MI); |
||
| 1032 | // Transform to UNMERGEs, for example |
||
| 1033 | // %1 = G_MERGE_VALUES %4, %5 |
||
| 1034 | // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 |
||
| 1035 | // to |
||
| 1036 | // %9, %10 = G_UNMERGE_VALUES %4 |
||
| 1037 | // %11, %12 = G_UNMERGE_VALUES %5 |
||
| 1038 | |||
| 1039 | const unsigned NewNumDefs = NumDefs / NumMergeRegs; |
||
| 1040 | for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { |
||
| 1041 | SmallVector<Register, 8> DstRegs; |
||
| 1042 | for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; |
||
| 1043 | ++j, ++DefIdx) |
||
| 1044 | DstRegs.push_back(MI.getReg(DefIdx)); |
||
| 1045 | |||
| 1046 | if (ConvertOp) { |
||
| 1047 | LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg()); |
||
| 1048 | |||
| 1049 | // This is a vector that is being split and casted. Extract to the |
||
| 1050 | // element type, and do the conversion on the scalars (or smaller |
||
| 1051 | // vectors). |
||
| 1052 | LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs); |
||
| 1053 | |||
| 1054 | // Handle split to smaller vectors, with conversions. |
||
| 1055 | // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>) |
||
| 1056 | // %3(<8 x s16>) = G_SEXT %2 |
||
| 1057 | // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3 |
||
| 1058 | // |
||
| 1059 | // => |
||
| 1060 | // |
||
| 1061 | // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0 |
||
| 1062 | // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1 |
||
| 1063 | // %4(<2 x s16>) = G_SEXT %8 |
||
| 1064 | // %5(<2 x s16>) = G_SEXT %9 |
||
| 1065 | // %6(<2 x s16>) = G_SEXT %10 |
||
| 1066 | // %7(<2 x s16>) = G_SEXT %11 |
||
| 1067 | |||
| 1068 | SmallVector<Register, 4> TmpRegs(NewNumDefs); |
||
| 1069 | for (unsigned k = 0; k < NewNumDefs; ++k) |
||
| 1070 | TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy); |
||
| 1071 | |||
| 1072 | Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg()); |
||
| 1073 | |||
| 1074 | for (unsigned k = 0; k < NewNumDefs; ++k) |
||
| 1075 | Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]}); |
||
| 1076 | } else { |
||
| 1077 | Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); |
||
| 1078 | } |
||
| 1079 | UpdatedDefs.append(DstRegs.begin(), DstRegs.end()); |
||
| 1080 | } |
||
| 1081 | |||
| 1082 | } else if (NumMergeRegs > NumDefs) { |
||
| 1083 | if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0) |
||
| 1084 | return false; |
||
| 1085 | |||
| 1086 | Builder.setInstr(MI); |
||
| 1087 | // Transform to MERGEs |
||
| 1088 | // %6 = G_MERGE_VALUES %17, %18, %19, %20 |
||
| 1089 | // %7, %8 = G_UNMERGE_VALUES %6 |
||
| 1090 | // to |
||
| 1091 | // %7 = G_MERGE_VALUES %17, %18 |
||
| 1092 | // %8 = G_MERGE_VALUES %19, %20 |
||
| 1093 | |||
| 1094 | const unsigned NumRegs = NumMergeRegs / NumDefs; |
||
| 1095 | for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { |
||
| 1096 | SmallVector<Register, 8> Regs; |
||
| 1097 | for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; |
||
| 1098 | ++j, ++Idx) |
||
| 1099 | Regs.push_back(MergeI->getOperand(Idx).getReg()); |
||
| 1100 | |||
| 1101 | Register DefReg = MI.getReg(DefIdx); |
||
| 1102 | Builder.buildMergeLikeInstr(DefReg, Regs); |
||
| 1103 | UpdatedDefs.push_back(DefReg); |
||
| 1104 | } |
||
| 1105 | |||
| 1106 | } else { |
||
| 1107 | LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg()); |
||
| 1108 | |||
| 1109 | if (!ConvertOp && DestTy != MergeSrcTy) |
||
| 1110 | ConvertOp = TargetOpcode::G_BITCAST; |
||
| 1111 | |||
| 1112 | if (ConvertOp) { |
||
| 1113 | Builder.setInstr(MI); |
||
| 1114 | |||
| 1115 | for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
||
| 1116 | Register DefReg = MI.getOperand(Idx).getReg(); |
||
| 1117 | Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); |
||
| 1118 | |||
| 1119 | if (!MRI.use_empty(DefReg)) { |
||
| 1120 | Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); |
||
| 1121 | UpdatedDefs.push_back(DefReg); |
||
| 1122 | } |
||
| 1123 | } |
||
| 1124 | |||
| 1125 | markInstAndDefDead(MI, *MergeI, DeadInsts); |
||
| 1126 | return true; |
||
| 1127 | } |
||
| 1128 | |||
| 1129 | assert(DestTy == MergeSrcTy && |
||
| 1130 | "Bitcast and the other kinds of conversions should " |
||
| 1131 | "have happened earlier"); |
||
| 1132 | |||
| 1133 | Builder.setInstr(MI); |
||
| 1134 | for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { |
||
| 1135 | Register DstReg = MI.getOperand(Idx).getReg(); |
||
| 1136 | Register SrcReg = MergeI->getOperand(Idx + 1).getReg(); |
||
| 1137 | replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs, |
||
| 1138 | Observer); |
||
| 1139 | } |
||
| 1140 | } |
||
| 1141 | |||
| 1142 | markInstAndDefDead(MI, *MergeI, DeadInsts); |
||
| 1143 | return true; |
||
| 1144 | } |
||
| 1145 | |||
| 1146 | bool tryCombineExtract(MachineInstr &MI, |
||
| 1147 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 1148 | SmallVectorImpl<Register> &UpdatedDefs) { |
||
| 1149 | assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); |
||
| 1150 | |||
| 1151 | // Try to use the source registers from a G_MERGE_VALUES |
||
| 1152 | // |
||
| 1153 | // %2 = G_MERGE_VALUES %0, %1 |
||
| 1154 | // %3 = G_EXTRACT %2, N |
||
| 1155 | // => |
||
| 1156 | // |
||
| 1157 | // for N < %2.getSizeInBits() / 2 |
||
| 1158 | // %3 = G_EXTRACT %0, N |
||
| 1159 | // |
||
| 1160 | // for N >= %2.getSizeInBits() / 2 |
||
| 1161 | // %3 = G_EXTRACT %1, (N - %0.getSizeInBits()) |
||
| 1162 | |||
| 1163 | Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); |
||
| 1164 | MachineInstr *MergeI = MRI.getVRegDef(SrcReg); |
||
| 1165 | if (!MergeI || !isa<GMergeLikeInstr>(MergeI)) |
||
| 1166 | return false; |
||
| 1167 | |||
| 1168 | Register DstReg = MI.getOperand(0).getReg(); |
||
| 1169 | LLT DstTy = MRI.getType(DstReg); |
||
| 1170 | LLT SrcTy = MRI.getType(SrcReg); |
||
| 1171 | |||
| 1172 | // TODO: Do we need to check if the resulting extract is supported? |
||
| 1173 | unsigned ExtractDstSize = DstTy.getSizeInBits(); |
||
| 1174 | unsigned Offset = MI.getOperand(2).getImm(); |
||
| 1175 | unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; |
||
| 1176 | unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; |
||
| 1177 | unsigned MergeSrcIdx = Offset / MergeSrcSize; |
||
| 1178 | |||
| 1179 | // Compute the offset of the last bit the extract needs. |
||
| 1180 | unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; |
||
| 1181 | |||
| 1182 | // Can't handle the case where the extract spans multiple inputs. |
||
| 1183 | if (MergeSrcIdx != EndMergeSrcIdx) |
||
| 1184 | return false; |
||
| 1185 | |||
| 1186 | // TODO: We could modify MI in place in most cases. |
||
| 1187 | Builder.setInstr(MI); |
||
| 1188 | Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(), |
||
| 1189 | Offset - MergeSrcIdx * MergeSrcSize); |
||
| 1190 | UpdatedDefs.push_back(DstReg); |
||
| 1191 | markInstAndDefDead(MI, *MergeI, DeadInsts); |
||
| 1192 | return true; |
||
| 1193 | } |
||
| 1194 | |||
| 1195 | /// Try to combine away MI. |
||
| 1196 | /// Returns true if it combined away the MI. |
||
| 1197 | /// Adds instructions that are dead as a result of the combine |
||
| 1198 | /// into DeadInsts, which can include MI. |
||
| 1199 | bool tryCombineInstruction(MachineInstr &MI, |
||
| 1200 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
||
| 1201 | GISelObserverWrapper &WrapperObserver) { |
||
| 1202 | ArtifactValueFinder Finder(MRI, Builder, LI); |
||
| 1203 | |||
| 1204 | // This might be a recursive call, and we might have DeadInsts already |
||
| 1205 | // populated. To avoid bad things happening later with multiple vreg defs |
||
| 1206 | // etc, process the dead instructions now if any. |
||
| 1207 | if (!DeadInsts.empty()) |
||
| 1208 | deleteMarkedDeadInsts(DeadInsts, WrapperObserver); |
||
| 1209 | |||
| 1210 | // Put here every vreg that was redefined in such a way that it's at least |
||
| 1211 | // possible that one (or more) of its users (immediate or COPY-separated) |
||
| 1212 | // could become artifact combinable with the new definition (or the |
||
| 1213 | // instruction reachable from it through a chain of copies if any). |
||
| 1214 | SmallVector<Register, 4> UpdatedDefs; |
||
| 1215 | bool Changed = false; |
||
| 1216 | switch (MI.getOpcode()) { |
||
| 1217 | default: |
||
| 1218 | return false; |
||
| 1219 | case TargetOpcode::G_ANYEXT: |
||
| 1220 | Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
||
| 1221 | break; |
||
| 1222 | case TargetOpcode::G_ZEXT: |
||
| 1223 | Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
||
| 1224 | break; |
||
| 1225 | case TargetOpcode::G_SEXT: |
||
| 1226 | Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); |
||
| 1227 | break; |
||
| 1228 | case TargetOpcode::G_UNMERGE_VALUES: |
||
| 1229 | Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts, |
||
| 1230 | UpdatedDefs, WrapperObserver); |
||
| 1231 | break; |
||
| 1232 | case TargetOpcode::G_MERGE_VALUES: |
||
| 1233 | case TargetOpcode::G_BUILD_VECTOR: |
||
| 1234 | case TargetOpcode::G_CONCAT_VECTORS: |
||
| 1235 | // If any of the users of this merge are an unmerge, then add them to the |
||
| 1236 | // artifact worklist in case there's folding that can be done looking up. |
| 1237 | for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) { |
| 1238 | if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES || |
| 1239 | U.getOpcode() == TargetOpcode::G_TRUNC) { |
| 1240 | UpdatedDefs.push_back(MI.getOperand(0).getReg()); |
| 1241 | break; |
| 1242 | } |
| 1243 | } |
| 1244 | Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts, |
| 1245 | UpdatedDefs, WrapperObserver); |
| 1246 | break; |
| 1247 | case TargetOpcode::G_EXTRACT: |
| 1248 | Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); |
| 1249 | break; |
| 1250 | case TargetOpcode::G_TRUNC: |
| 1251 | Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver); |
| 1252 | if (!Changed) { |
| 1253 | // Try to combine truncates away even if they are legal. As all artifact |
| 1254 | // combines at the moment look only "up" the def-use chains, we achieve |
| 1255 | // that by throwing truncates' users (with look through copies) into the |
| 1256 | // ArtifactList again. |
| 1257 | UpdatedDefs.push_back(MI.getOperand(0).getReg()); |
| 1258 | } |
| 1259 | break; |
| 1260 | } |
| 1261 | // If the main loop through the ArtifactList found at least one combinable |
| 1262 | // pair of artifacts, not only combine it away (as done above), but also |
| 1263 | // follow the def-use chain from there to combine everything that can be |
| 1264 | // combined within this def-use chain of artifacts. |
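| | // For example (illustrative), once a combine rewrites |
| | //   %2(s16) = G_TRUNC %1(s64) |
| | // %2 ends up in UpdatedDefs, and an artifact user such as |
| | //   %3(s32) = G_ZEXT %2(s16) |
| | // is handed back to the observer below so it gets another chance to fold. |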
| 1265 | while (!UpdatedDefs.empty()) { |
| 1266 | Register NewDef = UpdatedDefs.pop_back_val(); |
| 1267 | assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg"); |
| 1268 | for (MachineInstr &Use : MRI.use_instructions(NewDef)) { |
| 1269 | switch (Use.getOpcode()) { |
| 1270 | // Keep this list in sync with the list of all artifact combines. |
| 1271 | case TargetOpcode::G_ANYEXT: |
| 1272 | case TargetOpcode::G_ZEXT: |
| 1273 | case TargetOpcode::G_SEXT: |
| 1274 | case TargetOpcode::G_UNMERGE_VALUES: |
| 1275 | case TargetOpcode::G_EXTRACT: |
| 1276 | case TargetOpcode::G_TRUNC: |
| 1277 | case TargetOpcode::G_BUILD_VECTOR: |
| 1278 | // Adding Use to ArtifactList. |
| 1279 | WrapperObserver.changedInstr(Use); |
| 1280 | break; |
| 1281 | case TargetOpcode::COPY: { |
| 1282 | Register Copy = Use.getOperand(0).getReg(); |
| 1283 | if (Copy.isVirtual()) |
| 1284 | UpdatedDefs.push_back(Copy); |
| 1285 | break; |
| 1286 | } |
| 1287 | default: |
| 1288 | // If we do not have an artifact combine for the opcode, there is no |
| 1289 | // point in adding it to the ArtifactList as nothing interesting will |
| 1290 | // be done to it anyway. |
| 1291 | break; |
| 1292 | } |
| 1293 | } |
| 1294 | } |
| 1295 | return Changed; |
| 1296 | } |
| 1297 | |||
| 1298 | private: |
| 1299 | static Register getArtifactSrcReg(const MachineInstr &MI) { |
| 1300 | switch (MI.getOpcode()) { |
| 1301 | case TargetOpcode::COPY: |
| 1302 | case TargetOpcode::G_TRUNC: |
| 1303 | case TargetOpcode::G_ZEXT: |
| 1304 | case TargetOpcode::G_ANYEXT: |
| 1305 | case TargetOpcode::G_SEXT: |
| 1306 | case TargetOpcode::G_EXTRACT: |
| 1307 | return MI.getOperand(1).getReg(); |
| 1308 | case TargetOpcode::G_UNMERGE_VALUES: |
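| | // A G_UNMERGE_VALUES reads its single source after all of its defs, so the |
| | // source is the last operand. |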
| 1309 | return MI.getOperand(MI.getNumOperands() - 1).getReg(); |
| 1310 | default: |
| 1311 | llvm_unreachable("Not a legalization artifact"); |
| 1312 | } |
| 1313 | } |
| 1314 | |||
| 1315 | /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI |
| 1316 | /// (either by killing it or changing operands) results in DefMI being dead |
| 1317 | /// too. In-between COPYs or artifact-casts are also collected if they are |
| 1318 | /// dead. |
| 1319 | /// MI is not marked dead. |
| 1320 | void markDefDead(MachineInstr &MI, MachineInstr &DefMI, |
| 1321 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
| 1322 | unsigned DefIdx = 0) { |
| 1323 | // Collect all the copy instructions that are made dead, due to deleting |
| 1324 | // this instruction. Collect all of them until the Trunc(DefMI). |
| 1325 | // Eg, |
| 1326 | // %1(s1) = G_TRUNC %0(s32) |
| 1327 | // %2(s1) = COPY %1(s1) |
| 1328 | // %3(s1) = COPY %2(s1) |
| 1329 | // %4(s32) = G_ANYEXT %3(s1) |
| 1330 | // In this case, we would have replaced %4 with a copy of %0, |
| 1331 | // and as a result, %3, %2, %1 are dead. |
| 1332 | MachineInstr *PrevMI = &MI; |
| 1333 | while (PrevMI != &DefMI) { |
| 1334 | Register PrevRegSrc = getArtifactSrcReg(*PrevMI); |
| 1335 | |||
| 1336 | MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); |
| 1337 | if (MRI.hasOneUse(PrevRegSrc)) { |
| 1338 | if (TmpDef != &DefMI) { |
| 1339 | assert((TmpDef->getOpcode() == TargetOpcode::COPY || |
| 1340 | isArtifactCast(TmpDef->getOpcode())) && |
| 1341 | "Expecting copy or artifact cast here"); |
| 1342 | |||
| 1343 | DeadInsts.push_back(TmpDef); |
| 1344 | } |
| 1345 | } else |
| 1346 | break; |
| 1347 | PrevMI = TmpDef; |
| 1348 | } |
| 1349 | |||
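| | // DefMI may define more than one vreg (e.g. G_UNMERGE_VALUES); only queue it |
| | // as dead once its remaining defs are known to be unused as well. |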
| 1350 | if (PrevMI == &DefMI) { |
| 1351 | unsigned I = 0; |
| 1352 | bool IsDead = true; |
| 1353 | for (MachineOperand &Def : DefMI.defs()) { |
| 1354 | if (I != DefIdx) { |
| 1355 | if (!MRI.use_empty(Def.getReg())) { |
| 1356 | IsDead = false; |
| 1357 | break; |
| 1358 | } |
| 1359 | } else { |
| 1360 | if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg())) |
| 1361 | break; |
| 1362 | } |
| 1363 | |||
| 1364 | ++I; |
| 1365 | } |
| 1366 | |||
| 1367 | if (IsDead) |
| 1368 | DeadInsts.push_back(&DefMI); |
| 1369 | } |
| 1370 | } |
| 1371 | |||
| 1372 | /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be |
| 1373 | /// dead due to MI being killed, then mark DefMI as dead too. |
| 1374 | /// Some of the combines (extends(trunc)) try to walk through redundant |
| 1375 | /// copies in between the extends and the truncs, and this attempts to collect |
| 1376 | /// the in-between copies if they're dead. |
| 1377 | void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, |
| 1378 | SmallVectorImpl<MachineInstr *> &DeadInsts, |
| 1379 | unsigned DefIdx = 0) { |
| 1380 | DeadInsts.push_back(&MI); |
| 1381 | markDefDead(MI, DefMI, DeadInsts, DefIdx); |
| 1382 | } |
| 1383 | |||
| 1384 | /// Erase the dead instructions in the list and call the observer hooks. |
| 1385 | /// Normally the Legalizer will deal with erasing instructions that have been |
| 1386 | /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to |
| 1387 | /// process instructions which have been marked dead, but otherwise break the |
| 1388 | /// MIR by introducing multiple vreg defs. For those cases, allow the combines |
| 1389 | /// to explicitly delete the instructions before we run into trouble. |
| 1390 | void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, |
| 1391 | GISelObserverWrapper &WrapperObserver) { |
| 1392 | for (auto *DeadMI : DeadInsts) { |
| 1393 | LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n"); |
| 1394 | WrapperObserver.erasingInstr(*DeadMI); |
| 1395 | DeadMI->eraseFromParent(); |
| 1396 | } |
| 1397 | DeadInsts.clear(); |
| 1398 | } |
| 1399 | |||
| 1400 | /// Checks if the target legalizer info has specified anything about the |
| 1401 | /// instruction, or if unsupported. |
| 1402 | bool isInstUnsupported(const LegalityQuery &Query) const { |
| 1403 | using namespace LegalizeActions; |
| 1404 | auto Step = LI.getAction(Query); |
| 1405 | return Step.Action == Unsupported || Step.Action == NotFound; |
| 1406 | } |
| 1407 | |||
| 1408 | bool isInstLegal(const LegalityQuery &Query) const { |
| 1409 | return LI.getAction(Query).Action == LegalizeActions::Legal; |
| 1410 | } |
| 1411 | |||
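| | /// Checks whether a constant of type Ty can be materialized at all. A vector |
| | /// constant is normally built as a G_BUILD_VECTOR of scalar G_CONSTANTs, so |
| | /// for vectors both opcodes are queried. |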
| 1412 | bool isConstantUnsupported(LLT Ty) const { |
| 1413 | if (!Ty.isVector()) |
| 1414 | return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}}); |
| 1415 | |||
| 1416 | LLT EltTy = Ty.getElementType(); |
| 1417 | return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) || |
| 1418 | isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); |
| 1419 | } |
| 1420 | |||
| 1421 | /// Looks through copy instructions and returns the actual |
| 1422 | /// source register. |
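| | /// Copies whose source register has no LLT attached (e.g. a COPY from a |
| | /// physical register) are not looked through. |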
| 1423 | Register lookThroughCopyInstrs(Register Reg) { |
| 1424 | using namespace llvm::MIPatternMatch; |
| 1425 | |||
| 1426 | Register TmpReg; |
| 1427 | while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { |
| 1428 | if (MRI.getType(TmpReg).isValid()) |
| 1429 | Reg = TmpReg; |
| 1430 | else |
| 1431 | break; |
| 1432 | } |
| 1433 | return Reg; |
| 1434 | } |
| 1435 | }; |
| 1436 | |||
| 1437 | } // namespace llvm |
| 1438 | |||
| 1439 | #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H |