//===------------------------- LSUnit.h -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H
#define LLVM_MCA_HARDWAREUNITS_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"

namespace llvm {
namespace mca {
/// A node of a memory dependency graph. A MemoryGroup describes a set of
/// instructions with the same memory dependencies.
///
/// By construction, instructions of a MemoryGroup don't depend on each other.
/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
/// A MemoryGroup identifier is then stored as a "token" in field
/// Instruction::LSUTokenID of each dispatched instruction. That token is used
/// internally by the LSUnit to track memory dependencies.
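///
/// A minimal sketch of how the token is used to query memory dependencies
/// after dispatch (`LSU` and `IR` are assumed names, not part of this class):
///
/// \code
///   unsigned Token = IR.getInstruction()->getLSUTokenID();
///   const MemoryGroup &Group = LSU.getGroup(Token);
///   if (Group.isReady()) {
///     // IR has no unresolved memory dependencies left.
///   }
/// \endcode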
class MemoryGroup {
  unsigned NumPredecessors;
  unsigned NumExecutingPredecessors;
  unsigned NumExecutedPredecessors;

  unsigned NumInstructions;
  unsigned NumExecuting;
  unsigned NumExecuted;
  // Successors that are in an order dependency with this group.
  SmallVector<MemoryGroup *, 4> OrderSucc;
  // Successors that are in a data dependency with this group.
  SmallVector<MemoryGroup *, 4> DataSucc;

  CriticalDependency CriticalPredecessor;
  InstRef CriticalMemoryInstruction;

  MemoryGroup(const MemoryGroup &) = delete;
  MemoryGroup &operator=(const MemoryGroup &) = delete;
public:
  MemoryGroup()
      : NumPredecessors(0), NumExecutingPredecessors(0),
        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
        NumExecuted(0), CriticalPredecessor() {}
  MemoryGroup(MemoryGroup &&) = default;

  size_t getNumSuccessors() const {
    return OrderSucc.size() + DataSucc.size();
  }
  unsigned getNumPredecessors() const { return NumPredecessors; }
  unsigned getNumExecutingPredecessors() const {
    return NumExecutingPredecessors;
  }
  unsigned getNumExecutedPredecessors() const {
    return NumExecutedPredecessors;
  }
  unsigned getNumInstructions() const { return NumInstructions; }
  unsigned getNumExecuting() const { return NumExecuting; }
  unsigned getNumExecuted() const { return NumExecuted; }

  const InstRef &getCriticalMemoryInstruction() const {
    return CriticalMemoryInstruction;
  }
  const CriticalDependency &getCriticalPredecessor() const {
    return CriticalPredecessor;
  }

  void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
    // There is no need to add an order dependency if all instructions from
    // this group have already been issued.
    if (!IsDataDependent && isExecuting())
      return;

    Group->NumPredecessors++;
    assert(!isExecuted() && "Should have been removed!");
    if (isExecuting())
      Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

    if (IsDataDependent)
      DataSucc.emplace_back(Group);
    else
      OrderSucc.emplace_back(Group);
  }

  bool isWaiting() const {
    return NumPredecessors >
           (NumExecutingPredecessors + NumExecutedPredecessors);
  }
  bool isPending() const {
    return NumExecutingPredecessors &&
           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
            NumPredecessors);
  }
  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
  bool isExecuting() const {
    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
  }
  bool isExecuted() const { return NumInstructions == NumExecuted; }
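
  // A sketch of the lifecycle implied by the predicates above. A group moves
  // monotonically through these states (informal names, not an enum):
  //
  //   isWaiting()   - some predecessors have not started executing yet.
  //   isPending()   - every predecessor has at least started executing, and
  //                   some are still in flight.
  //   isReady()     - all predecessors have finished executing.
  //   isExecuting() - all non-executed instructions of this group are issued.
  //   isExecuted()  - every instruction of this group has finished.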

  void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
    assert(!isReady() && "Unexpected group-start event!");
    NumExecutingPredecessors++;

    if (!ShouldUpdateCriticalDep)
      return;

    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
    if (CriticalPredecessor.Cycles < Cycles) {
      CriticalPredecessor.IID = IR.getSourceIndex();
      CriticalPredecessor.Cycles = Cycles;
    }
  }

  void onGroupExecuted() {
    assert(!isReady() && "Inconsistent state found!");
    NumExecutingPredecessors--;
    NumExecutedPredecessors++;
  }

  void onInstructionIssued(const InstRef &IR) {
    assert(!isExecuting() && "Invalid internal state!");
    ++NumExecuting;

    // Update the critical memory instruction: keep the instruction with the
    // largest number of cycles left to execute.
    const Instruction &IS = *IR.getInstruction();
    if ((bool)CriticalMemoryInstruction) {
      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
        CriticalMemoryInstruction = IR;
    } else {
      CriticalMemoryInstruction = IR;
    }

    if (!isExecuting())
      return;

    // Notify successors that this group started execution.
    for (MemoryGroup *MG : OrderSucc) {
      MG->onGroupIssued(CriticalMemoryInstruction, false);
      // Release the order dependency with this group.
      MG->onGroupExecuted();
    }

    for (MemoryGroup *MG : DataSucc)
      MG->onGroupIssued(CriticalMemoryInstruction, true);
  }

  void onInstructionExecuted(const InstRef &IR) {
    assert(isReady() && !isExecuted() && "Invalid internal state!");
    --NumExecuting;
    ++NumExecuted;

    if (CriticalMemoryInstruction &&
        CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
      CriticalMemoryInstruction.invalidate();
    }

    if (!isExecuted())
      return;

    // Notify data dependent successors that this group has finished execution.
    for (MemoryGroup *MG : DataSucc)
      MG->onGroupExecuted();
  }

  void addInstruction() {
    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
    ++NumInstructions;
  }

  void cycleEvent() {
    if (isWaiting() && CriticalPredecessor.Cycles)
      CriticalPredecessor.Cycles--;
  }
};
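
// A minimal sketch (hypothetical names) of how an ordering edge between two
// groups behaves, given the methods above:
//
//   MemoryGroup Older, Younger;
//   Older.addInstruction();
//   Older.addSuccessor(&Younger, /* IsDataDependent */ false);
//   // Younger.isWaiting() is now true; the order dependency is released as
//   // soon as the instruction in Older issues.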

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;

  /// Store queue size.
  ///
  /// A value of zero for this field means that the store queue is unbounded.
  /// Processor models can declare the size of a store queue via tablegen (see
  /// the definition of tablegen class StoreQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned SQSize;

  unsigned UsedLQEntries;
  unsigned UsedSQEntries;

  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  virtual ~LSUnitBase();

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with an LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
  virtual unsigned dispatch(const InstRef &IR) = 0;
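
  // A minimal sketch of the expected call sequence at dispatch stage (`LSU`
  // and `IR` are assumed names, not part of this interface):
  //
  //   if (LSU.isAvailable(IR) == LSUnitBase::LSU_AVAILABLE) {
  //     unsigned TokenID = LSU.dispatch(IR);
  //     IR.getInstruction()->setLSUTokenID(TokenID);
  //   }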

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  bool isValidGroupID(unsigned Index) const {
    return Index && (Groups.find(Index) != Groups.end());
  }

  /// Check if a previously dispatched instruction IR is now ready for
  /// execution.
  bool isReady(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  bool isPending(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  bool isWaiting(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(
        std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }

  virtual void onInstructionExecuted(const InstRef &IR);

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until commitment.
  // By default we conservatively assume that the LDQ receives a load at
  // dispatch. Loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR);

  virtual void onInstructionIssued(const InstRef &IR) {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  virtual void cycleEvent();

#ifndef NDEBUG
  void dump() const;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
/// identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining, etc.).
/// Derived classes are responsible for implementing that extra knowledge, and
/// provide different sets of rules for loads and stores by overriding method
/// `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer. This
/// class doesn't know how to identify cases where store-to-load forwarding may
/// occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which usually
/// matches the load-to-use latency for when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
/// memory-barrier like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory
/// barrier is a `mayLoad` and `mayStore` instruction with unmodeled side
/// effects. This is obviously inaccurate, but this is the best that we can do
/// at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
///  - A younger load cannot pass a load barrier.
///  - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
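///
/// For illustration, a hypothetical three-instruction sequence under the
/// default rules (the mnemonics are made up; only mayLoad/mayStore matters):
/// \code
///   load  A     ; joins the current load group
///   store B     ; may not pass the older load A (rule 3)
///   load  C     ; may pass store B only when AssumeNoAlias is true (rule 4)
/// \endcode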
class LSUnit : public LSUnitBase {
  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to
  // be a better predictor of load latency than instruction latency. This is
  // particularly true when simulating code with temporal/spatial locality of
  // memory accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.
  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}
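
  // A construction sketch (queue sizes are made-up values; `SM` is assumed to
  // be a valid MCSchedModel):
  //
  //   LSUnit LSU(SM, /* LQSize */ 16, /* SQSize */ 16, /* NoAlias */ true);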

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
  unsigned dispatch(const InstRef &IR) override;

  void onInstructionExecuted(const InstRef &IR) override;
};

} // namespace mca
} // namespace llvm

#endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H