//===------------------------- LSUnit.h --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//
| 14 | |||
| 15 | #ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H |
||
| 16 | #define LLVM_MCA_HARDWAREUNITS_LSUNIT_H |
||
| 17 | |||
| 18 | #include "llvm/ADT/DenseMap.h" |
||
| 19 | #include "llvm/ADT/SmallVector.h" |
||
| 20 | #include "llvm/MC/MCSchedule.h" |
||
| 21 | #include "llvm/MCA/HardwareUnits/HardwareUnit.h" |
||
| 22 | #include "llvm/MCA/Instruction.h" |
||
| 23 | |||
| 24 | namespace llvm { |
||
| 25 | namespace mca { |
||
| 26 | |||
/// A node of a memory dependency graph. A MemoryGroup describes a set of
/// instructions with the same memory dependencies.
///
/// By construction, instructions of a MemoryGroup don't depend on each other.
/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
/// A MemoryGroup identifier is then stored as a "token" in field
/// Instruction::LSUTokenID of each dispatched instruction. That token is used
/// internally by the LSUnit to track memory dependencies.
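///
/// A minimal sketch (assumed usage, not from the original header) of how the
/// token flows through the LSUnit interface declared later in this file,
/// assuming the setLSUTokenID() accessor from llvm/MCA/Instruction.h:
///
/// \code
///   unsigned Token = LSU.dispatch(IR);        // picks/creates a MemoryGroup
///   IR.getInstruction()->setLSUTokenID(Token);
///   // ... later, the group's state is queried through the token:
///   if (LSU.isReady(IR))
///     issueInstruction(IR);                   // hypothetical helper
/// \endcode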
class MemoryGroup {
  unsigned NumPredecessors;
  unsigned NumExecutingPredecessors;
  unsigned NumExecutedPredecessors;

  unsigned NumInstructions;
  unsigned NumExecuting;
  unsigned NumExecuted;
  // Successors that are in an order dependency with this group.
  SmallVector<MemoryGroup *, 4> OrderSucc;
  // Successors that are in a data dependency with this group.
  SmallVector<MemoryGroup *, 4> DataSucc;

  CriticalDependency CriticalPredecessor;
  InstRef CriticalMemoryInstruction;

  MemoryGroup(const MemoryGroup &) = delete;
  MemoryGroup &operator=(const MemoryGroup &) = delete;

public:
  MemoryGroup()
      : NumPredecessors(0), NumExecutingPredecessors(0),
        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
        NumExecuted(0), CriticalPredecessor() {}
  MemoryGroup(MemoryGroup &&) = default;

  size_t getNumSuccessors() const {
    return OrderSucc.size() + DataSucc.size();
  }
  unsigned getNumPredecessors() const { return NumPredecessors; }
  unsigned getNumExecutingPredecessors() const {
    return NumExecutingPredecessors;
  }
  unsigned getNumExecutedPredecessors() const {
    return NumExecutedPredecessors;
  }
  unsigned getNumInstructions() const { return NumInstructions; }
  unsigned getNumExecuting() const { return NumExecuting; }
  unsigned getNumExecuted() const { return NumExecuted; }

  const InstRef &getCriticalMemoryInstruction() const {
    return CriticalMemoryInstruction;
  }
  const CriticalDependency &getCriticalPredecessor() const {
    return CriticalPredecessor;
  }

  void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
    // There is no need to add a dependency if there is no data dependency
    // and all instructions from this group have already been issued.
    if (!IsDataDependent && isExecuting())
      return;

    Group->NumPredecessors++;
    assert(!isExecuted() && "Should have been removed!");
    if (isExecuting())
      Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

    if (IsDataDependent)
      DataSucc.emplace_back(Group);
    else
      OrderSucc.emplace_back(Group);
  }

  bool isWaiting() const {
    return NumPredecessors >
           (NumExecutingPredecessors + NumExecutedPredecessors);
  }
  bool isPending() const {
    return NumExecutingPredecessors &&
           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
            NumPredecessors);
  }
  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
  bool isExecuting() const {
    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
  }
  bool isExecuted() const { return NumInstructions == NumExecuted; }

  void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
    assert(!isReady() && "Unexpected group-start event!");
    NumExecutingPredecessors++;

    if (!ShouldUpdateCriticalDep)
      return;

    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
    if (CriticalPredecessor.Cycles < Cycles) {
      CriticalPredecessor.IID = IR.getSourceIndex();
      CriticalPredecessor.Cycles = Cycles;
    }
  }

  void onGroupExecuted() {
    assert(!isReady() && "Inconsistent state found!");
    NumExecutingPredecessors--;
    NumExecutedPredecessors++;
  }

  void onInstructionIssued(const InstRef &IR) {
    assert(!isExecuting() && "Invalid internal state!");
    ++NumExecuting;

    // Update the critical memory instruction for this group.
    const Instruction &IS = *IR.getInstruction();
    if ((bool)CriticalMemoryInstruction) {
      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
        CriticalMemoryInstruction = IR;
    } else {
      CriticalMemoryInstruction = IR;
    }

    if (!isExecuting())
      return;

    // Notify successors that this group started execution.
    for (MemoryGroup *MG : OrderSucc) {
      MG->onGroupIssued(CriticalMemoryInstruction, false);
      // Release the order dependency with this group.
      MG->onGroupExecuted();
    }

    for (MemoryGroup *MG : DataSucc)
      MG->onGroupIssued(CriticalMemoryInstruction, true);
  }

  void onInstructionExecuted(const InstRef &IR) {
    assert(isReady() && !isExecuted() && "Invalid internal state!");
    --NumExecuting;
    ++NumExecuted;

    if (CriticalMemoryInstruction &&
        CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
      CriticalMemoryInstruction.invalidate();
    }

    if (!isExecuted())
      return;

    // Notify data dependent successors that this group has finished execution.
    for (MemoryGroup *MG : DataSucc)
      MG->onGroupExecuted();
  }

  void addInstruction() {
    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
    ++NumInstructions;
  }

  void cycleEvent() {
    if (isWaiting() && CriticalPredecessor.Cycles)
      CriticalPredecessor.Cycles--;
  }
};

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;
| 201 | |||
| 202 | /// Load queue size. |
||
| 203 | /// |
||
| 204 | /// A value of zero for this field means that the store queue is unbounded. |
||
| 205 | /// Processor models can declare the size of a store queue via tablegen (see |
||
| 206 | /// the definition of tablegen class StoreQueue in |
||
| 207 | /// llvm/Target/TargetSchedule.td). |
||
| 208 | unsigned SQSize; |
||
| 209 | |||
| 210 | unsigned UsedLQEntries; |
||
| 211 | unsigned UsedSQEntries; |
||
| 212 | |||
  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  virtual ~LSUnitBase();

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
  virtual unsigned dispatch(const InstRef &IR) = 0;

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  bool isValidGroupID(unsigned Index) const {
    return Index && (Groups.find(Index) != Groups.end());
  }

  /// Check if a previously dispatched instruction IR is now ready for
  /// execution.
  bool isReady(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  bool isPending(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  bool isWaiting(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(
        std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }

  virtual void onInstructionExecuted(const InstRef &IR);

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until
  // commitment. By default, we conservatively assume that the LDQ receives a
  // load at dispatch, and that loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR);

  virtual void onInstructionIssued(const InstRef &IR) {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  virtual void cycleEvent();

#ifndef NDEBUG
  void dump() const;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4). Essentially, this class doesn't perform
/// any sort of alias analysis to identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining,
/// etc.). Derived classes are responsible for implementing that extra
/// knowledge, and provide different sets of rules for loads and stores by
/// overriding method `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer.
/// This class doesn't know how to identify cases where store-to-load
/// forwarding may occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which
/// usually matches the load-to-use latency when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
/// memory-barrier like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load-barrier. That means it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory barrier is a
/// 'mayLoad' and 'mayStore' instruction with unmodeled side effects. This is
/// obviously inaccurate, but this is the best that we can do at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
/// - A younger load cannot pass a load barrier.
/// - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
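///
/// For illustration (an inferred example, not from the original header),
/// consider this sequence under the default rules, with aliasing assumed
/// (`AssumeNoAlias` == false):
///
/// \code
///   st [a]   ; (1)
///   ld [b]   ; (2) must wait for (1): a load may not pass an older store
///   ld [c]   ; (3) may pass (2): no stores nor barriers in between (rule 1)
///   st [d]   ; (4) must wait for (1), (2) and (3): rules 2 and 3
/// \endcode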
class LSUnit : public LSUnitBase {
  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses)
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to be a better
  // predictor of load latency than instruction latency. This is particularly
  // true when simulating code with temporal/spatial locality of memory
  // accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (see PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types,
  // but not 256-bit data types. So, a 256-bit load is effectively split into
  // two 128-bit loads, and each split load consumes one 'LoadQueue' entry.
  // For simplicity, this class optimistically assumes that a load instruction
  // only consumes one entry in the LoadQueue. Similarly, store instructions
  // only consume a single entry in the StoreQueue.
  // In the future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (see
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.
  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
  unsigned dispatch(const InstRef &IR) override;
| 473 | |||
| 474 | void onInstructionExecuted(const InstRef &IR) override; |
||
| 475 | }; |
||
| 476 | |||
| 477 | } // namespace mca |
||
| 478 | } // namespace llvm |
||
| 479 | |||
| 480 | #endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H |