//===------------------------- LSUnit.h --------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// A Load/Store unit class that models load/store queues and that implements
/// a simple weak memory consistency model.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_MCA_HARDWAREUNITS_LSUNIT_H
#define LLVM_MCA_HARDWAREUNITS_LSUNIT_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MCA/HardwareUnits/HardwareUnit.h"
#include "llvm/MCA/Instruction.h"

namespace llvm {
namespace mca {

/// A node of a memory dependency graph. A MemoryGroup describes a set of
/// instructions with the same memory dependencies.
///
/// By construction, instructions of a MemoryGroup don't depend on each other.
/// At dispatch stage, instructions are mapped by the LSUnit to MemoryGroups.
/// A memory group identifier is then stored as a "token" in field
/// Instruction::LSUTokenID of each dispatched instruction. That token is used
/// internally by the LSUnit to track memory dependencies.
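///
/// The following is a minimal sketch (illustrative only; `LSU` stands for any
/// LSUnitBase-derived object, and the variable names are made up) showing how
/// two groups can be wired together and how the state predicates below evolve
/// for a load that data-depends on an older store:
/// \code
///   unsigned StoreID = LSU.createMemoryGroup();
///   unsigned LoadID = LSU.createMemoryGroup();
///   MemoryGroup &Stores = LSU.getGroup(StoreID);
///   MemoryGroup &Loads = LSU.getGroup(LoadID);
///   Stores.addInstruction();              // one store in the older group
///   Loads.addInstruction();               // one load in the younger group
///   Stores.addSuccessor(&Loads, /*IsDataDependent=*/true);
///   // Before the store issues:        Loads.isWaiting() == true
///   // While the store is executing:   Loads.isPending() == true
///   // Once the store has executed:    Loads.isReady()   == true
/// \endcode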
class MemoryGroup {
  unsigned NumPredecessors;
  unsigned NumExecutingPredecessors;
  unsigned NumExecutedPredecessors;

  unsigned NumInstructions;
  unsigned NumExecuting;
  unsigned NumExecuted;
  // Successors that are in an order dependency with this group.
  SmallVector<MemoryGroup *, 4> OrderSucc;
  // Successors that are in a data dependency with this group.
  SmallVector<MemoryGroup *, 4> DataSucc;

  CriticalDependency CriticalPredecessor;
  InstRef CriticalMemoryInstruction;

  MemoryGroup(const MemoryGroup &) = delete;
  MemoryGroup &operator=(const MemoryGroup &) = delete;

public:
  MemoryGroup()
      : NumPredecessors(0), NumExecutingPredecessors(0),
        NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
        NumExecuted(0), CriticalPredecessor() {}
  MemoryGroup(MemoryGroup &&) = default;

  size_t getNumSuccessors() const {
    return OrderSucc.size() + DataSucc.size();
  }
  unsigned getNumPredecessors() const { return NumPredecessors; }
  unsigned getNumExecutingPredecessors() const {
    return NumExecutingPredecessors;
  }
  unsigned getNumExecutedPredecessors() const {
    return NumExecutedPredecessors;
  }
  unsigned getNumInstructions() const { return NumInstructions; }
  unsigned getNumExecuting() const { return NumExecuting; }
  unsigned getNumExecuted() const { return NumExecuted; }

  const InstRef &getCriticalMemoryInstruction() const {
    return CriticalMemoryInstruction;
  }
  const CriticalDependency &getCriticalPredecessor() const {
    return CriticalPredecessor;
  }

  void addSuccessor(MemoryGroup *Group, bool IsDataDependent) {
    // There is no need to add an order dependency if all instructions from
    // this group have already been issued.
    if (!IsDataDependent && isExecuting())
      return;

    Group->NumPredecessors++;
    assert(!isExecuted() && "Should have been removed!");
    if (isExecuting())
      Group->onGroupIssued(CriticalMemoryInstruction, IsDataDependent);

    if (IsDataDependent)
      DataSucc.emplace_back(Group);
    else
      OrderSucc.emplace_back(Group);
  }

  bool isWaiting() const {
    return NumPredecessors >
           (NumExecutingPredecessors + NumExecutedPredecessors);
  }
  bool isPending() const {
    return NumExecutingPredecessors &&
           ((NumExecutedPredecessors + NumExecutingPredecessors) ==
            NumPredecessors);
  }
  bool isReady() const { return NumExecutedPredecessors == NumPredecessors; }
  bool isExecuting() const {
    return NumExecuting && (NumExecuting == (NumInstructions - NumExecuted));
  }
  bool isExecuted() const { return NumInstructions == NumExecuted; }

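  // The event handlers below keep the counters behind the predicates above in
  // sync: onGroupIssued()/onGroupExecuted() are notifications received from
  // predecessor groups, while onInstructionIssued()/onInstructionExecuted()
  // are invoked by the owning LS unit when one of this group's own
  // instructions starts or finishes execution.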
  void onGroupIssued(const InstRef &IR, bool ShouldUpdateCriticalDep) {
    assert(!isReady() && "Unexpected group-start event!");
    NumExecutingPredecessors++;

    if (!ShouldUpdateCriticalDep)
      return;

    unsigned Cycles = IR.getInstruction()->getCyclesLeft();
    if (CriticalPredecessor.Cycles < Cycles) {
      CriticalPredecessor.IID = IR.getSourceIndex();
      CriticalPredecessor.Cycles = Cycles;
    }
  }

  void onGroupExecuted() {
    assert(!isReady() && "Inconsistent state found!");
    NumExecutingPredecessors--;
    NumExecutedPredecessors++;
  }

  void onInstructionIssued(const InstRef &IR) {
    assert(!isExecuting() && "Invalid internal state!");
    ++NumExecuting;

    // Update the critical memory dependency.
    const Instruction &IS = *IR.getInstruction();
    if ((bool)CriticalMemoryInstruction) {
      const Instruction &OtherIS = *CriticalMemoryInstruction.getInstruction();
      if (OtherIS.getCyclesLeft() < IS.getCyclesLeft())
        CriticalMemoryInstruction = IR;
    } else {
      CriticalMemoryInstruction = IR;
    }

    if (!isExecuting())
      return;

    // Notify successors that this group started execution.
    for (MemoryGroup *MG : OrderSucc) {
      MG->onGroupIssued(CriticalMemoryInstruction, false);
      // Release the order dependency with this group.
      MG->onGroupExecuted();
    }

    for (MemoryGroup *MG : DataSucc)
      MG->onGroupIssued(CriticalMemoryInstruction, true);
  }

  void onInstructionExecuted(const InstRef &IR) {
    assert(isReady() && !isExecuted() && "Invalid internal state!");
    --NumExecuting;
    ++NumExecuted;

    if (CriticalMemoryInstruction &&
        CriticalMemoryInstruction.getSourceIndex() == IR.getSourceIndex()) {
      CriticalMemoryInstruction.invalidate();
    }

    if (!isExecuted())
      return;

    // Notify data dependent successors that this group has finished execution.
    for (MemoryGroup *MG : DataSucc)
      MG->onGroupExecuted();
  }

  void addInstruction() {
    assert(!getNumSuccessors() && "Cannot add instructions to this group!");
    ++NumInstructions;
  }

  void cycleEvent() {
    if (isWaiting() && CriticalPredecessor.Cycles)
      CriticalPredecessor.Cycles--;
  }
};

/// Abstract base interface for LS (load/store) units in llvm-mca.
class LSUnitBase : public HardwareUnit {
  /// Load queue size.
  ///
  /// A value of zero for this field means that the load queue is unbounded.
  /// Processor models can declare the size of a load queue via tablegen (see
  /// the definition of tablegen class LoadQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned LQSize;

  /// Store queue size.
  ///
  /// A value of zero for this field means that the store queue is unbounded.
  /// Processor models can declare the size of a store queue via tablegen (see
  /// the definition of tablegen class StoreQueue in
  /// llvm/Target/TargetSchedule.td).
  unsigned SQSize;

  unsigned UsedLQEntries;
  unsigned UsedSQEntries;

  /// True if loads don't alias with stores.
  ///
  /// By default, the LS unit assumes that loads and stores don't alias with
  /// each other. If this field is set to false, then loads are always assumed
  /// to alias with stores.
  const bool NoAlias;

  /// Used to map group identifiers to MemoryGroups.
  DenseMap<unsigned, std::unique_ptr<MemoryGroup>> Groups;
  unsigned NextGroupID;

public:
  LSUnitBase(const MCSchedModel &SM, unsigned LoadQueueSize,
             unsigned StoreQueueSize, bool AssumeNoAlias);

  virtual ~LSUnitBase();

  /// Returns the total number of entries in the load queue.
  unsigned getLoadQueueSize() const { return LQSize; }

  /// Returns the total number of entries in the store queue.
  unsigned getStoreQueueSize() const { return SQSize; }

  unsigned getUsedLQEntries() const { return UsedLQEntries; }
  unsigned getUsedSQEntries() const { return UsedSQEntries; }
  void acquireLQSlot() { ++UsedLQEntries; }
  void acquireSQSlot() { ++UsedSQEntries; }
  void releaseLQSlot() { --UsedLQEntries; }
  void releaseSQSlot() { --UsedSQEntries; }

  bool assumeNoAlias() const { return NoAlias; }

  enum Status {
    LSU_AVAILABLE = 0,
    LSU_LQUEUE_FULL, // Load Queue unavailable
    LSU_SQUEUE_FULL  // Store Queue unavailable
  };

  /// This method checks the availability of the load/store buffers.
  ///
  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR. By default, LSU_AVAILABLE is returned if IR
  /// is not a memory operation.
  virtual Status isAvailable(const InstRef &IR) const = 0;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// with a LSUnitBase::Status value of LSU_AVAILABLE.
  /// Returns the GroupID associated with this instruction. That value will be
  /// used to set the LSUTokenID field in class Instruction.
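  ///
  /// A typical dispatch-stage interaction looks roughly like the sketch below
  /// (illustrative only; it assumes Instruction exposes a setLSUTokenID()
  /// setter matching the getLSUTokenID() accessor used elsewhere in this
  /// header):
  /// \code
  ///   if (LSU.isAvailable(IR) == LSUnitBase::LSU_AVAILABLE) {
  ///     unsigned TokenID = LSU.dispatch(IR);
  ///     IR.getInstruction()->setLSUTokenID(TokenID);
  ///     // Later, LSU.isReady(IR) reports whether IR's memory dependencies
  ///     // have been resolved.
  ///   }
  /// \endcode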
  virtual unsigned dispatch(const InstRef &IR) = 0;

  bool isSQEmpty() const { return !UsedSQEntries; }
  bool isLQEmpty() const { return !UsedLQEntries; }
  bool isSQFull() const { return SQSize && SQSize == UsedSQEntries; }
  bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }

  bool isValidGroupID(unsigned Index) const {
    return Index && (Groups.find(Index) != Groups.end());
  }

  /// Check if a previously dispatched instruction IR is now ready for execution.
  bool isReady(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isReady();
  }

  /// Check if instruction IR only depends on memory instructions that are
  /// currently executing.
  bool isPending(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isPending();
  }

  /// Check if instruction IR is still waiting on memory operations, and the
  /// wait time is still unknown.
  bool isWaiting(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return Group.isWaiting();
  }

  bool hasDependentUsers(const InstRef &IR) const {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    const MemoryGroup &Group = getGroup(GroupID);
    return !Group.isExecuted() && Group.getNumSuccessors();
  }

  const MemoryGroup &getGroup(unsigned Index) const {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  MemoryGroup &getGroup(unsigned Index) {
    assert(isValidGroupID(Index) && "Group doesn't exist!");
    return *Groups.find(Index)->second;
  }

  unsigned createMemoryGroup() {
    Groups.insert(
        std::make_pair(NextGroupID, std::make_unique<MemoryGroup>()));
    return NextGroupID++;
  }

  virtual void onInstructionExecuted(const InstRef &IR);

  // Loads are tracked by the LDQ (load queue) from dispatch until completion.
  // Stores are tracked by the STQ (store queue) from dispatch until commitment.
  // By default we conservatively assume that the LDQ receives a load at
  // dispatch. Loads leave the LDQ at retirement stage.
  virtual void onInstructionRetired(const InstRef &IR);

  virtual void onInstructionIssued(const InstRef &IR) {
    unsigned GroupID = IR.getInstruction()->getLSUTokenID();
    Groups[GroupID]->onInstructionIssued(IR);
  }

  virtual void cycleEvent();

#ifndef NDEBUG
  void dump() const;
#endif
};

/// Default Load/Store Unit (LS Unit) for simulated processors.
///
/// Each load (or store) consumes one entry in the load (or store) queue.
///
/// Rules are:
/// 1) A younger load is allowed to pass an older load only if there are no
///    stores nor barriers in between the two loads.
/// 2) A younger store is not allowed to pass an older store.
/// 3) A younger store is not allowed to pass an older load.
/// 4) A younger load is allowed to pass an older store only if the load does
///    not alias with the store.
///
/// This class optimistically assumes that loads don't alias store operations.
/// Under this assumption, younger loads are always allowed to pass older
/// stores (this only affects rule 4).
/// Essentially, this class doesn't perform any sort of alias analysis to
/// identify aliasing loads and stores.
///
/// To enforce aliasing between loads and stores, flag `AssumeNoAlias` must be
/// set to `false` by the constructor of LSUnit.
///
/// Note that this class doesn't know about the existence of different memory
/// types for memory operations (example: write-through, write-combining, etc.).
/// Derived classes are responsible for implementing that extra knowledge, and
/// provide different sets of rules for loads and stores by overriding method
/// `isReady()`.
/// To emulate a write-combining memory type, rule 2 must be relaxed in a
/// derived class to enable the reordering of non-aliasing store operations.
///
/// No assumptions are made by this class on the size of the store buffer. This
/// class doesn't know how to identify cases where store-to-load forwarding may
/// occur.
///
/// LSUnit doesn't attempt to predict whether a load or store hits or misses
/// the L1 cache. To be more specific, LSUnit doesn't know anything about
/// cache hierarchy and memory types.
/// It only knows if an instruction "mayLoad" and/or "mayStore". For loads, the
/// scheduling model provides an "optimistic" load-to-use latency (which usually
/// matches the load-to-use latency for when there is a hit in the L1D).
/// Derived classes may expand this knowledge.
///
/// Class MCInstrDesc in LLVM doesn't know about serializing operations, nor
/// memory-barrier-like instructions.
/// LSUnit conservatively assumes that an instruction which `mayLoad` and has
/// `unmodeled side effects` behaves like a "soft" load barrier. That means, it
/// serializes loads without forcing a flush of the load queue.
/// Similarly, instructions that both `mayStore` and have `unmodeled side
/// effects` are treated like store barriers. A full memory
/// barrier is a 'mayLoad' and 'mayStore' instruction with unmodeled side
/// effects. This is obviously inaccurate, but this is the best that we can do
/// at the moment.
///
/// Each load/store barrier consumes one entry in the load/store queue. A
/// load/store barrier enforces ordering of loads/stores:
///  - A younger load cannot pass a load barrier.
///  - A younger store cannot pass a store barrier.
///
/// A younger load has to wait for the memory load barrier to execute.
/// A load/store barrier is "executed" when it becomes the oldest entry in
/// the load/store queue(s). That also means that all the older loads/stores
/// have already been executed.
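///
/// The snippet below is a minimal construction sketch (queue sizes are
/// illustrative; a size of zero makes the corresponding queue unbounded, and
/// `SM` is assumed to be an MCSchedModel already available to the caller):
/// \code
///   LSUnit LSU(SM, /*LQ=*/36, /*SQ=*/24, /*AssumeNoAlias=*/true);
///   assert(LSU.assumeNoAlias() && "Loads are modeled as non-aliasing.");
/// \endcode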
class LSUnit : public LSUnitBase {
  // This class doesn't know about the latency of a load instruction. So, it
  // conservatively/pessimistically assumes that the latency of a load opcode
  // matches the instruction latency.
  //
  // FIXME: In the absence of cache misses (i.e. L1I/L1D/iTLB/dTLB hits/misses),
  // and load/store conflicts, the latency of a load is determined by the depth
  // of the load pipeline. So, we could use field `LoadLatency` in the
  // MCSchedModel to model that latency.
  // Field `LoadLatency` often matches the so-called 'load-to-use' latency from
  // L1D, and it usually already accounts for any extra latency due to data
  // forwarding.
  // When doing throughput analysis, `LoadLatency` is likely to
  // be a better predictor of load latency than instruction latency. This is
  // particularly true when simulating code with temporal/spatial locality of
  // memory accesses.
  // Using `LoadLatency` (instead of the instruction latency) is also expected
  // to improve the load queue allocation for long latency instructions with
  // folded memory operands (See PR39829).
  //
  // FIXME: On some processors, load/store operations are split into multiple
  // uOps. For example, X86 AMD Jaguar natively supports 128-bit data types, but
  // not 256-bit data types. So, a 256-bit load is effectively split into two
  // 128-bit loads, and each split load consumes one 'LoadQueue' entry. For
  // simplicity, this class optimistically assumes that a load instruction only
  // consumes one entry in the LoadQueue. Similarly, store instructions only
  // consume a single entry in the StoreQueue.
  // In future, we should reassess the quality of this design, and consider
  // alternative approaches that let instructions specify the number of
  // load/store queue entries which they consume at dispatch stage (See
  // PR39830).
  //
  // An instruction that both 'mayStore' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a store barrier. It forces older stores to be
  // executed before newer stores are issued.
  //
  // An instruction that both 'mayLoad' and 'HasUnmodeledSideEffects' is
  // conservatively treated as a load barrier. It forces older loads to execute
  // before newer loads are issued.
  unsigned CurrentLoadGroupID;
  unsigned CurrentLoadBarrierGroupID;
  unsigned CurrentStoreGroupID;
  unsigned CurrentStoreBarrierGroupID;

public:
  LSUnit(const MCSchedModel &SM)
      : LSUnit(SM, /* LQSize */ 0, /* SQSize */ 0, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ)
      : LSUnit(SM, LQ, SQ, /* NoAlias */ false) {}
  LSUnit(const MCSchedModel &SM, unsigned LQ, unsigned SQ, bool AssumeNoAlias)
      : LSUnitBase(SM, LQ, SQ, AssumeNoAlias), CurrentLoadGroupID(0),
        CurrentLoadBarrierGroupID(0), CurrentStoreGroupID(0),
        CurrentStoreBarrierGroupID(0) {}

  /// Returns LSU_AVAILABLE if there are enough load/store queue entries to
  /// accommodate instruction IR.
  Status isAvailable(const InstRef &IR) const override;

  /// Allocates LS resources for instruction IR.
  ///
  /// This method assumes that a previous call to `isAvailable(IR)` succeeded
  /// returning LSU_AVAILABLE.
  ///
  /// By default, the rules are:
  /// 1. A store may not pass a previous store.
  /// 2. A load may not pass a previous store unless flag 'NoAlias' is set.
  /// 3. A load may pass a previous load.
  /// 4. A store may not pass a previous load (regardless of flag 'NoAlias').
  /// 5. A load has to wait until an older load barrier is fully executed.
  /// 6. A store has to wait until an older store barrier is fully executed.
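  ///
  /// As a worked example of the rules above, assume flag 'NoAlias' is not set
  /// and the dispatched sequence is: an older store S1, then a load L1, then
  /// a younger store S2:
  ///  - L1 has to wait for S1 (rule 2).
  ///  - S2 has to wait for both S1 (rule 1) and L1 (rule 4).
  /// With 'NoAlias' set, the ordering constraint between L1 and S1 disappears,
  /// while both constraints on S2 remain.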
  unsigned dispatch(const InstRef &IR) override;

  void onInstructionExecuted(const InstRef &IR) override;
};

} // namespace mca
} // namespace llvm

#endif // LLVM_MCA_HARDWAREUNITS_LSUNIT_H