Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===--------------------- ResourceManager.h --------------------*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | /// \file |
||
9 | /// |
||
10 | /// The classes here represent processor resource units and their management |
||
11 | /// strategy. These classes are managed by the Scheduler. |
||
12 | /// |
||
13 | //===----------------------------------------------------------------------===// |
||
14 | |||
15 | #ifndef LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
||
16 | #define LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |
||
17 | |||
18 | #include "llvm/ADT/DenseMap.h" |
||
19 | #include "llvm/ADT/SmallVector.h" |
||
20 | #include "llvm/MC/MCSchedule.h" |
||
21 | #include "llvm/MCA/Instruction.h" |
||
22 | #include "llvm/MCA/Support.h" |
||
23 | |||
24 | namespace llvm { |
||
25 | namespace mca { |
||
26 | |||
27 | /// Used to notify the internal state of a processor resource. |
||
28 | /// |
||
29 | /// A processor resource is available if it is not reserved, and there are |
||
30 | /// available slots in the buffer. A processor resource is unavailable if it |
||
31 | /// is either reserved, or the associated buffer is full. A processor resource |
||
32 | /// with a buffer size of -1 is always available if it is not reserved. |
||
33 | /// |
||
34 | /// Values of type ResourceStateEvent are returned by method |
||
35 | /// ResourceManager::canBeDispatched() |
||
36 | /// |
||
37 | /// The naming convention for resource state events is: |
||
38 | /// * Event names start with prefix RS_ |
||
39 | /// * Prefix RS_ is followed by a string describing the actual resource state. |
||
40 | enum ResourceStateEvent { |
||
41 | RS_BUFFER_AVAILABLE, |
||
42 | RS_BUFFER_UNAVAILABLE, |
||
43 | RS_RESERVED |
||
44 | }; |
||
45 | |||
46 | /// Resource allocation strategy used by hardware scheduler resources. |
||
47 | class ResourceStrategy { |
||
48 | ResourceStrategy(const ResourceStrategy &) = delete; |
||
49 | ResourceStrategy &operator=(const ResourceStrategy &) = delete; |
||
50 | |||
51 | public: |
||
52 | ResourceStrategy() = default; |
||
53 | virtual ~ResourceStrategy(); |
||
54 | |||
55 | /// Selects a processor resource unit from a ReadyMask. |
||
56 | virtual uint64_t select(uint64_t ReadyMask) = 0; |
||
57 | |||
58 | /// Called by the ResourceManager when a processor resource group, or a |
||
59 | /// processor resource with multiple units has become unavailable. |
||
60 | /// |
||
61 | /// The default strategy uses this information to bias its selection logic. |
||
62 | virtual void used(uint64_t ResourceMask) {} |
||
63 | }; |
||
64 | |||
65 | /// Default resource allocation strategy used by processor resource groups and |
||
66 | /// processor resources with multiple units. |
||
67 | class DefaultResourceStrategy final : public ResourceStrategy { |
||
68 | /// A Mask of resource unit identifiers. |
||
69 | /// |
||
70 | /// There is one bit set for every available resource unit. |
||
71 | /// It defaults to the value of field ResourceSizeMask in ResourceState. |
||
72 | const uint64_t ResourceUnitMask; |
||
73 | |||
74 | /// A simple round-robin selector for processor resource units. |
||
75 | /// Each bit of this mask identifies a sub resource within a group. |
||
76 | /// |
||
77 | /// As an example, lets assume that this is a default policy for a |
||
78 | /// processor resource group composed by the following three units: |
||
79 | /// ResourceA -- 0b001 |
||
80 | /// ResourceB -- 0b010 |
||
81 | /// ResourceC -- 0b100 |
||
82 | /// |
||
83 | /// Field NextInSequenceMask is used to select the next unit from the set of |
||
84 | /// resource units. It defaults to the value of field `ResourceUnitMasks` (in |
||
85 | /// this example, it defaults to mask '0b111'). |
||
86 | /// |
||
87 | /// The round-robin selector would firstly select 'ResourceC', then |
||
88 | /// 'ResourceB', and eventually 'ResourceA'. When a resource R is used, the |
||
89 | /// corresponding bit in NextInSequenceMask is cleared. For example, if |
||
90 | /// 'ResourceC' is selected, then the new value of NextInSequenceMask becomes |
||
91 | /// 0xb011. |
||
92 | /// |
||
93 | /// When NextInSequenceMask becomes zero, it is automatically reset to the |
||
94 | /// default value (i.e. ResourceUnitMask). |
||
95 | uint64_t NextInSequenceMask; |
||
96 | |||
97 | /// This field is used to track resource units that are used (i.e. selected) |
||
98 | /// by other groups other than the one associated with this strategy object. |
||
99 | /// |
||
100 | /// In LLVM processor resource groups are allowed to partially (or fully) |
||
101 | /// overlap. That means, a same unit may be visible to multiple groups. |
||
102 | /// This field keeps track of uses that have originated from outside of |
||
103 | /// this group. The idea is to bias the selection strategy, so that resources |
||
104 | /// that haven't been used by other groups get prioritized. |
||
105 | /// |
||
106 | /// The end goal is to (try to) keep the resource distribution as much uniform |
||
107 | /// as possible. By construction, this mask only tracks one-level of resource |
||
108 | /// usage. Therefore, this strategy is expected to be less accurate when same |
||
109 | /// units are used multiple times by other groups within a single round of |
||
110 | /// select. |
||
111 | /// |
||
112 | /// Note: an LRU selector would have a better accuracy at the cost of being |
||
113 | /// slightly more expensive (mostly in terms of runtime cost). Methods |
||
114 | /// 'select' and 'used', are always in the hot execution path of llvm-mca. |
||
115 | /// Therefore, a slow implementation of 'select' would have a negative impact |
||
116 | /// on the overall performance of the tool. |
||
117 | uint64_t RemovedFromNextInSequence; |
||
118 | |||
119 | public: |
||
120 | DefaultResourceStrategy(uint64_t UnitMask) |
||
121 | : ResourceUnitMask(UnitMask), NextInSequenceMask(UnitMask), |
||
122 | RemovedFromNextInSequence(0) {} |
||
123 | virtual ~DefaultResourceStrategy() = default; |
||
124 | |||
125 | uint64_t select(uint64_t ReadyMask) override; |
||
126 | void used(uint64_t Mask) override; |
||
127 | }; |
||
128 | |||
129 | /// A processor resource descriptor. |
||
130 | /// |
||
131 | /// There is an instance of this class for every processor resource defined by |
||
132 | /// the machine scheduling model. |
||
133 | /// Objects of class ResourceState dynamically track the usage of processor |
||
134 | /// resource units. |
||
135 | class ResourceState { |
||
136 | /// An index to the MCProcResourceDesc entry in the processor model. |
||
137 | const unsigned ProcResourceDescIndex; |
||
138 | /// A resource mask. This is generated by the tool with the help of |
||
139 | /// function `mca::computeProcResourceMasks' (see Support.h). |
||
140 | /// |
||
141 | /// Field ResourceMask only has one bit set if this resource state describes a |
||
142 | /// processor resource unit (i.e. this is not a group). That means, we can |
||
143 | /// quickly check if a resource is a group by simply counting the number of |
||
144 | /// bits that are set in the mask. |
||
145 | /// |
||
146 | /// The most significant bit of a mask (MSB) uniquely identifies a resource. |
||
147 | /// Remaining bits are used to describe the composition of a group (Group). |
||
148 | /// |
||
149 | /// Example (little endian): |
||
150 | /// Resource | Mask | MSB | Group |
||
151 | /// ---------+------------+------------+------------ |
||
152 | /// A | 0b000001 | 0b000001 | 0b000000 |
||
153 | /// | | | |
||
154 | /// B | 0b000010 | 0b000010 | 0b000000 |
||
155 | /// | | | |
||
156 | /// C | 0b010000 | 0b010000 | 0b000000 |
||
157 | /// | | | |
||
158 | /// D | 0b110010 | 0b100000 | 0b010010 |
||
159 | /// |
||
160 | /// In this example, resources A, B and C are processor resource units. |
||
161 | /// Only resource D is a group resource, and it contains resources B and C. |
||
162 | /// That is because MSB(B) and MSB(C) are both contained within Group(D). |
||
163 | const uint64_t ResourceMask; |
||
164 | |||
165 | /// A ProcResource can have multiple units. |
||
166 | /// |
||
167 | /// For processor resource groups this field is a mask of contained resource |
||
168 | /// units. It is obtained from ResourceMask by clearing the highest set bit. |
||
169 | /// The number of resource units in a group can be simply computed as the |
||
170 | /// population count of this field. |
||
171 | /// |
||
172 | /// For normal (i.e. non-group) resources, the number of bits set in this mask |
||
173 | /// is equivalent to the number of units declared by the processor model (see |
||
174 | /// field 'NumUnits' in 'ProcResourceUnits'). |
||
175 | uint64_t ResourceSizeMask; |
||
176 | |||
177 | /// A mask of ready units. |
||
178 | uint64_t ReadyMask; |
||
179 | |||
180 | /// Buffered resources will have this field set to a positive number different |
||
181 | /// than zero. A buffered resource behaves like a reservation station |
||
182 | /// implementing its own buffer for out-of-order execution. |
||
183 | /// |
||
184 | /// A BufferSize of 1 is used by scheduler resources that force in-order |
||
185 | /// execution. |
||
186 | /// |
||
187 | /// A BufferSize of 0 is used to model in-order issue/dispatch resources. |
||
188 | /// Since in-order issue/dispatch resources don't implement buffers, dispatch |
||
189 | /// events coincide with issue events. |
||
190 | /// Also, no other instruction ca be dispatched/issue while this resource is |
||
191 | /// in use. Only when all the "resource cycles" are consumed (after the issue |
||
192 | /// event), a new instruction ca be dispatched. |
||
193 | const int BufferSize; |
||
194 | |||
195 | /// Available slots in the buffer (zero, if this is not a buffered resource). |
||
196 | unsigned AvailableSlots; |
||
197 | |||
198 | /// This field is set if this resource is currently reserved. |
||
199 | /// |
||
200 | /// Resources can be reserved for a number of cycles. |
||
201 | /// Instructions can still be dispatched to reserved resources. However, |
||
202 | /// istructions dispatched to a reserved resource cannot be issued to the |
||
203 | /// underlying units (i.e. pipelines) until the resource is released. |
||
204 | bool Unavailable; |
||
205 | |||
206 | const bool IsAGroup; |
||
207 | |||
208 | /// Checks for the availability of unit 'SubResMask' in the group. |
||
209 | bool isSubResourceReady(uint64_t SubResMask) const { |
||
210 | return ReadyMask & SubResMask; |
||
211 | } |
||
212 | |||
213 | public: |
||
214 | ResourceState(const MCProcResourceDesc &Desc, unsigned Index, uint64_t Mask); |
||
215 | |||
216 | unsigned getProcResourceID() const { return ProcResourceDescIndex; } |
||
217 | uint64_t getResourceMask() const { return ResourceMask; } |
||
218 | uint64_t getReadyMask() const { return ReadyMask; } |
||
219 | int getBufferSize() const { return BufferSize; } |
||
220 | |||
221 | bool isBuffered() const { return BufferSize > 0; } |
||
222 | bool isInOrder() const { return BufferSize == 1; } |
||
223 | |||
224 | /// Returns true if this is an in-order dispatch/issue resource. |
||
225 | bool isADispatchHazard() const { return BufferSize == 0; } |
||
226 | bool isReserved() const { return Unavailable; } |
||
227 | |||
228 | void setReserved() { Unavailable = true; } |
||
229 | void clearReserved() { Unavailable = false; } |
||
230 | |||
231 | /// Returs true if this resource is not reserved, and if there are at least |
||
232 | /// `NumUnits` available units. |
||
233 | bool isReady(unsigned NumUnits = 1) const; |
||
234 | |||
235 | bool isAResourceGroup() const { return IsAGroup; } |
||
236 | |||
237 | bool containsResource(uint64_t ID) const { return ResourceMask & ID; } |
||
238 | |||
239 | void markSubResourceAsUsed(uint64_t ID) { |
||
240 | assert(isSubResourceReady(ID)); |
||
241 | ReadyMask ^= ID; |
||
242 | } |
||
243 | |||
244 | void releaseSubResource(uint64_t ID) { |
||
245 | assert(!isSubResourceReady(ID)); |
||
246 | ReadyMask ^= ID; |
||
247 | } |
||
248 | |||
249 | unsigned getNumUnits() const { |
||
250 | return isAResourceGroup() ? 1U : llvm::popcount(ResourceSizeMask); |
||
251 | } |
||
252 | |||
253 | /// Checks if there is an available slot in the resource buffer. |
||
254 | /// |
||
255 | /// Returns RS_BUFFER_AVAILABLE if this is not a buffered resource, or if |
||
256 | /// there is a slot available. |
||
257 | /// |
||
258 | /// Returns RS_RESERVED if this buffered resource is a dispatch hazard, and it |
||
259 | /// is reserved. |
||
260 | /// |
||
261 | /// Returns RS_BUFFER_UNAVAILABLE if there are no available slots. |
||
262 | ResourceStateEvent isBufferAvailable() const; |
||
263 | |||
264 | /// Reserve a buffer slot. |
||
265 | /// |
||
266 | /// Returns true if the buffer is not full. |
||
267 | /// It always returns true if BufferSize is set to zero. |
||
268 | bool reserveBuffer() { |
||
269 | if (BufferSize <= 0) |
||
270 | return true; |
||
271 | |||
272 | --AvailableSlots; |
||
273 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
||
274 | return AvailableSlots; |
||
275 | } |
||
276 | |||
277 | /// Releases a slot in the buffer. |
||
278 | void releaseBuffer() { |
||
279 | // Ignore dispatch hazards or invalid buffer sizes. |
||
280 | if (BufferSize <= 0) |
||
281 | return; |
||
282 | |||
283 | ++AvailableSlots; |
||
284 | assert(AvailableSlots <= static_cast<unsigned>(BufferSize)); |
||
285 | } |
||
286 | |||
287 | #ifndef NDEBUG |
||
288 | void dump() const; |
||
289 | #endif |
||
290 | }; |
||
291 | |||
292 | /// A resource unit identifier. |
||
293 | /// |
||
294 | /// This is used to identify a specific processor resource unit using a pair |
||
295 | /// of indices where the 'first' index is a processor resource mask, and the |
||
296 | /// 'second' index is an index for a "sub-resource" (i.e. unit). |
||
297 | typedef std::pair<uint64_t, uint64_t> ResourceRef; |
||
298 | |||
299 | // First: a MCProcResourceDesc index identifying a buffered resource. |
||
300 | // Second: max number of buffer entries used in this resource. |
||
301 | typedef std::pair<unsigned, unsigned> BufferUsageEntry; |
||
302 | |||
303 | /// A resource manager for processor resource units and groups. |
||
304 | /// |
||
305 | /// This class owns all the ResourceState objects, and it is responsible for |
||
306 | /// acting on requests from a Scheduler by updating the internal state of |
||
307 | /// ResourceState objects. |
||
308 | /// This class doesn't know about instruction itineraries and functional units. |
||
309 | /// In future, it can be extended to support itineraries too through the same |
||
310 | /// public interface. |
||
311 | class ResourceManager { |
||
312 | // Set of resources available on the subtarget. |
||
313 | // |
||
314 | // There is an instance of ResourceState for every resource declared by the |
||
315 | // target scheduling model. |
||
316 | // |
||
317 | // Elements of this vector are ordered by resource kind. In particular, |
||
318 | // resource units take precedence over resource groups. |
||
319 | // |
||
320 | // The index of a processor resource in this vector depends on the value of |
||
321 | // its mask (see the description of field ResourceState::ResourceMask). In |
||
322 | // particular, it is computed as the position of the most significant bit set |
||
323 | // (MSB) in the mask plus one (since we want to ignore the invalid resource |
||
324 | // descriptor at index zero). |
||
325 | // |
||
326 | // Example (little endian): |
||
327 | // |
||
328 | // Resource | Mask | MSB | Index |
||
329 | // ---------+---------+---------+------- |
||
330 | // A | 0b00001 | 0b00001 | 1 |
||
331 | // | | | |
||
332 | // B | 0b00100 | 0b00100 | 3 |
||
333 | // | | | |
||
334 | // C | 0b10010 | 0b10000 | 5 |
||
335 | // |
||
336 | // |
||
337 | // The same index is also used to address elements within vector `Strategies` |
||
338 | // and vector `Resource2Groups`. |
||
339 | std::vector<std::unique_ptr<ResourceState>> Resources; |
||
340 | std::vector<std::unique_ptr<ResourceStrategy>> Strategies; |
||
341 | |||
342 | // Used to quickly identify groups that own a particular resource unit. |
||
343 | std::vector<uint64_t> Resource2Groups; |
||
344 | |||
345 | // A table that maps processor resource IDs to processor resource masks. |
||
346 | SmallVector<uint64_t, 8> ProcResID2Mask; |
||
347 | |||
348 | // A table that maps resource indices to actual processor resource IDs in the |
||
349 | // scheduling model. |
||
350 | SmallVector<unsigned, 8> ResIndex2ProcResID; |
||
351 | |||
352 | // Keeps track of which resources are busy, and how many cycles are left |
||
353 | // before those become usable again. |
||
354 | SmallDenseMap<ResourceRef, unsigned> BusyResources; |
||
355 | |||
356 | // Set of processor resource units available on the target. |
||
357 | uint64_t ProcResUnitMask; |
||
358 | |||
359 | // Set of processor resource units that are available during this cycle. |
||
360 | uint64_t AvailableProcResUnits; |
||
361 | |||
362 | // Set of processor resources that are currently reserved. |
||
363 | uint64_t ReservedResourceGroups; |
||
364 | |||
365 | // Set of unavailable scheduler buffer resources. This is used internally to |
||
366 | // speedup `canBeDispatched()` queries. |
||
367 | uint64_t AvailableBuffers; |
||
368 | |||
369 | // Set of dispatch hazard buffer resources that are currently unavailable. |
||
370 | uint64_t ReservedBuffers; |
||
371 | |||
372 | // Returns the actual resource unit that will be used. |
||
373 | ResourceRef selectPipe(uint64_t ResourceID); |
||
374 | |||
375 | void use(const ResourceRef &RR); |
||
376 | void release(const ResourceRef &RR); |
||
377 | |||
378 | unsigned getNumUnits(uint64_t ResourceID) const; |
||
379 | |||
380 | // Overrides the selection strategy for the processor resource with the given |
||
381 | // mask. |
||
382 | void setCustomStrategyImpl(std::unique_ptr<ResourceStrategy> S, |
||
383 | uint64_t ResourceMask); |
||
384 | |||
385 | public: |
||
386 | ResourceManager(const MCSchedModel &SM); |
||
387 | virtual ~ResourceManager() = default; |
||
388 | |||
389 | // Overrides the selection strategy for the resource at index ResourceID in |
||
390 | // the MCProcResourceDesc table. |
||
391 | void setCustomStrategy(std::unique_ptr<ResourceStrategy> S, |
||
392 | unsigned ResourceID) { |
||
393 | assert(ResourceID < ProcResID2Mask.size() && |
||
394 | "Invalid resource index in input!"); |
||
395 | return setCustomStrategyImpl(std::move(S), ProcResID2Mask[ResourceID]); |
||
396 | } |
||
397 | |||
398 | // Returns RS_BUFFER_AVAILABLE if buffered resources are not reserved, and if |
||
399 | // there are enough available slots in the buffers. |
||
400 | ResourceStateEvent canBeDispatched(uint64_t ConsumedBuffers) const; |
||
401 | |||
402 | // Return the processor resource identifier associated to this Mask. |
||
403 | unsigned resolveResourceMask(uint64_t Mask) const; |
||
404 | |||
405 | // Acquires a slot from every buffered resource in mask `ConsumedBuffers`. |
||
406 | // Units that are dispatch hazards (i.e. BufferSize=0) are marked as reserved. |
||
407 | void reserveBuffers(uint64_t ConsumedBuffers); |
||
408 | |||
409 | // Releases a slot from every buffered resource in mask `ConsumedBuffers`. |
||
410 | // ConsumedBuffers is a bitmask of previously acquired buffers (using method |
||
411 | // `reserveBuffers`). Units that are dispatch hazards (i.e. BufferSize=0) are |
||
412 | // not automatically unreserved by this method. |
||
413 | void releaseBuffers(uint64_t ConsumedBuffers); |
||
414 | |||
415 | // Reserve a processor resource. A reserved resource is not available for |
||
416 | // instruction issue until it is released. |
||
417 | void reserveResource(uint64_t ResourceID); |
||
418 | |||
419 | // Release a previously reserved processor resource. |
||
420 | void releaseResource(uint64_t ResourceID); |
||
421 | |||
422 | // Returns a zero mask if resources requested by Desc are all available during |
||
423 | // this cycle. It returns a non-zero mask value only if there are unavailable |
||
424 | // processor resources; each bit set in the mask represents a busy processor |
||
425 | // resource unit or a reserved processor resource group. |
||
426 | uint64_t checkAvailability(const InstrDesc &Desc) const; |
||
427 | |||
428 | uint64_t getProcResUnitMask() const { return ProcResUnitMask; } |
||
429 | uint64_t getAvailableProcResUnits() const { return AvailableProcResUnits; } |
||
430 | |||
431 | void issueInstruction( |
||
432 | const InstrDesc &Desc, |
||
433 | SmallVectorImpl<std::pair<ResourceRef, ResourceCycles>> &Pipes); |
||
434 | |||
435 | void cycleEvent(SmallVectorImpl<ResourceRef> &ResourcesFreed); |
||
436 | |||
437 | #ifndef NDEBUG |
||
438 | void dump() const { |
||
439 | for (const std::unique_ptr<ResourceState> &Resource : Resources) |
||
440 | Resource->dump(); |
||
441 | } |
||
442 | #endif |
||
443 | }; |
||
444 | } // namespace mca |
||
445 | } // namespace llvm |
||
446 | |||
447 | #endif // LLVM_MCA_HARDWAREUNITS_RESOURCEMANAGER_H |