Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
14 | pmbaty | 1 | //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// |
2 | // |
||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
||
4 | // See https://llvm.org/LICENSE.txt for license information. |
||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
||
6 | // |
||
7 | //===----------------------------------------------------------------------===// |
||
8 | // |
||
9 | // This file implements an interface allowing to conveniently build hashes of |
||
10 | // various data types, without relying on the underlying hasher type to know |
||
11 | // about hashed data types. |
||
12 | // |
||
13 | //===----------------------------------------------------------------------===// |
||
14 | |||
15 | #ifndef LLVM_SUPPORT_HASHBUILDER_H |
||
16 | #define LLVM_SUPPORT_HASHBUILDER_H |
||
17 | |||
18 | #include "llvm/ADT/ArrayRef.h" |
||
19 | #include "llvm/ADT/Hashing.h" |
||
20 | #include "llvm/ADT/STLExtras.h" |
||
21 | #include "llvm/ADT/StringRef.h" |
||
22 | #include "llvm/Support/Endian.h" |
||
23 | #include "llvm/Support/type_traits.h" |
||
24 | |||
25 | #include <iterator> |
||
26 | #include <optional> |
||
27 | #include <utility> |
||
28 | |||
29 | namespace llvm { |
||
30 | |||
31 | namespace hashbuilder_detail { |
||
32 | /// Trait to indicate whether a type's bits can be hashed directly (after |
||
33 | /// endianness correction). |
||
34 | template <typename U> |
||
35 | struct IsHashableData |
||
36 | : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; |
||
37 | |||
38 | } // namespace hashbuilder_detail |
||
39 | |||
40 | /// Declares the hasher member, and functions forwarding directly to the hasher. |
||
41 | template <typename HasherT> class HashBuilderBase { |
||
42 | public: |
||
43 | template <typename HasherT_ = HasherT> |
||
44 | using HashResultTy = decltype(std::declval<HasherT_ &>().final()); |
||
45 | |||
46 | HasherT &getHasher() { return Hasher; } |
||
47 | |||
48 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
||
49 | /// |
||
50 | /// This may not take the size of `Data` into account. |
||
51 | /// Users of this function should pay attention to respect endianness |
||
52 | /// contraints. |
||
53 | void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } |
||
54 | |||
55 | /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. |
||
56 | /// |
||
57 | /// This may not take the size of `Data` into account. |
||
58 | /// Users of this function should pay attention to respect endianness |
||
59 | /// contraints. |
||
60 | void update(StringRef Data) { |
||
61 | update( |
||
62 | ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size())); |
||
63 | } |
||
64 | |||
65 | /// Forward to `HasherT::final()` if available. |
||
66 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() { |
||
67 | return this->getHasher().final(); |
||
68 | } |
||
69 | |||
70 | /// Forward to `HasherT::result()` if available. |
||
71 | template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() { |
||
72 | return this->getHasher().result(); |
||
73 | } |
||
74 | |||
75 | protected: |
||
76 | explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} |
||
77 | |||
78 | template <typename... ArgTypes> |
||
79 | explicit HashBuilderBase(ArgTypes &&...Args) |
||
80 | : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...), |
||
81 | Hasher(*OptionalHasher) {} |
||
82 | |||
83 | private: |
||
84 | std::optional<HasherT> OptionalHasher; |
||
85 | HasherT &Hasher; |
||
86 | }; |
||
87 | |||
88 | /// Implementation of the `HashBuilder` interface. |
||
89 | /// |
||
90 | /// `support::endianness::native` is not supported. `HashBuilder` is |
||
91 | /// expected to canonicalize `support::endianness::native` to one of |
||
92 | /// `support::endianness::big` or `support::endianness::little`. |
||
93 | template <typename HasherT, support::endianness Endianness> |
||
94 | class HashBuilderImpl : public HashBuilderBase<HasherT> { |
||
95 | static_assert(Endianness != support::endianness::native, |
||
96 | "HashBuilder should canonicalize endianness"); |
||
97 | |||
98 | public: |
||
99 | explicit HashBuilderImpl(HasherT &Hasher) |
||
100 | : HashBuilderBase<HasherT>(Hasher) {} |
||
101 | template <typename... ArgTypes> |
||
102 | explicit HashBuilderImpl(ArgTypes &&...Args) |
||
103 | : HashBuilderBase<HasherT>(Args...) {} |
||
104 | |||
105 | /// Implement hashing for hashable data types, e.g. integral or enum values. |
||
106 | template <typename T> |
||
107 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, |
||
108 | HashBuilderImpl &> |
||
109 | add(T Value) { |
||
110 | return adjustForEndiannessAndAdd(Value); |
||
111 | } |
||
112 | |||
113 | /// Support hashing `ArrayRef`. |
||
114 | /// |
||
115 | /// `Value.size()` is taken into account to ensure cases like |
||
116 | /// ``` |
||
117 | /// builder.add({1}); |
||
118 | /// builder.add({2, 3}); |
||
119 | /// ``` |
||
120 | /// and |
||
121 | /// ``` |
||
122 | /// builder.add({1, 2}); |
||
123 | /// builder.add({3}); |
||
124 | /// ``` |
||
125 | /// do not collide. |
||
126 | template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { |
||
127 | // As of implementation time, simply calling `addRange(Value)` would also go |
||
128 | // through the `update` fast path. But that would rely on the implementation |
||
129 | // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call |
||
130 | // `update` to guarantee the fast path. |
||
131 | add(Value.size()); |
||
132 | if (hashbuilder_detail::IsHashableData<T>::value && |
||
133 | Endianness == support::endian::system_endianness()) { |
||
134 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
||
135 | Value.size() * sizeof(T))); |
||
136 | } else { |
||
137 | for (auto &V : Value) |
||
138 | add(V); |
||
139 | } |
||
140 | return *this; |
||
141 | } |
||
142 | |||
143 | /// Support hashing `StringRef`. |
||
144 | /// |
||
145 | /// `Value.size()` is taken into account to ensure cases like |
||
146 | /// ``` |
||
147 | /// builder.add("a"); |
||
148 | /// builder.add("bc"); |
||
149 | /// ``` |
||
150 | /// and |
||
151 | /// ``` |
||
152 | /// builder.add("ab"); |
||
153 | /// builder.add("c"); |
||
154 | /// ``` |
||
155 | /// do not collide. |
||
156 | HashBuilderImpl &add(StringRef Value) { |
||
157 | // As of implementation time, simply calling `addRange(Value)` would also go |
||
158 | // through `update`. But that would rely on the implementation of |
||
159 | // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to |
||
160 | // guarantee the fast path. |
||
161 | add(Value.size()); |
||
162 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), |
||
163 | Value.size())); |
||
164 | return *this; |
||
165 | } |
||
166 | |||
167 | template <typename T> |
||
168 | using HasAddHashT = |
||
169 | decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); |
||
170 | /// Implement hashing for user-defined `struct`s. |
||
171 | /// |
||
172 | /// Any user-define `struct` can participate in hashing via `HashBuilder` by |
||
173 | /// providing a `addHash` templated function. |
||
174 | /// |
||
175 | /// ``` |
||
176 | /// template <typename HasherT, support::endianness Endianness> |
||
177 | /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, |
||
178 | /// const UserDefinedStruct &Value); |
||
179 | /// ``` |
||
180 | /// |
||
181 | /// For example: |
||
182 | /// ``` |
||
183 | /// struct SimpleStruct { |
||
184 | /// char c; |
||
185 | /// int i; |
||
186 | /// }; |
||
187 | /// |
||
188 | /// template <typename HasherT, support::endianness Endianness> |
||
189 | /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
||
190 | /// const SimpleStruct &Value) { |
||
191 | /// HBuilder.add(Value.c); |
||
192 | /// HBuilder.add(Value.i); |
||
193 | /// } |
||
194 | /// ``` |
||
195 | /// |
||
196 | /// To avoid endianness issues, specializations of `addHash` should |
||
197 | /// generally rely on exising `add`, `addRange`, and `addRangeElements` |
||
198 | /// functions. If directly using `update`, an implementation must correctly |
||
199 | /// handle endianness. |
||
200 | /// |
||
201 | /// ``` |
||
202 | /// struct __attribute__ ((packed)) StructWithFastHash { |
||
203 | /// int I; |
||
204 | /// char C; |
||
205 | /// |
||
206 | /// // If possible, we want to hash both `I` and `C` in a single |
||
207 | /// // `update` call for performance concerns. |
||
208 | /// template <typename HasherT, support::endianness Endianness> |
||
209 | /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
||
210 | /// const StructWithFastHash &Value) { |
||
211 | /// if (Endianness == support::endian::system_endianness()) { |
||
212 | /// HBuilder.update(ArrayRef( |
||
213 | /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); |
||
214 | /// } else { |
||
215 | /// // Rely on existing `add` methods to handle endianness. |
||
216 | /// HBuilder.add(Value.I); |
||
217 | /// HBuilder.add(Value.C); |
||
218 | /// } |
||
219 | /// } |
||
220 | /// }; |
||
221 | /// ``` |
||
222 | /// |
||
223 | /// To avoid collisions, specialization of `addHash` for variable-size |
||
224 | /// types must take the size into account. |
||
225 | /// |
||
226 | /// For example: |
||
227 | /// ``` |
||
228 | /// struct CustomContainer { |
||
229 | /// private: |
||
230 | /// size_t Size; |
||
231 | /// int Elements[100]; |
||
232 | /// |
||
233 | /// public: |
||
234 | /// CustomContainer(size_t Size) : Size(Size) { |
||
235 | /// for (size_t I = 0; I != Size; ++I) |
||
236 | /// Elements[I] = I; |
||
237 | /// } |
||
238 | /// template <typename HasherT, support::endianness Endianness> |
||
239 | /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, |
||
240 | /// const CustomContainer &Value) { |
||
241 | /// if (Endianness == support::endian::system_endianness()) { |
||
242 | /// HBuilder.update(ArrayRef( |
||
243 | /// reinterpret_cast<const uint8_t *>(&Value.Size), |
||
244 | /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); |
||
245 | /// } else { |
||
246 | /// // `addRange` will take care of encoding the size. |
||
247 | /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + |
||
248 | /// Value.Size); |
||
249 | /// } |
||
250 | /// } |
||
251 | /// }; |
||
252 | /// ``` |
||
253 | template <typename T> |
||
254 | std::enable_if_t<is_detected<HasAddHashT, T>::value && |
||
255 | !hashbuilder_detail::IsHashableData<T>::value, |
||
256 | HashBuilderImpl &> |
||
257 | add(const T &Value) { |
||
258 | addHash(*this, Value); |
||
259 | return *this; |
||
260 | } |
||
261 | |||
262 | template <typename T1, typename T2> |
||
263 | HashBuilderImpl &add(const std::pair<T1, T2> &Value) { |
||
264 | return add(Value.first, Value.second); |
||
265 | } |
||
266 | |||
267 | template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { |
||
268 | std::apply([this](const auto &...Args) { this->add(Args...); }, Arg); |
||
269 | return *this; |
||
270 | } |
||
271 | |||
272 | /// A convenenience variadic helper. |
||
273 | /// It simply iterates over its arguments, in order. |
||
274 | /// ``` |
||
275 | /// add(Arg1, Arg2); |
||
276 | /// ``` |
||
277 | /// is equivalent to |
||
278 | /// ``` |
||
279 | /// add(Arg1) |
||
280 | /// add(Arg2) |
||
281 | /// ``` |
||
282 | template <typename... Ts> |
||
283 | std::enable_if_t<(sizeof...(Ts) > 1), HashBuilderImpl &> |
||
284 | add(const Ts &...Args) { |
||
285 | return (add(Args), ...); |
||
286 | } |
||
287 | |||
288 | template <typename ForwardIteratorT> |
||
289 | HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { |
||
290 | add(std::distance(First, Last)); |
||
291 | return addRangeElements(First, Last); |
||
292 | } |
||
293 | |||
294 | template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { |
||
295 | return addRange(adl_begin(Range), adl_end(Range)); |
||
296 | } |
||
297 | |||
298 | template <typename ForwardIteratorT> |
||
299 | HashBuilderImpl &addRangeElements(ForwardIteratorT First, |
||
300 | ForwardIteratorT Last) { |
||
301 | return addRangeElementsImpl( |
||
302 | First, Last, |
||
303 | typename std::iterator_traits<ForwardIteratorT>::iterator_category()); |
||
304 | } |
||
305 | |||
306 | template <typename RangeT> |
||
307 | HashBuilderImpl &addRangeElements(const RangeT &Range) { |
||
308 | return addRangeElements(adl_begin(Range), adl_end(Range)); |
||
309 | } |
||
310 | |||
311 | template <typename T> |
||
312 | using HasByteSwapT = decltype(support::endian::byte_swap( |
||
313 | std::declval<T &>(), support::endianness::little)); |
||
314 | /// Adjust `Value` for the target endianness and add it to the hash. |
||
315 | template <typename T> |
||
316 | std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> |
||
317 | adjustForEndiannessAndAdd(const T &Value) { |
||
318 | T SwappedValue = support::endian::byte_swap(Value, Endianness); |
||
319 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), |
||
320 | sizeof(SwappedValue))); |
||
321 | return *this; |
||
322 | } |
||
323 | |||
324 | private: |
||
325 | // FIXME: Once available, specialize this function for `contiguous_iterator`s, |
||
326 | // and use it for `ArrayRef` and `StringRef`. |
||
327 | template <typename ForwardIteratorT> |
||
328 | HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, |
||
329 | ForwardIteratorT Last, |
||
330 | std::forward_iterator_tag) { |
||
331 | for (auto It = First; It != Last; ++It) |
||
332 | add(*It); |
||
333 | return *this; |
||
334 | } |
||
335 | |||
336 | template <typename T> |
||
337 | std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && |
||
338 | Endianness == support::endian::system_endianness(), |
||
339 | HashBuilderImpl &> |
||
340 | addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { |
||
341 | this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First), |
||
342 | (Last - First) * sizeof(T))); |
||
343 | return *this; |
||
344 | } |
||
345 | }; |
||
346 | |||
347 | /// Interface to help hash various types through a hasher type. |
||
348 | /// |
||
349 | /// Via provided specializations of `add`, `addRange`, and `addRangeElements` |
||
350 | /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed |
||
351 | /// without requiring any knowledge of hashed types from the hasher type. |
||
352 | /// |
||
353 | /// The only method expected from the templated hasher type `HasherT` is: |
||
354 | /// * void update(ArrayRef<uint8_t> Data) |
||
355 | /// |
||
356 | /// Additionally, the following methods will be forwarded to the hasher type: |
||
357 | /// * decltype(std::declval<HasherT &>().final()) final() |
||
358 | /// * decltype(std::declval<HasherT &>().result()) result() |
||
359 | /// |
||
360 | /// From a user point of view, the interface provides the following: |
||
361 | /// * `template<typename T> add(const T &Value)` |
||
362 | /// The `add` function implements hashing of various types. |
||
363 | /// * `template <typename ItT> void addRange(ItT First, ItT Last)` |
||
364 | /// The `addRange` function is designed to aid hashing a range of values. |
||
365 | /// It explicitly adds the size of the range in the hash. |
||
366 | /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` |
||
367 | /// The `addRangeElements` function is also designed to aid hashing a range of |
||
368 | /// values. In contrast to `addRange`, it **ignores** the size of the range, |
||
369 | /// behaving as if elements were added one at a time with `add`. |
||
370 | /// |
||
371 | /// User-defined `struct` types can participate in this interface by providing |
||
372 | /// an `addHash` templated function. See the associated template specialization |
||
373 | /// for details. |
||
374 | /// |
||
375 | /// This interface does not impose requirements on the hasher |
||
376 | /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for |
||
377 | /// variable-size types; for example for |
||
378 | /// ``` |
||
379 | /// builder.add({1}); |
||
380 | /// builder.add({2, 3}); |
||
381 | /// ``` |
||
382 | /// and |
||
383 | /// ``` |
||
384 | /// builder.add({1, 2}); |
||
385 | /// builder.add({3}); |
||
386 | /// ``` |
||
387 | /// . Thus, specializations of `add` and `addHash` for variable-size types must |
||
388 | /// not assume that the hasher type considers the size as part of the hash; they |
||
389 | /// must explicitly add the size to the hash. See for example specializations |
||
390 | /// for `ArrayRef` and `StringRef`. |
||
391 | /// |
||
392 | /// Additionally, since types are eventually forwarded to the hasher's |
||
393 | /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash |
||
394 | /// computation (for example when computing `add((int)123)`). |
||
395 | /// Specifiying a non-`native` `Endianness` template parameter allows to compute |
||
396 | /// stable hash across platforms with different endianness. |
||
397 | template <class HasherT, support::endianness Endianness> |
||
398 | using HashBuilder = |
||
399 | HashBuilderImpl<HasherT, (Endianness == support::endianness::native |
||
400 | ? support::endian::system_endianness() |
||
401 | : Endianness)>; |
||
402 | |||
403 | namespace hashbuilder_detail { |
||
404 | class HashCodeHasher { |
||
405 | public: |
||
406 | HashCodeHasher() : Code(0) {} |
||
407 | void update(ArrayRef<uint8_t> Data) { |
||
408 | hash_code DataCode = hash_value(Data); |
||
409 | Code = hash_combine(Code, DataCode); |
||
410 | } |
||
411 | hash_code Code; |
||
412 | }; |
||
413 | |||
414 | using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, |
||
415 | support::endianness::native>; |
||
416 | } // namespace hashbuilder_detail |
||
417 | |||
418 | /// Provide a default implementation of `hash_value` when `addHash(const T &)` |
||
419 | /// is supported. |
||
420 | template <typename T> |
||
421 | std::enable_if_t< |
||
422 | is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, |
||
423 | hash_code> |
||
424 | hash_value(const T &Value) { |
||
425 | hashbuilder_detail::HashCodeHashBuilder HBuilder; |
||
426 | HBuilder.add(Value); |
||
427 | return HBuilder.getHasher().Code; |
||
428 | } |
||
429 | } // end namespace llvm |
||
430 | |||
431 | #endif // LLVM_SUPPORT_HASHBUILDER_H |