//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
 
//
 
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 
// See https://llvm.org/LICENSE.txt for license information.
 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
//
 
//===----------------------------------------------------------------------===//
 
 
 
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
 
#define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
 
 
 
#include "llvm/ADT/SparseBitVector.h"
 
#include "llvm/ADT/iterator.h"
 
#include "llvm/DebugInfo/PDB/Native/RawError.h"
 
#include "llvm/Support/BinaryStreamReader.h"
 
#include "llvm/Support/BinaryStreamWriter.h"
 
#include "llvm/Support/Endian.h"
 
#include "llvm/Support/Error.h"
 
#include <cstdint>
 
#include <iterator>
#include <optional>
 
#include <utility>
 
#include <vector>
 
 
 
namespace llvm {
 
 
 
namespace pdb {
 
 
 
Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
 
Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
 
 
 
template <typename ValueT> class HashTable;
 
 
 
template <typename ValueT>
 
class HashTableIterator
 
    : public iterator_facade_base<HashTableIterator<ValueT>,
 
                                  std::forward_iterator_tag,
 
                                  const std::pair<uint32_t, ValueT>> {
 
  using BaseT = typename HashTableIterator::iterator_facade_base;
 
  friend HashTable<ValueT>;
 
 
 
  HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
 
                    bool IsEnd)
 
      : Map(&Map), Index(Index), IsEnd(IsEnd) {}
 
 
 
public:
 
  HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
 
    int I = Map.Present.find_first();
 
    if (I == -1) {
 
      Index = 0;
 
      IsEnd = true;
 
    } else {
 
      Index = static_cast<uint32_t>(I);
 
      IsEnd = false;
 
    }
 
  }
 
 
 
  HashTableIterator(const HashTableIterator &R) = default;
 
  HashTableIterator &operator=(const HashTableIterator &R) {
    Map = R.Map;
    Index = R.Index;
    IsEnd = R.IsEnd;
    return *this;
  }
 
  bool operator==(const HashTableIterator &R) const {
 
    if (IsEnd && R.IsEnd)
 
      return true;
 
    if (IsEnd != R.IsEnd)
 
      return false;
 
 
 
    return (Map == R.Map) && (Index == R.Index);
 
  }
 
  const std::pair<uint32_t, ValueT> &operator*() const {
 
    assert(Map->Present.test(Index));
 
    return Map->Buckets[Index];
 
  }
 
 
 
  // Implement postfix op++ in terms of prefix op++ by using the superclass
 
  // implementation.
 
  using BaseT::operator++;
 
  HashTableIterator &operator++() {
 
    while (Index < Map->Buckets.size()) {
 
      ++Index;
 
      if (Map->Present.test(Index))
 
        return *this;
 
    }
 
 
 
    IsEnd = true;
 
    return *this;
 
  }
 
 
 
private:
 
  bool isEnd() const { return IsEnd; }
 
  uint32_t index() const { return Index; }
 
 
 
  const HashTable<ValueT> *Map;
 
  uint32_t Index;
 
  bool IsEnd;
 
};
 
 
 
template <typename ValueT>
 
class HashTable {
 
  struct Header {
 
    support::ulittle32_t Size;
 
    support::ulittle32_t Capacity;
 
  };
 
 
 
  using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
 
 
 
public:
 
  using const_iterator = HashTableIterator<ValueT>;
 
  friend const_iterator;
 
 
 
  HashTable() { Buckets.resize(8); }
 
  explicit HashTable(uint32_t Capacity) {
 
    Buckets.resize(Capacity);
 
  }
 
 
 
  Error load(BinaryStreamReader &Stream) {
 
    const Header *H;
 
    if (auto EC = Stream.readObject(H))
 
      return EC;
 
    if (H->Capacity == 0)
 
      return make_error<RawError>(raw_error_code::corrupt_file,
 
                                  "Invalid Hash Table Capacity");
 
    if (H->Size > maxLoad(H->Capacity))
 
      return make_error<RawError>(raw_error_code::corrupt_file,
 
                                  "Invalid Hash Table Size");
 
 
 
    Buckets.resize(H->Capacity);
 
 
 
    if (auto EC = readSparseBitVector(Stream, Present))
 
      return EC;
 
    if (Present.count() != H->Size)
 
      return make_error<RawError>(raw_error_code::corrupt_file,
 
                                  "Present bit vector does not match size!");
 
 
 
    if (auto EC = readSparseBitVector(Stream, Deleted))
 
      return EC;
 
    if (Present.intersects(Deleted))
 
      return make_error<RawError>(raw_error_code::corrupt_file,
 
                                  "Present bit vector intersects deleted!");
 
 
 
    for (uint32_t P : Present) {
 
      if (auto EC = Stream.readInteger(Buckets[P].first))
 
        return EC;
 
      const ValueT *Value;
 
      if (auto EC = Stream.readObject(Value))
 
        return EC;
 
      Buckets[P].second = *Value;
 
    }
 
 
 
    return Error::success();
 
  }
 
 
 
  uint32_t calculateSerializedLength() const {
 
    uint32_t Size = sizeof(Header);
 
 
 
    constexpr int BitsPerWord = 8 * sizeof(uint32_t);
 
 
 
    int NumBitsP = Present.find_last() + 1;
 
    int NumBitsD = Deleted.find_last() + 1;
 
 
 
    uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
 
    uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
 
 
 
    // Present bit set number of words (4 bytes), followed by that many actual
 
    // words (4 bytes each).
 
    Size += sizeof(uint32_t);
 
    Size += NumWordsP * sizeof(uint32_t);
 
 
 
    // Deleted bit set number of words (4 bytes), followed by that many actual
 
    // words (4 bytes each).
 
    Size += sizeof(uint32_t);
 
    Size += NumWordsD * sizeof(uint32_t);
 
 
 
    // One (Key, ValueT) pair for each entry Present.
 
    Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
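
    // For example (assuming sizeof(ValueT) == 4): two entries present in
    // buckets 0 and 5 with nothing deleted serialize to 8 (header) + 4 + 4
    // (present words) + 4 + 0 (deleted words) + 2 * 8 (pairs) = 36 bytes.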
 
 
 
    return Size;
 
  }
 
 
 
  Error commit(BinaryStreamWriter &Writer) const {
 
    Header H;
 
    H.Size = size();
 
    H.Capacity = capacity();
 
    if (auto EC = Writer.writeObject(H))
 
      return EC;
 
 
 
    if (auto EC = writeSparseBitVector(Writer, Present))
 
      return EC;
 
 
 
    if (auto EC = writeSparseBitVector(Writer, Deleted))
 
      return EC;
 
 
 
    for (const auto &Entry : *this) {
 
      if (auto EC = Writer.writeInteger(Entry.first))
 
        return EC;
 
      if (auto EC = Writer.writeObject(Entry.second))
 
        return EC;
 
    }
 
    return Error::success();
 
  }
 
 
 
  void clear() {
 
    Buckets.resize(8);
 
    Present.clear();
 
    Deleted.clear();
 
  }
 
 
 
  bool empty() const { return size() == 0; }
 
  uint32_t capacity() const { return Buckets.size(); }
 
  uint32_t size() const { return Present.count(); }
 
 
 
  const_iterator begin() const { return const_iterator(*this); }
 
  const_iterator end() const { return const_iterator(*this, 0, true); }
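
  // A sketch of iteration (Table and use are placeholders): only present
  // buckets are visited, and each element is a (storage key, value) pair.
  //
  //   for (const auto &Entry : Table)
  //     use(Entry.first, Entry.second);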
 
 
 
  /// Find the entry whose key has the specified hash value, using the
  /// specified traits to define the hash function and key equality.  If no
  /// matching entry exists, the returned end iterator records the index of the
  /// first unused bucket, which set_as_internal() uses as the insertion point.
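  /// A minimal sketch of a traits type satisfying what this class expects
  /// (ExampleTraits is hypothetical and not part of this header):
  /// \code
  ///   struct ExampleTraits {
  ///     uint32_t hashLookupKey(uint32_t K) const { return K; }
  ///     uint32_t storageKeyToLookupKey(uint32_t StorageKey) const {
  ///       return StorageKey;
  ///     }
  ///     uint32_t lookupKeyToStorageKey(uint32_t K) { return K; }
  ///   };
  /// \endcode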
 
  template <typename Key, typename TraitsT>
 
  const_iterator find_as(const Key &K, TraitsT &Traits) const {
 
    uint32_t H = Traits.hashLookupKey(K) % capacity();
 
    uint32_t I = H;
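    // Linear probing: e.g., with capacity 8 and a hash of 6, buckets are
    // examined in the order 6, 7, 0, 1, ... wrapping modulo the capacity.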
 
    std::optional<uint32_t> FirstUnused;
 
    do {
 
      if (isPresent(I)) {
 
        if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
 
          return const_iterator(*this, I, false);
 
      } else {
 
        if (!FirstUnused)
 
          FirstUnused = I;
 
        // Insertion occurs via linear probing from the slot hint, and will be
 
        // inserted at the first empty / deleted location.  Therefore, if we are
 
        // probing and find a location that is neither present nor deleted, then
 
        // nothing must have EVER been inserted at this location, and thus it is
 
        // not possible for a matching value to occur later.
 
        if (!isDeleted(I))
 
          break;
 
      }
 
      I = (I + 1) % capacity();
 
    } while (I != H);
 
 
 
    // The only way FirstUnused would not be set is if every single entry in the
 
    // table were Present.  But this would violate the load factor constraints
 
    // that we impose, so it should never happen.
 
    assert(FirstUnused);
 
    return const_iterator(*this, *FirstUnused, true);
 
  }
 
 
 
  /// Set the entry using a key type that the specified Traits can convert
  /// from a real key to an internal key.  Returns true if a new entry was
  /// inserted and false if an existing entry was updated in place.
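  /// A minimal usage sketch (ExampleTraits is the hypothetical identity traits
  /// type sketched above find_as):
  /// \code
  ///   HashTable<uint32_t> Table;
  ///   ExampleTraits Traits;
  ///   Table.set_as(42u, 7u, Traits);       // inserts a new entry, returns true
  ///   uint32_t V = Table.get(42u, Traits); // V == 7
  /// \endcode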
 
  template <typename Key, typename TraitsT>
 
  bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
 
    return set_as_internal(K, std::move(V), Traits, std::nullopt);
 
  }
 
 
 
  template <typename Key, typename TraitsT>
 
  ValueT get(const Key &K, TraitsT &Traits) const {
 
    auto Iter = find_as(K, Traits);
 
    assert(Iter != end());
 
    return (*Iter).second;
 
  }
 
 
 
protected:
 
  bool isPresent(uint32_t K) const { return Present.test(K); }
 
  bool isDeleted(uint32_t K) const { return Deleted.test(K); }
 
 
 
  BucketList Buckets;
 
  mutable SparseBitVector<> Present;
 
  mutable SparseBitVector<> Deleted;
 
 
 
private:
 
  /// Set the entry using a key type that the specified Traits can convert
  /// from a real key to an internal key.  If InternalKey is provided, it is
  /// stored directly as the bucket's storage key instead of calling
  /// lookupKeyToStorageKey; grow() relies on this to preserve storage keys
  /// while rehashing.
 
  template <typename Key, typename TraitsT>
 
  bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
 
                       std::optional<uint32_t> InternalKey) {
 
    auto Entry = find_as(K, Traits);
 
    if (Entry != end()) {
 
      assert(isPresent(Entry.index()));
 
      assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
 
      // We're updating, no need to do anything special.
 
      Buckets[Entry.index()].second = V;
 
      return false;
 
    }
 
 
 
    auto &B = Buckets[Entry.index()];
 
    assert(!isPresent(Entry.index()));
 
    assert(Entry.isEnd());
 
    B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
 
    B.second = V;
 
    Present.set(Entry.index());
 
    Deleted.reset(Entry.index());
 
 
 
    grow(Traits);
 
 
 
    assert((find_as(K, Traits)) != end());
 
    return true;
 
  }
 
 
 
  static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
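  // For example, maxLoad(8) == 8 * 2 / 3 + 1 == 6, so an 8-bucket table is
  // grown once it holds 6 entries.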
 
 
 
  template <typename TraitsT>
 
  void grow(TraitsT &Traits) {
 
    uint32_t S = size();
 
    uint32_t MaxLoad = maxLoad(capacity());
 
    if (S < MaxLoad)
 
      return;
 
    assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
 
 
 
    uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
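    // For example, a table at capacity 8 with MaxLoad == 6 is rebuilt with
    // NewCapacity == 12; the new capacity doubles the load limit rather than
    // the old capacity.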
 
 
 
    // Growing requires rebuilding the table and re-hashing every item.  Make a
 
    // copy with a larger capacity, insert everything into the copy, then swap
 
    // it in.
 
    HashTable NewMap(NewCapacity);
 
    for (auto I : Present) {
 
      auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
 
      NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
 
                             Buckets[I].first);
 
    }
 
 
 
    Buckets.swap(NewMap.Buckets);
 
    std::swap(Present, NewMap.Present);
 
    std::swap(Deleted, NewMap.Deleted);
 
    assert(capacity() == NewCapacity);
 
    assert(size() == S);
 
  }
 
};
 
 
 
} // end namespace pdb
 
 
 
} // end namespace llvm
 
 
 
#endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H