From 6be878885f35076a749997a90627eee43fded35e Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 10:43:56 +0200 Subject: [PATCH 1/6] introduce new trait instead of relying on existing hash traits. --- crates/hash-sorted-map/README.md | 6 +- crates/hash-sorted-map/src/hash_sorted_map.rs | 140 +++++++++++++----- crates/hash-sorted-map/src/lib.rs | 2 +- 3 files changed, 109 insertions(+), 39 deletions(-) diff --git a/crates/hash-sorted-map/README.md b/crates/hash-sorted-map/README.md index d4ec3fad..a893a47c 100644 --- a/crates/hash-sorted-map/README.md +++ b/crates/hash-sorted-map/README.md @@ -37,8 +37,10 @@ keys, which means: together, keeping a single insert's data within 1–2 cache lines. - **Optimized growth** — during resize, elements are re-inserted without duplicate checking and copied via raw pointers. -- **Generic key/value/hasher** — supports any `K: Hash + Eq`, any - `S: BuildHasher`, and `Borrow`-based lookups. +- **Generic key/value/hasher** — keys need only `Eq` (`Ord` to sort). + Customise hashing with the single-method [`SortingHash`] trait; any + standard `S: BuildHasher` works out of the box via a blanket impl, and + `Borrow`-based lookups are supported. ## Benchmark results diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 0cc37b46..52d82363 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -12,10 +12,57 @@ pub(crate) use super::group::NO_OVERFLOW; // ── Helpers ───────────────────────────────────────────────────────────────── #[inline] -fn tag(hash: u64) -> u8 { +fn tag(hash: u32) -> u8 { (hash as u8) | 0x80 } +// ──────────────────────────────────────────────────────────────────────── +// SortingHash +// ──────────────────────────────────────────────────────────────────────── + +/// Maps a key to the 32-bit hash that determines its position in the map. 
+/// +/// The high bits select the primary group, so visiting groups in index order +/// yields entries in ascending hash order — the property [`HashSortedMap`] +/// relies on for sorted iteration and linear-time merging. The hash should +/// therefore be well distributed in its high bits. +/// +/// Every [`BuildHasher`] (the default [`RandomState`], `foldhash`, `ahash`, +/// `fnv`, …) implements this trait automatically through a blanket impl, so +/// it can be used as a drop-in. For full control — including keys that do not +/// implement [`Hash`] — implement this single method directly instead of the +/// streaming [`Hasher`](std::hash::Hasher) interface: +/// +/// ``` +/// use hash_sorted_map::{HashSortedMap, SortingHash}; +/// +/// #[derive(Default)] +/// struct Identity; +/// impl SortingHash for Identity { +/// fn hash(&self, &key: &u32) -> u32 { +/// key +/// } +/// } +/// +/// let mut map = HashSortedMap::with_hasher(Identity); +/// map.insert(42u32, "answer"); +/// assert_eq!(map.get(&42), Some(&"answer")); +/// ``` +pub trait SortingHash { + /// Returns the hash of `key`. + fn hash(&self, key: &K) -> u32; +} + +/// Bridges the standard library's [`BuildHasher`] to [`SortingHash`], so any +/// existing hasher keeps working unchanged. The high 32 bits of the 64-bit +/// hash are used, since the map groups entries by the most significant bits. 
+impl SortingHash for S { + #[inline] + fn hash(&self, key: &K) -> u32 { + (self.hash_one(key) >> 32) as u32 + } +} + // ──────────────────────────────────────────────────────────────────────── // HashSortedMap // ──────────────────────────────────────────────────────────────────────── @@ -29,7 +76,7 @@ pub struct HashSortedMap { pub(crate) num_groups: u32, pub(crate) n_bits: u32, pub(crate) len: usize, - hash_builder: S, + hasher: S, } impl Default for HashSortedMap { @@ -49,11 +96,11 @@ impl HashSortedMap { } impl HashSortedMap { - pub fn with_hasher(hash_builder: S) -> Self { - Self::with_capacity_and_hasher(0, hash_builder) + pub fn with_hasher(hasher: S) -> Self { + Self::with_capacity_and_hasher(0, hasher) } - pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { let adjusted = (capacity as f64 / group_ops::MAX_FILL).ceil() as usize; let min_groups = (adjusted.div_ceil(GROUP_SIZE)).max(1).next_power_of_two(); let n_bits = min_groups.trailing_zeros().max(1); @@ -63,7 +110,7 @@ impl HashSortedMap { num_groups, n_bits, len: 0, - hash_builder, + hasher, } } @@ -84,12 +131,12 @@ impl HashSortedMap { } #[inline] - pub(crate) fn group_index(&self, hash: u64) -> usize { - (hash >> (64 - self.n_bits)) as usize + pub(crate) fn group_index(&self, hash: u32) -> usize { + (hash >> (32 - self.n_bits)) as usize } } -impl HashSortedMap { +impl> HashSortedMap { /// Sort all entries within each primary group chain by their hash value, /// breaking ties by key. 
/// @@ -115,7 +162,7 @@ impl HashSortedMap { pub fn sort_by_hash(&mut self) { let num_primary = 1usize << self.n_bits; let mut chain: Vec = Vec::new(); - let mut hashes: Vec = Vec::new(); + let mut hashes: Vec = Vec::new(); for primary_gi in 0..num_primary { chain.clear(); @@ -137,9 +184,7 @@ impl HashSortedMap { for &cgi in &chain[..chain.len() - 1] { let g = &self.groups[cgi as usize]; for slot in 0..GROUP_SIZE { - let hash = self - .hash_builder - .hash_one(unsafe { g.keys[slot].assume_init_ref() }); + let hash = self.hasher.hash(unsafe { g.keys[slot].assume_init_ref() }); hashes.push(hash); } } @@ -149,9 +194,7 @@ impl HashSortedMap { if g.ctrl[slot] == CTRL_EMPTY { break; } - let hash = self - .hash_builder - .hash_one(unsafe { g.keys[slot].assume_init_ref() }); + let hash = self.hasher.hash(unsafe { g.keys[slot].assume_init_ref() }); hashes.push(hash); } @@ -204,18 +247,19 @@ impl HashSortedMap { } } -impl HashSortedMap { +impl> HashSortedMap { pub fn insert(&mut self, key: K, value: V) -> Option { - let hash = self.hash_builder.hash_one(&key); + let hash = self.hasher.hash(&key); self.insert_hashed(hash, key, value) } pub fn get(&self, key: &Q) -> Option<&V> where K: Borrow, - Q: Hash + Eq + ?Sized, + Q: Eq + ?Sized, + S: SortingHash, { - let hash = self.hash_builder.hash_one(key); + let hash = self.hasher.hash(key); self.get_hashed(hash, key) } @@ -238,7 +282,7 @@ impl HashSortedMap { /// the resulting `VacantEntry` already knows where to write. #[inline] pub fn entry(&mut self, key: K) -> Entry<'_, K, V, S> { - let hash = self.hash_builder.hash_one(&key); + let hash = self.hasher.hash(&key); match self.find_or_insertion_slot(hash, &key) { FindResult::Found(ptr) => Entry::Occupied(OccupiedEntry { // SAFETY: pointer is valid for `'_` (bounded by `&mut self`). 
@@ -254,7 +298,7 @@ impl HashSortedMap { } } - fn insert_hashed(&mut self, hash: u64, key: K, value: V) -> Option { + fn insert_hashed(&mut self, hash: u32, key: K, value: V) -> Option { let tag = tag(hash); let mut gi = self.group_index(hash); loop { @@ -301,7 +345,7 @@ impl HashSortedMap { } } - fn get_hashed(&self, hash: u64, key: &Q) -> Option<&V> + fn get_hashed(&self, hash: u32, key: &Q) -> Option<&V> where K: Borrow, Q: Eq + ?Sized, @@ -334,7 +378,7 @@ impl HashSortedMap { /// Returns raw pointers (instead of indices) so the caller can write /// directly without re-indexing. Pointers remain valid for the lifetime /// of `&mut self` until any reallocation (`grow`). - fn find_or_insertion_slot(&mut self, hash: u64, key: &K) -> FindResult { + fn find_or_insertion_slot(&mut self, hash: u32, key: &K) -> FindResult { let tag = tag(hash); let mut gi = self.group_index(hash); @@ -382,9 +426,7 @@ impl HashSortedMap { for group in &old_groups[..old_num_groups] { for i in 0..group_ops::count_occupied(&group.ctrl) { - let hash = self - .hash_builder - .hash_one(unsafe { group.keys[i].assume_init_ref() }); + let hash = self.hasher.hash(unsafe { group.keys[i].assume_init_ref() }); self.insert_for_grow(hash, group.keys[i].as_ptr(), group.values[i].as_ptr()); } } @@ -395,7 +437,7 @@ impl HashSortedMap { debug_assert_eq!(self.len, old_len); } - fn insert_for_grow(&mut self, hash: u64, key_src: *const K, value_src: *const V) { + fn insert_for_grow(&mut self, hash: u32, key_src: *const K, value_src: *const V) { let tag = tag(hash); let gi = self.group_index(hash); let mut group = &mut self.groups[gi]; @@ -479,12 +521,12 @@ pub struct OccupiedEntry<'a, V> { pub struct VacantEntry<'a, K, V, S> { phantom: PhantomData<&'a mut HashSortedMap>, map: *mut HashSortedMap, - hash: u64, + hash: u32, key: K, insertion: Insertion, } -impl<'a, K: Hash + Eq, V, S: BuildHasher> Entry<'a, K, V, S> { +impl<'a, K: Eq, V, S: SortingHash> Entry<'a, K, V, S> { /// Insert `default` if vacant; return 
a mutable reference to the value either way. #[inline] pub fn or_insert(self, default: V) -> &'a mut V { @@ -545,7 +587,7 @@ impl<'a, V> OccupiedEntry<'a, V> { } } -impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { +impl<'a, K: Eq, V, S: SortingHash> VacantEntry<'a, K, V, S> { /// Insert `value` and return a mutable reference to it. /// Writes directly to the slot pre-computed during `entry()`; only re-walks /// the chain on the rare grow path (where the pre-computed pointers become @@ -597,7 +639,7 @@ impl<'a, K: Hash + Eq, V, S: BuildHasher> VacantEntry<'a, K, V, S> { /// the new insertion slot. #[cold] #[inline(never)] -fn insert_after_grow( +fn insert_after_grow>( map: &mut HashSortedMap, key: K, value: V, @@ -840,6 +882,32 @@ mod tests { } } + #[test] + fn custom_sorting_hash_without_hash_key() { + // A key type that intentionally does NOT implement `Hash`, proving the + // map only requires `SortingHash` (not `std::hash::Hash`) when a custom + // hasher is supplied. + #[derive(PartialEq, Eq)] + struct Key(u32); + + struct ByValue; + impl SortingHash for ByValue { + fn hash(&self, key: &Key) -> u32 { + key.0.wrapping_mul(0x9E37_79B1) + } + } + + let mut map = HashSortedMap::with_hasher(ByValue); + for i in 0..200u32 { + assert_eq!(map.insert(Key(i), i), None); + } + assert_eq!(map.len(), 200); + for i in 0..200u32 { + assert_eq!(map.get(&Key(i)), Some(&i)); + } + assert_eq!(map.get(&Key(999)), None); + } + // ── sort_by_hash tests ────────────────────────────────────────────── #[test] @@ -889,11 +957,11 @@ mod tests { } map.sort_by_hash(); // Iteration should now yield entries in (hash, key) order. 
- let mut prev_hash = 0u64; + let mut prev_hash = 0u32; let mut prev_key = 0u32; let mut first = true; for (&k, _) in &map { - let h = hasher.hash_one(k); + let h = SortingHash::hash(&hasher, &k); if !first { assert!( (h, k) >= (prev_hash, prev_key), @@ -934,11 +1002,11 @@ mod tests { } map.sort_by_hash(); assert_eq!(map.len(), 100); - let mut prev_hash = 0u64; + let mut prev_hash = 0u32; let mut prev_key = String::new(); let mut first = true; for (k, _) in &map { - let h = hasher.hash_one(k); + let h = SortingHash::hash(&hasher, k); if !first { assert!( (h, k) >= (prev_hash, &prev_key), diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 3ff5461e..4381d2d9 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -3,5 +3,5 @@ mod group_ops; mod hash_sorted_map; mod iter; -pub use hash_sorted_map::{Entry, HashSortedMap, OccupiedEntry, VacantEntry}; +pub use hash_sorted_map::{Entry, HashSortedMap, OccupiedEntry, SortingHash, VacantEntry}; pub use iter::{IntoIter, Iter, IterMut}; From 529c7275b680ba58123633343e3b6478e1daaed1 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 11:01:52 +0200 Subject: [PATCH 2/6] prepare for publishing --- crates/hash-sorted-map/LICENSE | 21 +++++++++++++++ crates/hash-sorted-map/benchmarks/Cargo.toml | 1 + crates/hash-sorted-map/src/hash_sorted_map.rs | 16 +++++++++++ crates/hash-sorted-map/src/lib.rs | 27 +++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 crates/hash-sorted-map/LICENSE diff --git a/crates/hash-sorted-map/LICENSE b/crates/hash-sorted-map/LICENSE new file mode 100644 index 00000000..163074d5 --- /dev/null +++ b/crates/hash-sorted-map/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 GitHub Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/crates/hash-sorted-map/benchmarks/Cargo.toml b/crates/hash-sorted-map/benchmarks/Cargo.toml index 0fddba94..d0ae1f13 100644 --- a/crates/hash-sorted-map/benchmarks/Cargo.toml +++ b/crates/hash-sorted-map/benchmarks/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "hash-sorted-map-benchmarks" edition = "2021" +publish = false [lib] path = "lib.rs" diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 52d82363..117a1399 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -86,20 +86,26 @@ impl Default for HashSortedMap { } impl HashSortedMap { + /// Creates an empty map using the default [`RandomState`] hasher. pub fn new() -> Self { Self::with_capacity_and_hasher(0, RandomState::new()) } + /// Creates an empty map that can hold at least `capacity` entries without + /// growing, using the default [`RandomState`] hasher. 
pub fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, RandomState::new()) } } impl HashSortedMap { + /// Creates an empty map that hashes keys with `hasher`. pub fn with_hasher(hasher: S) -> Self { Self::with_capacity_and_hasher(0, hasher) } + /// Creates an empty map that hashes keys with `hasher` and can hold at + /// least `capacity` entries without growing. pub fn with_capacity_and_hasher(capacity: usize, hasher: S) -> Self { let adjusted = (capacity as f64 / group_ops::MAX_FILL).ceil() as usize; let min_groups = (adjusted.div_ceil(GROUP_SIZE)).max(1).next_power_of_two(); @@ -114,10 +120,12 @@ impl HashSortedMap { } } + /// Returns the number of entries in the map. pub fn len(&self) -> usize { self.len } + /// Returns `true` if the map contains no entries. pub fn is_empty(&self) -> bool { self.len == 0 } @@ -248,11 +256,17 @@ impl> HashSortedMap { } impl> HashSortedMap { + /// Inserts a key/value pair, returning the previous value for `key` if it + /// was already present (otherwise `None`). pub fn insert(&mut self, key: K, value: V) -> Option { let hash = self.hasher.hash(&key); self.insert_hashed(hash, key, value) } + /// Returns a reference to the value for `key`, or `None` if it is absent. + /// + /// The key may be any borrowed form of `K`, as long as the borrowed value + /// hashes and compares equal to the owned key. pub fn get(&self, key: &Q) -> Option<&V> where K: Borrow, @@ -507,7 +521,9 @@ enum Insertion { /// View into a single entry in a [`HashSortedMap`], either occupied or vacant. pub enum Entry<'a, K, V, S> { + /// An occupied entry whose key already exists in the map. Occupied(OccupiedEntry<'a, V>), + /// A vacant entry whose key is absent from the map. Vacant(VacantEntry<'a, K, V, S>), } diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 4381d2d9..8792d7a4 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -1,3 +1,30 @@ +//! 
A hash map whose groups are ordered by hash prefix, enabling efficient +//! sorted-order iteration and linear-time merging of two maps. +//! +//! [`HashSortedMap`] is a Swiss-table-inspired, insertion-only hash map. Its +//! groups are laid out by hash prefix, so visiting them in order yields entries +//! sorted by hashed key. This makes merging two maps a single linear scan and +//! lets serialization in hash-key order happen without an extra sort. +//! +//! Hashing is customized through the single-method [`SortingHash`] trait. Any +//! standard [`BuildHasher`](std::hash::BuildHasher) works out of the box via a +//! blanket implementation. +//! +//! ``` +//! use hash_sorted_map::HashSortedMap; +//! +//! let mut map = HashSortedMap::new(); +//! map.insert("hello", 1); +//! map.insert("world", 2); +//! assert_eq!(map.get("hello"), Some(&1)); +//! +//! // Iterate in ascending hash order. +//! map.sort_by_hash(); +//! let entries: Vec<_> = map.iter().map(|(&k, &v)| (k, v)).collect(); +//! assert_eq!(entries.len(), 2); +//! 
``` +#![warn(missing_docs)] + mod group; mod group_ops; mod hash_sorted_map; From fbb2c0dac52bbfa3ecb7ff636baeee485f967133 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 11:08:03 +0200 Subject: [PATCH 3/6] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 45 +++++++++---------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 117a1399..f907fb2d 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -12,7 +12,7 @@ pub(crate) use super::group::NO_OVERFLOW; // ── Helpers ───────────────────────────────────────────────────────────────── #[inline] -fn tag(hash: u32) -> u8 { +fn tag(hash: u64) -> u8 { (hash as u8) | 0x80 } @@ -20,7 +20,7 @@ fn tag(hash: u32) -> u8 { // SortingHash // ──────────────────────────────────────────────────────────────────────── -/// Maps a key to the 32-bit hash that determines its position in the map. +/// Maps a key to the 64-bit hash that determines its position in the map. /// /// The high bits select the primary group, so visiting groups in index order /// yields entries in ascending hash order — the property [`HashSortedMap`] @@ -39,8 +39,8 @@ fn tag(hash: u32) -> u8 { /// #[derive(Default)] /// struct Identity; /// impl SortingHash for Identity { -/// fn hash(&self, &key: &u32) -> u32 { -/// key +/// fn hash(&self, &key: &u32) -> u64 { +/// (key as u64) | ((key as u64) << 32) /// } /// } /// @@ -50,16 +50,15 @@ fn tag(hash: u32) -> u8 { /// ``` pub trait SortingHash { /// Returns the hash of `key`. - fn hash(&self, key: &K) -> u32; + fn hash(&self, key: &K) -> u64; } /// Bridges the standard library's [`BuildHasher`] to [`SortingHash`], so any -/// existing hasher keeps working unchanged. The high 32 bits of the 64-bit -/// hash are used, since the map groups entries by the most significant bits. 
+/// existing hasher keeps working unchanged. impl SortingHash for S { #[inline] - fn hash(&self, key: &K) -> u32 { - (self.hash_one(key) >> 32) as u32 + fn hash(&self, key: &K) -> u64 { + self.hash_one(key) } } @@ -139,8 +138,8 @@ impl HashSortedMap { } #[inline] - pub(crate) fn group_index(&self, hash: u32) -> usize { - (hash >> (32 - self.n_bits)) as usize + pub(crate) fn group_index(&self, hash: u64) -> usize { + (hash >> (64 - self.n_bits)) as usize } } @@ -170,7 +169,7 @@ impl> HashSortedMap { pub fn sort_by_hash(&mut self) { let num_primary = 1usize << self.n_bits; let mut chain: Vec = Vec::new(); - let mut hashes: Vec = Vec::new(); + let mut hashes: Vec = Vec::new(); for primary_gi in 0..num_primary { chain.clear(); @@ -312,7 +311,7 @@ impl> HashSortedMap { } } - fn insert_hashed(&mut self, hash: u32, key: K, value: V) -> Option { + fn insert_hashed(&mut self, hash: u64, key: K, value: V) -> Option { let tag = tag(hash); let mut gi = self.group_index(hash); loop { @@ -359,7 +358,7 @@ impl> HashSortedMap { } } - fn get_hashed(&self, hash: u32, key: &Q) -> Option<&V> + fn get_hashed(&self, hash: u64, key: &Q) -> Option<&V> where K: Borrow, Q: Eq + ?Sized, @@ -392,7 +391,7 @@ impl> HashSortedMap { /// Returns raw pointers (instead of indices) so the caller can write /// directly without re-indexing. Pointers remain valid for the lifetime /// of `&mut self` until any reallocation (`grow`). 
- fn find_or_insertion_slot(&mut self, hash: u32, key: &K) -> FindResult { + fn find_or_insertion_slot(&mut self, hash: u64, key: &K) -> FindResult { let tag = tag(hash); let mut gi = self.group_index(hash); @@ -451,7 +450,7 @@ impl> HashSortedMap { debug_assert_eq!(self.len, old_len); } - fn insert_for_grow(&mut self, hash: u32, key_src: *const K, value_src: *const V) { + fn insert_for_grow(&mut self, hash: u64, key_src: *const K, value_src: *const V) { let tag = tag(hash); let gi = self.group_index(hash); let mut group = &mut self.groups[gi]; @@ -537,7 +536,7 @@ pub struct OccupiedEntry<'a, V> { pub struct VacantEntry<'a, K, V, S> { phantom: PhantomData<&'a mut HashSortedMap>, map: *mut HashSortedMap, - hash: u32, + hash: u64, key: K, insertion: Insertion, } @@ -908,8 +907,8 @@ mod tests { struct ByValue; impl SortingHash for ByValue { - fn hash(&self, key: &Key) -> u32 { - key.0.wrapping_mul(0x9E37_79B1) + fn hash(&self, key: &Key) -> u64 { + (key.0 as u64).wrapping_mul(0x9E37_79B9_7F4A_7C15) } } @@ -973,11 +972,11 @@ mod tests { } map.sort_by_hash(); // Iteration should now yield entries in (hash, key) order. 
- let mut prev_hash = 0u32; + let mut prev_hash = 0u64; let mut prev_key = 0u32; let mut first = true; for (&k, _) in &map { - let h = SortingHash::hash(&hasher, &k); + let h = hasher.hash_one(k); if !first { assert!( (h, k) >= (prev_hash, prev_key), @@ -1018,11 +1017,11 @@ mod tests { } map.sort_by_hash(); assert_eq!(map.len(), 100); - let mut prev_hash = 0u32; + let mut prev_hash = 0u64; let mut prev_key = String::new(); let mut first = true; for (k, _) in &map { - let h = SortingHash::hash(&hasher, k); + let h = hasher.hash_one(k); if !first { assert!( (h, k) >= (prev_hash, &prev_key), From b13f07cc874450f9cdec1b51c1158e80a7853ba7 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 11:11:46 +0200 Subject: [PATCH 4/6] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- crates/hash-sorted-map/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/hash-sorted-map/README.md b/crates/hash-sorted-map/README.md index a893a47c..94980083 100644 --- a/crates/hash-sorted-map/README.md +++ b/crates/hash-sorted-map/README.md @@ -38,7 +38,7 @@ keys, which means: - **Optimized growth** — during resize, elements are re-inserted without duplicate checking and copied via raw pointers. - **Generic key/value/hasher** — keys need only `Eq` (`Ord` to sort). - Customise hashing with the single-method [`SortingHash`] trait; any + Customize hashing with the single-method [`SortingHash`] trait; any standard `S: BuildHasher` works out of the box via a blanket impl, and `Borrow`-based lookups are supported. 
From 89cfc33c061c80bce8f65e09ecd0352ac104b1f6 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 11:13:55 +0200 Subject: [PATCH 5/6] Update hash_sorted_map.rs --- crates/hash-sorted-map/src/hash_sorted_map.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index f907fb2d..472f872a 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -69,7 +69,8 @@ impl SortingHash for S { /// Insertion-only hash map with SIMD group scanning. /// /// Uses NEON on aarch64, SSE2 on x86_64, scalar fallback elsewhere. -/// Generic over key type `K`, value type `V`, and hash builder `S`. +/// Generic over key type `K`, value type `V`, and hashing strategy `S` +/// (any [`SortingHash`](SortingHash), which every [`BuildHasher`] satisfies). pub struct HashSortedMap { pub(crate) groups: Box<[Group]>, pub(crate) num_groups: u32, From 55ae5f0535214c7e5ac81fe218ff911586895566 Mon Sep 17 00:00:00 2001 From: Alexander Neubeck Date: Mon, 15 Jun 2026 12:30:48 +0200 Subject: [PATCH 6/6] hash-sorted-map: drop RandomState default; hasher is now explicit The hasher determines a HashSortedMap's iteration order, so there is no meaningful default hasher for a sort-by-hash map (RandomState gives a different, per-process order, defeating the purpose). Remove the `S = RandomState` default type parameter and stop hardcoding RandomState in new()/with_capacity()/Default; those now construct `S::default()`, so a map with a custom default-constructible SortingHash works via the type's own constructors. Any BuildHasher (incl. RandomState) is still usable by naming it explicitly through the blanket SortingHash impl. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- crates/hash-sorted-map/src/hash_sorted_map.rs | 73 +++++++++++-------- crates/hash-sorted-map/src/iter.rs | 24 +++--- crates/hash-sorted-map/src/lib.rs | 3 +- 3 files changed, 56 insertions(+), 44 deletions(-) diff --git a/crates/hash-sorted-map/src/hash_sorted_map.rs b/crates/hash-sorted-map/src/hash_sorted_map.rs index 472f872a..66083496 100644 --- a/crates/hash-sorted-map/src/hash_sorted_map.rs +++ b/crates/hash-sorted-map/src/hash_sorted_map.rs @@ -1,6 +1,5 @@ use core::mem::MaybeUninit; use std::borrow::Borrow; -use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hash}; use std::marker::PhantomData; @@ -27,10 +26,11 @@ fn tag(hash: u64) -> u8 { /// relies on for sorted iteration and linear-time merging. The hash should /// therefore be well distributed in its high bits. /// -/// Every [`BuildHasher`] (the default [`RandomState`], `foldhash`, `ahash`, -/// `fnv`, …) implements this trait automatically through a blanket impl, so -/// it can be used as a drop-in. For full control — including keys that do not -/// implement [`Hash`] — implement this single method directly instead of the +/// Every [`BuildHasher`] ([`RandomState`](std::collections::hash_map::RandomState), +/// `foldhash`, `ahash`, `fnv`, …) implements this trait automatically through a +/// blanket impl, so it can be used as a drop-in. For full control — including +/// keys that do not implement [`Hash`] — implement this single method directly +/// instead of the /// streaming [`Hasher`](std::hash::Hasher) interface: /// /// ``` @@ -71,7 +71,12 @@ impl SortingHash for S { /// Uses NEON on aarch64, SSE2 on x86_64, scalar fallback elsewhere. /// Generic over key type `K`, value type `V`, and hashing strategy `S` /// (any [`SortingHash`](SortingHash), which every [`BuildHasher`] satisfies). 
-pub struct HashSortedMap { +/// +/// `S` has no default: because the hasher determines the iteration order, there +/// is no meaningful "default" hasher for a sort-by-hash map. Construct one with +/// [`with_hasher`](Self::with_hasher), or — for a [`Default`] hasher `S` — with +/// [`new`](Self::new) / [`with_capacity`](Self::with_capacity) / [`Default`]. +pub struct HashSortedMap { pub(crate) groups: Box<[Group]>, pub(crate) num_groups: u32, pub(crate) n_bits: u32, @@ -79,22 +84,22 @@ pub struct HashSortedMap { hasher: S, } -impl Default for HashSortedMap { +impl Default for HashSortedMap { fn default() -> Self { Self::new() } } -impl HashSortedMap { - /// Creates an empty map using the default [`RandomState`] hasher. +impl HashSortedMap { + /// Creates an empty map using the default-constructed hasher `S`. pub fn new() -> Self { - Self::with_capacity_and_hasher(0, RandomState::new()) + Self::with_capacity_and_hasher(0, S::default()) } /// Creates an empty map that can hold at least `capacity` entries without - /// growing, using the default [`RandomState`] hasher. + /// growing, using the default-constructed hasher `S`. 
pub fn with_capacity(capacity: usize) -> Self { - Self::with_capacity_and_hasher(capacity, RandomState::new()) + Self::with_capacity_and_hasher(capacity, S::default()) } } @@ -679,13 +684,14 @@ impl Drop for HashSortedMap { #[cfg(test)] mod tests { + use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hasher}; use super::*; #[test] fn insert_and_get() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); map.insert(100, "hello"); map.insert(200, "world"); assert_eq!(map.get(&100), Some(&"hello")); @@ -696,7 +702,7 @@ mod tests { #[test] fn insert_overwrite() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); map.insert(42, "a"); assert_eq!(map.insert(42, "b"), Some("a")); assert_eq!(map.get(&42), Some(&"b")); @@ -705,7 +711,7 @@ mod tests { #[test] fn grow_preserves_entries() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); for i in 0..200u32 { map.insert(i, i * 10); } @@ -717,7 +723,7 @@ mod tests { #[test] fn many_entries() { - let mut map = HashSortedMap::with_capacity(2000); + let mut map = HashSortedMap::<_, _, RandomState>::with_capacity(2000); for i in 0..2000u32 { map.insert(i.wrapping_mul(2654435761), i); } @@ -729,7 +735,7 @@ mod tests { #[test] fn overflow_chain() { - let mut map = HashSortedMap::with_capacity(8); + let mut map = HashSortedMap::<_, _, RandomState>::with_capacity(8); for i in 0..20u32 { let key = i | 0xAB000000; map.insert(key, i); @@ -743,7 +749,7 @@ mod tests { #[test] fn grow_on_overflow_exhaustion() { - let mut map = HashSortedMap::with_capacity(1); + let mut map = HashSortedMap::<_, _, RandomState>::with_capacity(1); let old_n_bits = map.n_bits; for i in 0..100u32 { let key = i | 0xFF000000; @@ -759,7 +765,7 @@ mod tests { #[test] fn string_keys() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); 
map.insert("hello".to_string(), 1); map.insert("world".to_string(), 2); assert_eq!(map.get("hello"), Some(&1)); @@ -774,7 +780,8 @@ mod tests { #[test] fn get_or_default_basics() { - let mut map: HashSortedMap<&str, i32> = HashSortedMap::new(); + let mut map: HashSortedMap<&str, i32, RandomState> = + HashSortedMap::<_, _, RandomState>::new(); // Inserts default (0), then mutates. *map.get_or_default("a") += 5; *map.get_or_default("b") += 7; @@ -787,7 +794,8 @@ mod tests { #[test] fn get_or_insert_with_lazy() { - let mut map: HashSortedMap = HashSortedMap::new(); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::new(); let mut call_count = 0; let mut make = |s: &str| { call_count += 1; @@ -814,7 +822,8 @@ mod tests { #[test] fn get_or_default_survives_grow() { - let mut map: HashSortedMap = HashSortedMap::with_capacity(1); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::with_capacity(1); for i in 0..500u32 { *map.get_or_default(i) = i * 2; } @@ -827,7 +836,8 @@ mod tests { #[test] fn entry_or_default_counting() { // Classic counting workload via Entry API. - let mut map: HashSortedMap<&str, u32> = HashSortedMap::new(); + let mut map: HashSortedMap<&str, u32, RandomState> = + HashSortedMap::<_, _, RandomState>::new(); for word in ["a", "b", "a", "c", "b", "a"] { *map.entry(word).or_default() += 1; } @@ -839,7 +849,8 @@ mod tests { #[test] fn entry_or_insert_lazy() { - let mut map: HashSortedMap = HashSortedMap::new(); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::new(); let mut call_count = 0; let mut make = |s: &str| { call_count += 1; @@ -856,7 +867,8 @@ mod tests { #[test] fn entry_and_modify() { - let mut map: HashSortedMap = HashSortedMap::new(); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::new(); // Vacant: and_modify is a no-op, then or_insert(0) runs. 
*map.entry(7).and_modify(|v| *v *= 10).or_insert(1) += 100; assert_eq!(map.get(&7), Some(&101)); @@ -928,14 +940,15 @@ mod tests { #[test] fn sort_by_hash_empty() { - let mut map: HashSortedMap = HashSortedMap::new(); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::new(); map.sort_by_hash(); assert_eq!(map.len(), 0); } #[test] fn sort_by_hash_single() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); map.insert(42u32, "hello"); map.sort_by_hash(); assert_eq!(map.len(), 1); @@ -945,7 +958,7 @@ mod tests { #[test] fn sort_by_hash_preserves_entries() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); for i in 0..200u32 { map.insert(i, i * 10); } @@ -964,8 +977,6 @@ mod tests { #[test] fn sort_by_hash_produces_hash_order() { - use std::collections::hash_map::RandomState; - let hasher = RandomState::new(); let mut map = HashSortedMap::with_hasher(hasher.clone()); for i in 0..500u32 { @@ -1009,8 +1020,6 @@ mod tests { #[test] fn sort_by_hash_with_strings() { - use std::collections::hash_map::RandomState; - let hasher = RandomState::new(); let mut map = HashSortedMap::with_hasher(hasher.clone()); for i in 0..100u32 { diff --git a/crates/hash-sorted-map/src/iter.rs b/crates/hash-sorted-map/src/iter.rs index e981bad3..ffc77720 100644 --- a/crates/hash-sorted-map/src/iter.rs +++ b/crates/hash-sorted-map/src/iter.rs @@ -212,6 +212,7 @@ impl<'a, K, V, S> IntoIterator for &'a mut HashSortedMap { #[cfg(test)] mod tests { + use std::collections::hash_map::RandomState; use std::hash::{BuildHasher, Hasher}; use super::*; @@ -238,13 +239,13 @@ mod tests { #[test] fn iter_empty() { - let map: HashSortedMap = HashSortedMap::new(); + let map: HashSortedMap = HashSortedMap::<_, _, RandomState>::new(); assert_eq!(map.iter().count(), 0); } #[test] fn iter_yields_all_entries() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); 
for i in 0..100u32 { map.insert(i, i * 10); } @@ -272,7 +273,7 @@ mod tests { #[test] fn iter_mut_mutates_values() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); for i in 0..20u32 { map.insert(i, i); } @@ -286,7 +287,7 @@ mod tests { #[test] fn into_iter_yields_all() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); for i in 0..100u32 { map.insert(i, i * 3); } @@ -300,7 +301,8 @@ mod tests { #[test] fn into_iter_partial_consume_then_drop() { - let mut map: HashSortedMap = HashSortedMap::new(); + let mut map: HashSortedMap = + HashSortedMap::<_, _, RandomState>::new(); for i in 0..50u32 { map.insert(format!("key-{i}"), format!("val-{i}")); } @@ -313,7 +315,7 @@ mod tests { #[test] fn into_iter_empty() { - let map: HashSortedMap = HashSortedMap::new(); + let map: HashSortedMap = HashSortedMap::<_, _, RandomState>::new(); assert_eq!(map.into_iter().count(), 0); } @@ -333,7 +335,7 @@ mod tests { #[test] fn into_iter_after_grow() { - let mut map = HashSortedMap::with_capacity(1); + let mut map = HashSortedMap::<_, _, RandomState>::with_capacity(1); for i in 0..500u32 { map.insert(i, i); } @@ -358,7 +360,7 @@ mod tests { let counter = Rc::new(Cell::new(0usize)); let n = 100; { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); for i in 0..n { map.insert(i, Tracked(counter.clone())); } @@ -372,7 +374,7 @@ mod tests { #[test] fn for_loop_ref() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); map.insert(1, "a"); map.insert(2, "b"); let mut count = 0; @@ -384,7 +386,7 @@ mod tests { #[test] fn for_loop_mut() { - let mut map = HashSortedMap::new(); + let mut map = HashSortedMap::<_, _, RandomState>::new(); map.insert(1u32, 10u32); map.insert(2, 20); for (_, v) in &mut map { @@ -396,7 +398,7 @@ mod tests { #[test] fn for_loop_owned() { - let mut map = HashSortedMap::new(); + let mut 
map = HashSortedMap::<_, _, RandomState>::new(); map.insert(1, 10); map.insert(2, 20); let mut sum = 0; diff --git a/crates/hash-sorted-map/src/lib.rs b/crates/hash-sorted-map/src/lib.rs index 8792d7a4..7deb8539 100644 --- a/crates/hash-sorted-map/src/lib.rs +++ b/crates/hash-sorted-map/src/lib.rs @@ -11,9 +11,10 @@ //! blanket implementation. //! //! ``` +//! use std::collections::hash_map::RandomState; //! use hash_sorted_map::HashSortedMap; //! -//! let mut map = HashSortedMap::new(); +//! let mut map: HashSortedMap<_, _, RandomState> = HashSortedMap::new(); //! map.insert("hello", 1); //! map.insert("world", 2); //! assert_eq!(map.get("hello"), Some(&1));