diff --git a/crates/geo_filters/evaluation/performance.rs b/crates/geo_filters/evaluation/performance.rs
index 77a0ebd..ca03abb 100644
--- a/crates/geo_filters/evaluation/performance.rs
+++ b/crates/geo_filters/evaluation/performance.rs
@@ -1,9 +1,12 @@
+use std::hash::BuildHasher;
 use std::hint::black_box;
 
 use criterion::{criterion_group, criterion_main, Criterion};
 use geo_filters::build_hasher::UnstableDefaultBuildHasher;
 use geo_filters::config::VariableConfig;
-use geo_filters::diff_count::{GeoDiffCount, GeoDiffCount13};
+use geo_filters::diff_count::{
+    GeoDiffConfig13, GeoDiffCount, GeoDiffCount13, GeoDiffCount7, GeoDiffCountBuilder,
+};
 use geo_filters::distinct_count::GeoDistinctCount13;
 use geo_filters::evaluation::hll::Hll14;
 use geo_filters::Count;
@@ -130,6 +133,71 @@ fn criterion_benchmark(c: &mut Criterion) {
             })
         });
     }
+
+    // Compare building a diff filter from a precomputed slice of hashes one by one (`push_hash`)
+    // versus via the incremental `GeoDiffCountBuilder` (per-hash, and the batched
+    // `extend_by_hashes`). The hashes are precomputed so that only construction cost is measured.
+    for size in [1000usize, 10000, 100000, 1000000] {
+        let mut group = c.benchmark_group(format!("construct:{size}"));
+        let build_hasher = UnstableDefaultBuildHasher::default();
+        let hashes: Vec<u64> = (0..size).map(|i| build_hasher.hash_one(i)).collect();
+
+        group.bench_function("geo_diff_count_7_push", |b| {
+            b.iter(|| {
+                let mut gc = GeoDiffCount7::default();
+                for &hash in &hashes {
+                    gc.push_hash(hash);
+                }
+                black_box(&gc);
+            })
+        });
+        group.bench_function("geo_diff_count_13_push", |b| {
+            b.iter(|| {
+                let mut gc = GeoDiffCount13::default();
+                for &hash in &hashes {
+                    gc.push_hash(hash);
+                }
+                black_box(&gc);
+            })
+        });
+        group.bench_function("geo_diff_count_13_builder_extend", |b| {
+            b.iter(|| {
+                let mut builder = GeoDiffCountBuilder::with_capacity(
+                    GeoDiffConfig13::<UnstableDefaultBuildHasher>::default(),
+                    0,
+                );
+                builder.extend_by_hashes(hashes.iter().copied());
+                black_box(builder.build());
+            })
+        });
+        group.bench_function("geo_diff_count_13_builder", |b| {
+            b.iter(|| {
+                let mut builder = GeoDiffCountBuilder::with_capacity(
+                    GeoDiffConfig13::<UnstableDefaultBuildHasher>::default(),
+                    size,
+                );
+                for &hash in &hashes {
+                    builder.push_hash(hash);
+                }
+                black_box(builder.build());
+            })
+        });
+        // Reserve nothing so the split starts at 0 and every bucket initially lands in `numbers`,
+        // forcing the buffer to fill and compact (lazily flush) repeatedly as the split ramps up.
+        // This isolates the cost of the lazy-flush path versus a well-positioned builder.
+        group.bench_function("geo_diff_count_13_builder_unreserved", |b| {
+            b.iter(|| {
+                let mut builder = GeoDiffCountBuilder::with_capacity(
+                    GeoDiffConfig13::<UnstableDefaultBuildHasher>::default(),
+                    0,
+                );
+                for &hash in &hashes {
+                    builder.push_hash(hash);
+                }
+                black_box(builder.build());
+            })
+        });
+    }
 }
 
 criterion_group!(benches, criterion_benchmark);
diff --git a/crates/geo_filters/src/config/bitchunks.rs b/crates/geo_filters/src/config/bitchunks.rs
index 72660c7..438a5df 100644
--- a/crates/geo_filters/src/config/bitchunks.rs
+++ b/crates/geo_filters/src/config/bitchunks.rs
@@ -20,6 +20,11 @@ impl BitChunk {
     }
 }
 
+/// Merges a descending stream of distinct one-bit positions (`leading`) with a descending stream
+/// of `BitChunk`s (`trailing`) into a single descending `BitChunk` stream. All leading positions
+/// must be more significant than all trailing bits, except that the least-significant leading block
+/// may overlap the most-significant trailing block (the two are or-ed). Leading positions must be
+/// distinct.
 pub(crate) fn iter_bit_chunks(
     leading: impl Iterator<Item = usize>,
     trailing: impl Iterator<Item = BitChunk>,
@@ -55,8 +60,7 @@ impl<I: Iterator<Item = BitChunk>, J: Iterator<Item = usize>> Iterator for BitCh
                     _ => break,
                 }
             }
-            // All leading bits were consumed, test whether it can be merged with
-            // trailing bits.
+            // All leading bits were consumed, test whether it can be merged with trailing bits.
             match self.trailing.peek() {
                 Some(BitChunk {
                     index: other_index,
@@ -314,7 +318,7 @@ impl<T: IsBucketType, I: Iterator<Item = BitChunk>> Iterator for BitChunksOnes<T
 mod tests {
     use itertools::Itertools;
 
-    use super::{iter_ones, BitChunk};
+    use super::{iter_bit_chunks, iter_ones, BitChunk};
 
     #[test]
     fn test_iter_ones() {
@@ -338,4 +342,22 @@ mod tests {
             iter_ones::<usize, _>(chunks.into_iter().peekable()).collect_vec()
         );
     }
+
+    #[test]
+    fn test_iter_bit_chunks() {
+        // The leading one-bit positions 70 and 67 are expected in block 1 (as bits 6 and 3), while
+        // position 5 falls into block 0, the same block as the single trailing chunk (bit 2).
+        let leading = vec![70, 67, 5];
+        let trailing = vec![BitChunk::new(0, 1 << 2)];
+        let merged = iter_bit_chunks(leading.into_iter(), trailing.into_iter()).collect_vec();
+
+        // Bits sharing a block are or-ed into one chunk, and the boundary block combines the
+        // last leading bit with the trailing chunk.
+        let expected = vec![
+            BitChunk::new(1, (1 << 6) | (1 << 3)), // 70, 67
+            BitChunk::new(0, (1 << 5) | (1 << 2)), // 5 (leading) and bit 2 (trailing)
+        ];
+
+        assert_eq!(merged, expected);
+    }
 }
diff --git a/crates/geo_filters/src/config/lookup.rs b/crates/geo_filters/src/config/lookup.rs
index 67d4d1c..b97600b 100644
--- a/crates/geo_filters/src/config/lookup.rs
+++ b/crates/geo_filters/src/config/lookup.rs
@@ -2,22 +2,24 @@ use crate::config::phi_f64;
 
 pub(crate) struct HashToBucketLookup {
     b: usize,
-    buckets: Vec<(usize, usize)>,
+    buckets: Vec<(u32, u32)>,
 }
 
 impl HashToBucketLookup {
     pub(crate) fn new(b: usize) -> Self {
-        let mut buckets = vec![(0, 0); 2 << b];
+        let mut buckets = vec![(0u32, 0u32); 2 << b];
         let mut last_filled_bucket = buckets.len();
         let phi = phi_f64(b);
         for bucket in 0..(1 << b) {
             let lower_bucket_limit = phi.powf((bucket + 1) as f64);
+            // `lower_hash_limit` is a 32-bit hash threshold: `lower_bucket_limit` lies in
+            // `[0.5, 1)`, so this value is always in `[0, 2^32)` and fits losslessly into a `u32`.
             let lower_hash_limit = ((lower_bucket_limit - 0.5) * 2.0f64.powf(33.0)) as usize;
             let lower_hash_bucket = lower_hash_limit >> (32 - b - 1);
             assert!(lower_hash_bucket < last_filled_bucket);
             while last_filled_bucket > lower_hash_bucket {
                 last_filled_bucket -= 1;
-                buckets[last_filled_bucket] = (bucket, lower_hash_limit);
+                buckets[last_filled_bucket] = (bucket as u32, lower_hash_limit as u32);
             }
         }
         assert_eq!(last_filled_bucket, 0);
@@ -38,8 +40,12 @@ impl HashToBucketLookup {
         } & 0xFFFFFFFF) as usize;
         // From those, the first B bits determine the bucket index in our lookup table.
         let idx = hash >> (32 - self.b - 1);
-        let offset = (hash < self.buckets[idx].1) as usize;
-        offset + self.buckets[idx].0 + (1 << self.b) * levels
+        // SAFETY: `hash` was masked to 32 bits, so `idx = hash >> (31 - b)` holds at most `b + 1`
+        // significant bits and is therefore always `< 2^(b+1) == 2 << b == self.buckets.len()`.
+        debug_assert!(idx < self.buckets.len());
+        let (base, threshold) = *unsafe { self.buckets.get_unchecked(idx) };
+        let offset = (hash < threshold as usize) as usize;
+        offset + base as usize + (1 << self.b) * levels
     }
 }
 
diff --git a/crates/geo_filters/src/diff_count.rs b/crates/geo_filters/src/diff_count.rs
index 728cd1f..a37b746 100644
--- a/crates/geo_filters/src/diff_count.rs
+++ b/crates/geo_filters/src/diff_count.rs
@@ -8,7 +8,7 @@ use std::ops::Deref as _;
 
 use crate::config::{
     count_ones_from_bitchunks, count_ones_from_msb_and_lsb, iter_bit_chunks, iter_ones,
-    mask_bit_chunks, take_ref, xor_bit_chunks, BitChunk, GeoConfig, IsBucketType,
+    mask_bit_chunks, take_ref, xor_bit_chunks, BitChunk, GeoConfig, IsBucketType, BITS_PER_BLOCK,
 };
 use crate::{Count, Diff};
 
@@ -429,6 +429,266 @@ pub(crate) fn xor<C: GeoConfig<Diff>>(
     )
 }
 
+/// Chooses the bucket at which a filter built from `n` hashes is split into the sparse
+/// most-significant buckets ("numbers") and the dense least-significant buckets ("bits").
+///
+/// About `n * phi^s` hashes are expected in buckets `>= s`; the split aims for `max_msb_len / 2`
+/// of them. The numbers need not fill the msb on their own, because [`split_into_msb`] (used by
+/// [`GeoDiffCountBuilder::build`]) refills it from the highest dense bits. Bucket sizes shrink
+/// geometrically, so raising the split by `bits_per_level` roughly halves the collected set: a
+/// small target keeps sorting cheap while only marginally enlarging the bit vector. The estimate
+/// affects performance only, never the resulting filter.
+fn estimate_split_bucket<C: GeoConfig<Diff>>(config: &C, n: usize) -> usize {
+    let wanted = config.max_msb_len() / 2;
+    if n <= wanted {
+        // Few enough hashes that all of them can be kept as numbers, i.e. no split is needed.
+        return 0;
+    }
+    // Solve `n * phi^s == wanted` for `s` and round down.
+    let fraction = wanted as f64 / n as f64;
+    let exponent = (fraction.ln() / config.phi_f64().ln()).floor() as usize;
+    // No bucket can ever exceed this bound, so never allocate a larger bit vector.
+    exponent.min(64 * config.bits_per_level())
+}
+
+/// Splits a descending stream of set buckets into the new msb (its first `max_msb_len` entries)
+/// and toggles the remaining buckets into `lsb`, after resizing it to the new boundary (the
+/// smallest msb bucket). If the stream is too short to fill the msb, the highest set bits of
+/// `lsb` are moved into it and `lsb` is truncated at the last moved bit (emptied if it ran out).
+fn split_into_msb<T: IsBucketType>(
+    mut buckets: impl Iterator<Item = T>,
+    lsb: &mut BitVec<'_>,
+    max_msb_len: usize,
+) -> Vec<T> {
+    let mut msb: Vec<T> = Vec::with_capacity(max_msb_len);
+    msb.extend(buckets.by_ref().take(max_msb_len));
+    if msb.len() == max_msb_len {
+        // The msb is full (requires `max_msb_len > 0`): its smallest entry is the new boundary.
+        let smallest = msb[max_msb_len - 1].into_usize();
+        lsb.resize(smallest);
+        let mut toggler = lsb.toggler();
+        for bucket in buckets {
+            toggler.toggle(bucket.into_usize());
+        }
+    } else {
+        // Too few buckets: refill the msb from the highest set bits, then truncate the bits.
+        let need = max_msb_len - msb.len();
+        let pulled: Vec<T> = iter_ones::<T, _>(lsb.bit_chunks().peekable())
+            .take(need)
+            .collect();
+        let smallest = if pulled.len() == need {
+            pulled[need - 1].into_usize()
+        } else {
+            0
+        };
+        msb.extend(pulled);
+        lsb.resize(smallest);
+    }
+    msb
+}
+
+/// Incrementally builds a [`GeoDiffCount`], ideally from a roughly known number of pushes.
+///
+/// Hashes are added one at a time via [`Self::push_hash`] / [`Self::push`], or in bulk via
+/// [`Self::extend_by_hashes`]. Pass the expected number of pushes to [`Self::with_capacity`] so
+/// that the dense/sparse split can be estimated and the bit space presized. The most-significant
+/// buckets accumulate in a plain vector without enforcing the `max_msb_len` limit; that limit and
+/// the filter invariants are applied only once, when [`Self::build`] turns the builder into a
+/// [`GeoDiffCount`]. Pushing more (or fewer) hashes than reserved stays correct; only the split
+/// estimate is then less accurate. If the final count is not known up front, call
+/// [`Self::reserve`] as it grows.
+pub struct GeoDiffCountBuilder<C: GeoConfig<Diff>> {
+    config: C,
+    /// Running total of pushes reserved for; drives the split estimate.
+    expected: usize,
+    /// Buckets at or above `split` accumulate in `numbers` (with duplicates, and transiently some
+    /// below `split` whenever the split has just advanced); buckets below `split` are folded
+    /// (xor) into `blocks`. [`GeoDiffCountBuilder::cleanup`] reconciles the two.
+    split: usize,
+    numbers: Vec<usize>,
+    blocks: Vec<u64>,
+}
+
+impl<C: GeoConfig<Diff>> GeoDiffCountBuilder<C> {
+    /// Creates a builder whose dense/sparse split is positioned for roughly `expected` pushes.
+    ///
+    /// The bit space is sized to that split, whereas the `numbers` buffer is independent of
+    /// `expected`: a `2 * max_msb_len` working set compacted in place by [`Self::push_hash`].
+    pub fn with_capacity(config: C, expected: usize) -> Self {
+        let split = estimate_split_bucket(&config, expected);
+        let numbers = Vec::with_capacity(2 * config.max_msb_len());
+        let blocks = vec![0; split.div_ceil(BITS_PER_BLOCK)];
+        Self {
+            config,
+            expected,
+            split,
+            numbers,
+            blocks,
+        }
+    }
+
+    /// Accounts for `additional` further pushes by re-estimating the dense/sparse split.
+    ///
+    /// If the estimate moved up, the split advances and the bit space grows to match. Numbers
+    /// already collected below the new split stay put until the next [`Self::cleanup`].
+    pub fn reserve(&mut self, additional: usize) {
+        self.expected = self.expected.saturating_add(additional);
+        let estimate = estimate_split_bucket(&self.config, self.expected);
+        if estimate <= self.split {
+            // The split never moves backwards.
+            return;
+        }
+        self.blocks.resize(estimate.div_ceil(BITS_PER_BLOCK), 0);
+        self.split = estimate;
+    }
+
+    /// Sorts `numbers` descending and collapses each run of equal buckets: an even count cancels
+    /// (xor), an odd count leaves one survivor. Survivors below the current split are toggled in
+    /// `blocks`, the others are kept, so that `numbers` ends up strictly descending with no
+    /// entries below `split`. Shared by [`Self::compact`] and [`Self::build`].
+    fn cleanup(&mut self) {
+        self.numbers.sort_unstable_by(|a, b| b.cmp(a));
+        let split = self.split;
+        let blocks = &mut self.blocks;
+        let numbers = &mut self.numbers;
+        let mut write = 0;
+        let mut read = 0;
+        while read < numbers.len() {
+            let bucket = numbers[read];
+            let mut next = read + 1;
+            while next < numbers.len() && numbers[next] == bucket {
+                next += 1;
+            }
+            // `next - read` is the run length; only an odd number of toggles leaves the bucket set.
+            if (next - read) % 2 == 1 {
+                if bucket < split {
+                    let (index, bit) = bucket.into_index_and_bit();
+                    blocks[index] ^= bit.into_block();
+                } else {
+                    numbers[write] = bucket; // `write <= read`, so no unread entry is lost
+                    write += 1;
+                }
+            }
+            read = next;
+        }
+        numbers.truncate(write);
+    }
+
+    /// Shrinks a full `numbers` buffer in place instead of letting it grow. [`Self::cleanup`]
+    /// first collapses duplicates and sub-split entries; if at most half the capacity remains, the
+    /// split stays put. Otherwise the split is raised by whole levels (`bits_per_level` buckets
+    /// at a time) until at most half the capacity lies at or above it, and the buckets now below
+    /// the split are folded into the (enlarged) bit space.
+    fn compact(&mut self) {
+        let target = self.numbers.capacity() / 2;
+        self.cleanup();
+        if self.numbers.len() <= target {
+            return;
+        }
+        // `numbers` is now sorted descending, so the entries at or above any split form a prefix.
+        let bits_per_level = self.config.bits_per_level();
+        let mut new_split = self.split;
+        let mut keep = self.numbers.len();
+        while keep > target {
+            new_split += bits_per_level;
+            keep = self.numbers.partition_point(|&b| b >= new_split);
+        }
+        self.blocks.resize(new_split.div_ceil(BITS_PER_BLOCK), 0);
+        let blocks = &mut self.blocks;
+        for &bucket in &self.numbers[keep..] {
+            let (index, bit) = bucket.into_index_and_bit();
+            blocks[index] ^= bit.into_block();
+        }
+        self.numbers.truncate(keep);
+        self.split = new_split;
+    }
+
+    /// Records one hash: dense buckets toggle a bit, sparse ones are appended to `numbers`.
+    #[inline]
+    pub fn push_hash(&mut self, hash: u64) {
+        let bucket = self.config.hash_to_bucket(hash).into_usize();
+        if bucket < self.split {
+            // Below the split the block index is always in range; toggling cancels repeats.
+            let (block, bit) = bucket.into_index_and_bit();
+            self.blocks[block] ^= bit.into_block();
+            return;
+        }
+        // A full buffer is compacted in place before the push instead of being reallocated.
+        // Compacting may raise the split above this bucket; the bucket is then still appended to
+        // `numbers` and folded into the bits by the next `cleanup`.
+        if self.numbers.len() == self.numbers.capacity() {
+            self.compact();
+        }
+        self.numbers.push(bucket);
+    }
+
+    /// Hashes `item` with a default-constructed `C::BuildHasher` and pushes the resulting hash.
+    pub fn push<I: std::hash::Hash>(&mut self, item: I) {
+        let hash = C::BuildHasher::default().hash_one(item);
+        self.push_hash(hash);
+    }
+
+    /// Inserts a batch of hashes, reserving room for them up front via the size estimator.
+    ///
+    /// Unlike a loop of [`Self::push_hash`] calls, this keeps the split and the bucket storage in
+    /// locals while folding hashes in a tight loop, and only re-acquires them after the rare
+    /// in-place compaction. A buffer left full by [`Self::push_hash`] is compacted first rather
+    /// than grown. Can be mixed freely with [`Self::push_hash`], before and after.
+    pub fn extend_by_hashes(&mut self, mut hashes: impl ExactSizeIterator<Item = u64>) {
+        self.reserve(hashes.len());
+        // `push_hash` compacts lazily and may leave the buffer exactly full; make room up front.
+        if self.numbers.len() == self.numbers.capacity() {
+            self.compact();
+        }
+        loop {
+            let split = self.split;
+            let config = &self.config;
+            let blocks = &mut self.blocks;
+            let numbers = &mut self.numbers;
+            let mut filled = false;
+            for hash in hashes.by_ref() {
+                let bucket = config.hash_to_bucket(hash).into_usize();
+                if bucket >= split {
+                    numbers.push(bucket);
+                    // Stop exactly at capacity so the buffer is never reallocated.
+                    if numbers.len() == numbers.capacity() {
+                        filled = true;
+                        break;
+                    }
+                } else {
+                    let (index, bit) = bucket.into_index_and_bit();
+                    blocks[index] ^= bit.into_block();
+                }
+            }
+            if !filled {
+                break;
+            }
+            self.compact();
+        }
+    }
+
+    /// Consumes the builder and returns the finished [`GeoDiffCount`].
+    ///
+    /// This is where the `max_msb_len` limit, ignored while collecting buckets, is enforced and
+    /// the filter invariants are re-established.
+    pub fn build(mut self) -> GeoDiffCount<'static, C> {
+        let msb_limit = self.config.max_msb_len();
+        // After `cleanup`, `numbers` is sorted descending, duplicate-free, and all `>= split`.
+        self.cleanup();
+        let sparse = self.numbers.iter().map(|&b| C::BucketType::from_usize(b));
+        let mut lsb = BitVec::from_blocks(self.blocks, self.split);
+        // The sparse buckets fill the msb first; any shortfall is refilled from the top of `lsb`.
+        let msb = split_into_msb(sparse, &mut lsb, msb_limit);
+        let filter = GeoDiffCount {
+            config: self.config,
+            msb: Cow::from(msb),
+            lsb,
+        };
+        filter.debug_assert_invariants();
+        filter
+    }
+}
+
 impl<C: GeoConfig<Diff>> Count<Diff> for GeoDiffCount<'_, C> {
     fn push_hash(&mut self, hash: u64) {
         self.xor_bit(self.config.hash_to_bucket(hash));
@@ -542,6 +802,107 @@ mod tests {
         assert_eq!(m.iter_ones().count(), 101);
     }
 
+    /// Building a filter via `GeoDiffCountBuilder` must produce exactly the same filter as pushing
+    /// the hashes one by one, regardless of how accurately the capacity was reserved.
+    #[test]
+    fn test_builder() {
+        fn assert_builder_matches<C: GeoConfig<Diff> + Default>(hashes: &[u64], reserve: usize) {
+            let mut expected: GeoDiffCount<'static, C> = GeoDiffCount::new(C::default());
+            for &hash in hashes {
+                expected.push_hash(hash);
+            }
+            let mut builder = GeoDiffCountBuilder::with_capacity(C::default(), reserve);
+            for &hash in hashes {
+                builder.push_hash(hash);
+            }
+            let actual = builder.build();
+            let label = (hashes.len(), reserve);
+            assert_eq!(expected, actual, "filter mismatch for {label:?}");
+            assert_eq!(
+                expected.iter_ones().collect_vec(),
+                actual.iter_ones().collect_vec(),
+                "ones mismatch for {label:?}",
+            );
+        }
+
+        // Starts with a tiny reservation and grows it while pushing, which moves the split forward
+        // and leaves sub-split numbers behind for `cleanup` to fold into the bits lazily.
+        fn assert_grown_builder_matches<C: GeoConfig<Diff> + Default>(hashes: &[u64]) {
+            let mut expected: GeoDiffCount<'static, C> = GeoDiffCount::new(C::default());
+            for &hash in hashes {
+                expected.push_hash(hash);
+            }
+            let mut builder = GeoDiffCountBuilder::with_capacity(C::default(), 1);
+            for (i, &hash) in hashes.iter().enumerate() {
+                if i % 64 == 0 {
+                    builder.reserve(64);
+                }
+                builder.push_hash(hash);
+            }
+            assert_eq!(expected, builder.build(), "grown builder mismatch");
+        }
+
+        prng_test_harness(4, |rnd| {
+            for n in [0usize, 1, 5, 50, 500, 5000, 50000] {
+                let pool: Vec<u64> = (0..n.div_ceil(2).max(1)).map(|_| rnd.next_u64()).collect();
+                let hashes: Vec<u64> = (0..n)
+                    .map(|_| *pool.iter().choose(rnd).expect("pool is non-empty"))
+                    .collect();
+                // Reserve exactly, far too little (split too low), and far too much (split too high).
+                assert_builder_matches::<GeoDiffConfig7>(&hashes, n);
+                assert_builder_matches::<GeoDiffConfig13>(&hashes, n);
+                assert_builder_matches::<GeoDiffConfig13>(&hashes, n / 4);
+                assert_builder_matches::<GeoDiffConfig13>(&hashes, n * 4);
+                // Reserve nothing so the split starts at 0 and every bucket initially lands in
+                // `numbers`, forcing repeated compaction once the fixed-size buffer fills. This
+                // hammers the lazy-flush path, including buckets that land below the split a
+                // compaction just advanced past.
+                assert_builder_matches::<GeoDiffConfig7>(&hashes, 0);
+                assert_builder_matches::<GeoDiffConfig13>(&hashes, 0);
+                assert_grown_builder_matches::<GeoDiffConfig7>(&hashes);
+                assert_grown_builder_matches::<GeoDiffConfig13>(&hashes);
+            }
+        });
+    }
+
+    /// `GeoDiffCountBuilder::extend_by_hashes` (alone, or mixed with `push_hash`) must produce
+    /// exactly the same filter as pushing every hash one by one into a `GeoDiffCount`.
+    #[test]
+    fn test_builder_extend() {
+        fn assert_extend_matches<C: GeoConfig<Diff> + Default>(hashes: &[u64]) {
+            let mut expected: GeoDiffCount<'static, C> = GeoDiffCount::new(C::default());
+            for &hash in hashes {
+                expected.push_hash(hash);
+            }
+
+            // Extend a fresh builder in one batch (`extend_by_hashes` reserves for the batch size).
+            let mut batched = GeoDiffCountBuilder::with_capacity(C::default(), 0);
+            batched.extend_by_hashes(hashes.iter().copied());
+            assert_eq!(expected, batched.build(), "extend-from-empty mismatch");
+
+            // Push a prefix one by one, then extend the partially filled builder with the rest.
+            let mid = hashes.len() / 2;
+            let mut mixed = GeoDiffCountBuilder::with_capacity(C::default(), 0);
+            for &hash in &hashes[..mid] {
+                mixed.push_hash(hash);
+            }
+            mixed.extend_by_hashes(hashes[mid..].iter().copied());
+            assert_eq!(expected, mixed.build(), "push+extend mismatch");
+        }
+
+        prng_test_harness(4, |rnd| {
+            for n in [0usize, 1, 5, 50, 500, 5000, 50000] {
+                // Draw from a smaller pool so buckets repeat, exercising xor cancellation.
+                let pool: Vec<u64> = (0..n.div_ceil(2).max(1)).map(|_| rnd.next_u64()).collect();
+                let hashes: Vec<u64> = (0..n)
+                    .map(|_| *pool.iter().choose(rnd).expect("pool is non-empty"))
+                    .collect();
+                assert_extend_matches::<GeoDiffConfig7>(&hashes);
+                assert_extend_matches::<GeoDiffConfig13>(&hashes);
+            }
+        });
+    }
+
     #[test]
     fn test_estimate_fast() {
         prng_test_harness(1, |rnd| {
diff --git a/crates/geo_filters/src/diff_count/bitvec.rs b/crates/geo_filters/src/diff_count/bitvec.rs
index f77323c..42c873c 100644
--- a/crates/geo_filters/src/diff_count/bitvec.rs
+++ b/crates/geo_filters/src/diff_count/bitvec.rs
@@ -47,6 +47,18 @@ impl BitVec<'_> {
         result
     }
 
+    /// Takes ownership of `blocks` as the storage of a `BitVec` spanning `[0, num_bits)`.
+    ///
+    /// Exactly `num_bits.div_ceil(BITS_PER_BLOCK)` blocks are expected (checked in debug builds
+    /// only); bits at or above `num_bits` are cleared.
+    pub fn from_blocks(blocks: Vec<u64>, num_bits: usize) -> BitVec<'static> {
+        debug_assert_eq!(blocks.len(), num_bits.div_ceil(BITS_PER_BLOCK));
+        let blocks = Cow::Owned(blocks);
+        let mut bits = BitVec { num_bits, blocks };
+        bits.clear_superfluous_bits();
+        bits
+    }
+
     /// Resize the vector such that the top block contains the given bucket.
     pub fn resize(&mut self, num_bits: usize) {
         let num_blocks = num_bits.div_ceil(BITS_PER_BLOCK);
@@ -90,6 +102,17 @@ impl BitVec<'_> {
         self.blocks.to_mut()[block_idx] ^= bit_idx.into_block();
     }
 
+    /// Hands out a [`BitToggler`] for toggling many bits in a row. The `Cow` storage is resolved
+    /// via `to_mut()` once here, whereas [`Self::toggle`] resolves it on every call and thereby
+    /// keeps that check in the caller's hot loop.
+    pub fn toggler(&mut self) -> BitToggler<'_> {
+        let blocks: &mut [u64] = self.blocks.to_mut();
+        BitToggler {
+            num_bits: self.num_bits,
+            blocks,
+        }
+    }
+
     /// Returns an iterator over all blocks in reverse order.
     /// The blocks are represented as `BitChunk`s.
     pub fn bit_chunks(&self) -> impl Iterator<Item = BitChunk> + '_ {
@@ -200,6 +223,23 @@ impl Index<usize> for BitVec<'_> {
     }
 }
 
+/// Mutable view onto the blocks of a [`BitVec`], handed out by [`BitVec::toggler`], that flips
+/// individual bits without going through the vector's `Cow` on each access.
+pub(crate) struct BitToggler<'a> {
+    num_bits: usize,
+    blocks: &'a mut [u64],
+}
+
+impl BitToggler<'_> {
+    /// Flips the bit at zero-based position `index`, which must be `< num_bits`.
+    #[inline]
+    pub fn toggle(&mut self, index: usize) {
+        debug_assert!(index < self.num_bits);
+        let (block, bit) = index.into_index_and_bit();
+        self.blocks[block] ^= bit.into_block();
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;