diff --git a/differential-dataflow/examples/columnar.rs b/differential-dataflow/examples/columnar.rs
index 52d5e141f..cf58ee3c2 100644
--- a/differential-dataflow/examples/columnar.rs
+++ b/differential-dataflow/examples/columnar.rs
@@ -580,7 +580,7 @@ pub mod dd_builder {
     use differential_dataflow::trace::implementations::Layout;
     use differential_dataflow::trace::implementations::Update;
     use differential_dataflow::trace::implementations::BatchContainer;
-    use differential_dataflow::trace::implementations::ord_neu::{OrdValBatch, val_batch::OrdValStorage, OrdKeyBatch};
+    use differential_dataflow::trace::implementations::ord_neu::{OrdValBatch, val_batch::OrdValStorage, OrdKeyBatch, Vals, Upds, layers::UpdsBuilder};
     use differential_dataflow::trace::implementations::ord_neu::key_batch::OrdKeyStorage;
 
     use crate::Column;
@@ -597,44 +597,7 @@ pub mod dd_builder {
         ///
         /// This is public to allow container implementors to set and inspect their container.
         pub result: OrdValStorage<L>,
-        singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton optimizations we performed.
-        ///
-        /// This number allows us to correctly gauge the total number of updates reflected in a batch,
-        /// even though `updates.len()` may be much shorter than this amount.
-        singletons: usize,
-    }
-
-    impl<L: Layout> OrdValBuilder<L> {
-        /// Pushes a single update, which may set `self.singleton` rather than push.
-        ///
-        /// This operation is meant to be equivalent to `self.result.updates.push((time, diff))`.
-        /// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
-        /// to encode a singleton update with an "absent" update: repeating the most recent offset.
-        /// This otherwise invalid state encodes "look back one element".
-        ///
-        /// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
-        /// previously pushed update exactly. In that case, we do not push the update into `updates`.
-        /// The update tuple is retained in `self.singleton` in case we see another update and need
-        /// to recover the singleton to push it into `updates` to join the second update.
-        fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
-            // If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
-            if self.result.times.last().map(|t| t == <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time)) == Some(true) &&
-               self.result.diffs.last().map(|d| d == <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff)) == Some(true)
-            {
-                assert!(self.singleton.is_none());
-                self.singleton = Some((time, diff));
-            }
-            else {
-                // If we have pushed a single element, we need to copy it out to meet this one.
-                if let Some((time, diff)) = self.singleton.take() {
-                    self.result.times.push(time);
-                    self.result.diffs.push(diff);
-                }
-                self.result.times.push(time);
-                self.result.diffs.push(diff);
-            }
-        }
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
     }
 
     // The layout `L` determines the key, val, time, and diff types.
@@ -656,18 +619,13 @@ pub mod dd_builder {
         type Output = OrdValBatch<L>;
 
         fn with_capacity(keys: usize, vals: usize, upds: usize) -> Self {
-            // We don't introduce zero offsets as they will be introduced by the first `push` call.
             Self {
                 result: OrdValStorage {
                     keys: L::KeyContainer::with_capacity(keys),
-                    keys_offs: L::OffsetContainer::with_capacity(keys + 1),
-                    vals: L::ValContainer::with_capacity(vals),
-                    vals_offs: L::OffsetContainer::with_capacity(vals + 1),
-                    times: L::TimeContainer::with_capacity(upds),
-                    diffs: L::DiffContainer::with_capacity(upds),
+                    vals: Vals::with_capacity(keys + 1, vals),
+                    upds: Upds::with_capacity(vals + 1, upds),
                 },
-                singleton: None,
-                singletons: 0,
+                staging: UpdsBuilder::default(),
             }
         }
 
@@ -689,25 +647,29 @@ pub mod dd_builder {
                 let time = <<L::TimeContainer as BatchContainer>::Owned as Columnar>::into_owned(time);
                 let diff = <<L::DiffContainer as BatchContainer>::Owned as Columnar>::into_owned(diff);
 
+                // Pre-load the first update.
+                if self.result.keys.is_empty() {
+                    self.result.vals.vals.push(&val);
+                    self.result.keys.push(&key);
+                    self.staging.push(time, diff);
+                }
                 // Perhaps this is a continuation of an already received key.
-                if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
+                else if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
                     // Perhaps this is a continuation of an already received value.
-                    if self.result.vals.last().map(|v| <<L::ValContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&val).eq(&v)).unwrap_or(false) {
-                        self.push_update(time, diff);
+                    if self.result.vals.vals.last().map(|v| <<L::ValContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&val).eq(&v)).unwrap_or(false) {
+                        self.staging.push(time, diff);
                     } else {
                        // New value; complete representation of prior value.
-                        self.result.vals_offs.push(self.result.times.len());
-                        if self.singleton.take().is_some() { self.singletons += 1; }
-                        self.push_update(time, diff);
-                        self.result.vals.push(&val);
+                        self.staging.seal(&mut self.result.upds);
+                        self.staging.push(time, diff);
+                        self.result.vals.vals.push(&val);
                    }
                } else {
                    // New key; complete representation of prior key.
-                    self.result.vals_offs.push(self.result.times.len());
-                    if self.singleton.take().is_some() { self.singletons += 1; }
-                    self.result.keys_offs.push(self.result.vals.len());
-                    self.push_update(time, diff);
-                    self.result.vals.push(&val);
+                    self.staging.seal(&mut self.result.upds);
+                    self.staging.push(time, diff);
+                    self.result.vals.offs.push(self.result.vals.vals.len());
+                    self.result.vals.vals.push(&val);
                    self.result.keys.push(&key);
                }
            }
        }
@@ -715,13 +677,10 @@ pub mod dd_builder {
        #[inline(never)]
        fn done(mut self, description: Description<<L::Target as Update>::Time>) -> OrdValBatch<L> {
-            // Record the final offsets
-            self.result.vals_offs.push(self.result.times.len());
-            // Remove any pending singleton, and if it was set increment our count.
-            if self.singleton.take().is_some() { self.singletons += 1; }
-            self.result.keys_offs.push(self.result.vals.len());
+            self.staging.seal(&mut self.result.upds);
+            self.result.vals.offs.push(self.result.vals.vals.len());
            OrdValBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description,
            }
        }
@@ -745,44 +704,7 @@ pub mod dd_builder {
        ///
        /// This is public to allow container implementors to set and inspect their container.
        pub result: OrdKeyStorage<L>,
-        singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton optimizations we performed.
-        ///
-        /// This number allows us to correctly gauge the total number of updates reflected in a batch,
-        /// even though `updates.len()` may be much shorter than this amount.
-        singletons: usize,
-    }
-
-    impl<L: Layout> OrdKeyBuilder<L> {
-        /// Pushes a single update, which may set `self.singleton` rather than push.
-        ///
-        /// This operation is meant to be equivalent to `self.result.updates.push((time, diff))`.
-        /// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
-        /// to encode a singleton update with an "absent" update: repeating the most recent offset.
-        /// This otherwise invalid state encodes "look back one element".
-        ///
-        /// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
-        /// previously pushed update exactly. In that case, we do not push the update into `updates`.
-        /// The update tuple is retained in `self.singleton` in case we see another update and need
-        /// to recover the singleton to push it into `updates` to join the second update.
-        fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
-            // If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
-            if self.result.times.last().map(|t| t == <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time)) == Some(true) &&
-               self.result.diffs.last().map(|d| d == <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff)) == Some(true)
-            {
-                assert!(self.singleton.is_none());
-                self.singleton = Some((time, diff));
-            }
-            else {
-                // If we have pushed a single element, we need to copy it out to meet this one.
-                if let Some((time, diff)) = self.singleton.take() {
-                    self.result.times.push(time);
-                    self.result.diffs.push(diff);
-                }
-                self.result.times.push(time);
-                self.result.diffs.push(diff);
-            }
-        }
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
     }
 
     // The layout `L` determines the key, val, time, and diff types.
@@ -804,16 +726,12 @@ pub mod dd_builder {
        type Output = OrdKeyBatch<L>;
 
        fn with_capacity(keys: usize, _vals: usize, upds: usize) -> Self {
-            // We don't introduce zero offsets as they will be introduced by the first `push` call.
            Self {
                result: OrdKeyStorage {
                    keys: L::KeyContainer::with_capacity(keys),
-                    keys_offs: L::OffsetContainer::with_capacity(keys + 1),
-                    times: L::TimeContainer::with_capacity(upds),
-                    diffs: L::DiffContainer::with_capacity(upds),
+                    upds: Upds::with_capacity(keys + 1, upds),
                },
-                singleton: None,
-                singletons: 0,
+                staging: UpdsBuilder::default(),
            }
        }
 
@@ -834,14 +752,18 @@
                let time = <<L::TimeContainer as BatchContainer>::Owned as Columnar>::into_owned(time);
                let diff = <<L::DiffContainer as BatchContainer>::Owned as Columnar>::into_owned(diff);
 
+                // Pre-load the first update.
+                if self.result.keys.is_empty() {
+                    self.result.keys.push(&key);
+                    self.staging.push(time, diff);
+                }
                // Perhaps this is a continuation of an already received key.
-                if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
-                    self.push_update(time, diff);
+                else if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
+                    self.staging.push(time, diff);
                } else {
                    // New key; complete representation of prior key.
-                    self.result.keys_offs.push(self.result.times.len());
-                    if self.singleton.take().is_some() { self.singletons += 1; }
-                    self.push_update(time, diff);
+                    self.staging.seal(&mut self.result.upds);
+                    self.staging.push(time, diff);
                    self.result.keys.push(&key);
                }
            }
@@ -849,12 +771,9 @@
        #[inline(never)]
        fn done(mut self, description: Description<<L::Target as Update>::Time>) -> OrdKeyBatch<L> {
-            // Record the final offsets
-            self.result.keys_offs.push(self.result.times.len());
-            // Remove any pending singleton, and if it was set increment our count.
-            if self.singleton.take().is_some() { self.singletons += 1; }
+            self.staging.seal(&mut self.result.upds);
            OrdKeyBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description,
            }
        }
diff --git a/differential-dataflow/src/trace/implementations/ord_neu.rs b/differential-dataflow/src/trace/implementations/ord_neu.rs
index 4ac2acf30..cb0d12bf2 100644
--- a/differential-dataflow/src/trace/implementations/ord_neu.rs
+++ b/differential-dataflow/src/trace/implementations/ord_neu.rs
@@ -65,56 +65,98 @@ pub type PreferredBuilder<K, V, T, R> = RcBuilder<OrdValBuilder<Preferred<K,V,T,R>, TimelyStack<((<K as ToOwned>::Owned,<V as ToOwned>::Owned),T,R)>>>;
 
+pub use layers::{Vals, Upds};
+
+/// Layers are containers of lists of some type.
+///
+/// The intent is that they "attach" to an outer layer which has as many values
+/// as the layer has lists, thereby associating a list with each outer value.
+/// A sequence of layers, each matching the number of values in its predecessor,
+/// forms a layered trie: a tree with values of some type on nodes at each depth.
+///
+/// We will form tries here by layering `[Keys, Vals, Upds]` or `[Keys, Upds]`.
+pub mod layers {
-/// Types related to forming batches with values.
-pub mod val_batch {
 
-    use std::marker::PhantomData;
    use serde::{Deserialize, Serialize};
-    use timely::container::PushInto;
-    use timely::progress::{Antichain, frontier::AntichainRef};
+    use crate::trace::implementations::BatchContainer;
 
-    use crate::trace::{Batch, BatchReader, Builder, Cursor, Description, Merger};
-    use crate::trace::implementations::{BatchContainer, BuilderInput};
-    use crate::IntoOwned;
-
-    use super::{Layout, Update};
-
-    /// An immutable collection of update tuples, from a contiguous interval of logical times.
+    /// A container for non-empty lists of values.
    #[derive(Debug, Serialize, Deserialize)]
-    pub struct OrdValStorage<L: Layout> {
-        /// An ordered list of keys, corresponding to entries in `keys_offs`.
-        pub keys: L::KeyContainer,
+    pub struct Vals<O, V> {
        /// Offsets used to provide indexes from keys to values.
        ///
        /// The length of this list is one longer than `keys`, so that we can avoid bounds logic.
-        pub keys_offs: L::OffsetContainer,
-        /// Concatenated ordered lists of values, bracketed by offsets in `keys_offs`.
-        pub vals: L::ValContainer,
-        /// Offsets used to provide indexes from values to updates.
-        ///
-        /// This list has a special representation that any empty range indicates the singleton
-        /// element just before the range, as if the start were decremented by one. The empty
-        /// range is otherwise an invalid representation, and we borrow it to compactly encode
-        /// single common update values (e.g. in a snapshot, the minimal time and a diff of one).
-        ///
-        /// The length of this list is one longer than `vals`, so that we can avoid bounds logic.
-        pub vals_offs: L::OffsetContainer,
-        /// Concatenated ordered lists of update times, bracketed by offsets in `vals_offs`.
-        pub times: L::TimeContainer,
-        /// Concatenated ordered lists of update diffs, bracketed by offsets in `vals_offs`.
-        pub diffs: L::DiffContainer,
+        pub offs: O,
+        /// Concatenated ordered lists of values, bracketed by offsets in `offs`.
+        pub vals: V,
+    }
+
+    impl<O: for<'a> BatchContainer<ReadItem<'a> = usize>, V: BatchContainer> Default for Vals<O, V> {
+        fn default() -> Self { Self::with_capacity(0, 0) }
    }
 
-    impl<L: Layout> OrdValStorage<L> {
-        /// Lower and upper bounds in `self.vals` corresponding to the key at `index`.
-        fn values_for_key(&self, index: usize) -> (usize, usize) {
-            (self.keys_offs.index(index), self.keys_offs.index(index+1))
+    impl<O: for<'a> BatchContainer<ReadItem<'a> = usize>, V: BatchContainer> Vals<O, V> {
+        /// Lower and upper bounds in `self.vals` of the indexed list.
+        pub fn bounds(&self, index: usize) -> (usize, usize) {
+            (self.offs.index(index), self.offs.index(index+1))
+        }
+        /// Retrieves a value using relative indexes.
+        ///
+        /// The first index identifies a list, and the second an item within the list.
+        /// The method adds the list's lower bound to the item index, and then calls
+        /// `get_abs`. Using absolute indexes within the list's bounds can be more
+        /// efficient than using relative indexing.
+        pub fn get_rel(&self, list_idx: usize, item_idx: usize) -> V::ReadItem<'_> {
+            self.get_abs(self.bounds(list_idx).0 + item_idx)
+        }
+
+        /// Number of lists in the container.
+        pub fn len(&self) -> usize { self.offs.len() - 1 }
+        /// Retrieves a value using an absolute rather than relative index.
+        pub fn get_abs(&self, index: usize) -> V::ReadItem<'_> {
+            self.vals.index(index)
+        }
+        /// Allocates with capacities for a number of lists and values.
+        pub fn with_capacity(o_size: usize, v_size: usize) -> Self {
+            let mut offs = <O as BatchContainer>::with_capacity(o_size);
+            offs.push(0);
+            Self {
+                offs,
+                vals: <V as BatchContainer>::with_capacity(v_size),
+            }
+        }
+        /// Allocates with enough capacity to contain two inputs.
+        pub fn merge_capacity(this: &Self, that: &Self) -> Self {
+            let mut offs = <O as BatchContainer>::with_capacity(this.offs.len() + that.offs.len());
+            offs.push(0);
+            Self {
+                offs,
+                vals: <V as BatchContainer>::merge_capacity(&this.vals, &that.vals),
+            }
        }
-        /// Lower and upper bounds in `self.updates` corresponding to the value at `index`.
-        fn updates_for_value(&self, index: usize) -> (usize, usize) {
-            let mut lower = self.vals_offs.index(index);
-            let upper = self.vals_offs.index(index+1);
+    }
+
+    /// A container for non-empty lists of updates.
+    ///
+    /// This container uses the special representation of an empty slice to stand in for
+    /// "the previous single element". An empty slice is an otherwise invalid representation.
+    #[derive(Debug, Serialize, Deserialize)]
+    pub struct Upds<O, T, D> {
+        /// Offsets used to provide indexes from values to updates.
+        offs: O,
+        /// Concatenated ordered lists of update times, bracketed by offsets in `offs`.
+        times: T,
+        /// Concatenated ordered lists of update diffs, bracketed by offsets in `offs`.
+        diffs: D,
+    }
+
+    impl<O: for<'a> BatchContainer<ReadItem<'a> = usize>, T: BatchContainer, D: BatchContainer> Default for Upds<O, T, D> {
+        fn default() -> Self { Self::with_capacity(0, 0) }
+    }
+    impl<O: for<'a> BatchContainer<ReadItem<'a> = usize>, T: BatchContainer, D: BatchContainer> Upds<O, T, D> {
+        /// Lower and upper bounds in `self.times` and `self.diffs` of the indexed list.
+        pub fn bounds(&self, index: usize) -> (usize, usize) {
+            let mut lower = self.offs.index(index);
+            let upper = self.offs.index(index+1);
            // We use equal lower and upper to encode "singleton update; just before here".
            // It should only apply when there is a prior element, so `lower` should be greater than zero.
            if lower == upper {
                assert!(lower > 0);
                lower -= 1;
            }
            (lower, upper)
        }
@@ -123,6 +165,145 @@ pub mod val_batch {
+        /// Retrieves a value using relative indexes.
+        ///
+        /// The first index identifies a list, and the second an item within the list.
+        /// The method adds the list's lower bound to the item index, and then calls
+        /// `get_abs`. Using absolute indexes within the list's bounds can be more
+        /// efficient than using relative indexing.
+        pub fn get_rel(&self, list_idx: usize, item_idx: usize) -> (T::ReadItem<'_>, D::ReadItem<'_>) {
+            self.get_abs(self.bounds(list_idx).0 + item_idx)
+        }
+
+        /// Number of lists in the container.
+        pub fn len(&self) -> usize { self.offs.len() - 1 }
+        /// Retrieves a value using an absolute rather than relative index.
+        pub fn get_abs(&self, index: usize) -> (T::ReadItem<'_>, D::ReadItem<'_>) {
+            (self.times.index(index), self.diffs.index(index))
+        }
+        /// Allocates with capacities for a number of lists and values.
+        pub fn with_capacity(o_size: usize, u_size: usize) -> Self {
+            let mut offs = <O as BatchContainer>::with_capacity(o_size);
+            offs.push(0);
+            Self {
+                offs,
+                times: <T as BatchContainer>::with_capacity(u_size),
+                diffs: <D as BatchContainer>::with_capacity(u_size),
+            }
+        }
+        /// Allocates with enough capacity to contain two inputs.
+        pub fn merge_capacity(this: &Self, that: &Self) -> Self {
+            let mut offs = <O as BatchContainer>::with_capacity(this.offs.len() + that.offs.len());
+            offs.push(0);
+            Self {
+                offs,
+                times: <T as BatchContainer>::merge_capacity(&this.times, &that.times),
+                diffs: <D as BatchContainer>::merge_capacity(&this.diffs, &that.diffs),
+            }
+        }
+    }
+
+    /// Helper type for constructing `Upds` containers.
+    pub struct UpdsBuilder<T: BatchContainer, D: BatchContainer> {
+        /// Local stash of updates, to use for consolidation.
+        ///
+        /// We could emulate a `ChangeBatch` here, with related compaction smarts.
+        /// A `ChangeBatch` itself needs an `i64` diff type, which we have not.
+        stash: Vec<(T::Owned, D::Owned)>,
+        /// Total number of consolidated updates.
+        ///
+        /// Tracked independently to account for duplicate compression.
+        total: usize,
+    }
+
+    impl<T: BatchContainer, D: BatchContainer> Default for UpdsBuilder<T, D> {
+        fn default() -> Self { Self { stash: Vec::default(), total: 0, } }
+    }
+
+
+    use timely::container::PushInto;
+    impl<T, D> UpdsBuilder<T, D>
+    where
+        T: BatchContainer + PushInto<T::Owned>,
+        D: BatchContainer + PushInto<D::Owned>,
+    {
+        /// Stages one update, but does not seal the set of updates.
+        pub fn push(&mut self, time: T::Owned, diff: D::Owned) {
+            self.stash.push((time, diff));
+        }
+
+        /// Consolidate and insert (if non-empty) the stashed updates.
+        ///
+        /// The return indicates whether the results were indeed non-empty.
+        pub fn seal<O: for<'a> BatchContainer<ReadItem<'a> = usize>>(&mut self, upds: &mut Upds<O, T, D>) -> bool {
+            use crate::consolidation;
+            consolidation::consolidate(&mut self.stash);
+            if !self.stash.is_empty() {
+                // If there is a single element, equal to a just-prior recorded update,
+                // we push nothing and report an unincremented offset to encode this case.
+                let time_diff = upds.times.last().zip(upds.diffs.last());
+                let last_eq = self.stash.last().zip(time_diff).map(|((t1, d1), (t2, d2))| {
+                    use crate::IntoOwned;
+                    let t1 = <<T as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(t1);
+                    let d1 = <<D as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(d1);
+                    t1.eq(&t2) && d1.eq(&d2)
+                });
+                if self.stash.len() == 1 && last_eq.unwrap_or(false) {
+                    // Just clear out the stash, as we won't drain it here.
+                    self.total += 1;
+                    self.stash.clear();
+                    upds.offs.push(upds.times.len());
+                }
+                else {
+                    // Conventional; move `stash` into `updates`.
+                    self.total += self.stash.len();
+                    for (time, diff) in self.stash.drain(..) {
+                        upds.times.push(time);
+                        upds.diffs.push(diff);
+                    }
+                    upds.offs.push(upds.times.len());
+                }
+                true
+            } else {
+                false
+            }
+        }
+
+        /// Completes the building and returns the total updates sealed.
+        pub fn total(&self) -> usize { self.total }
+    }
+}
+
+/// Types related to forming batches with values.
+pub mod val_batch {
+
+    use std::marker::PhantomData;
+    use serde::{Deserialize, Serialize};
+    use timely::container::PushInto;
+    use timely::progress::{Antichain, frontier::AntichainRef};
+
+    use crate::trace::{Batch, BatchReader, Builder, Cursor, Description, Merger};
+    use crate::trace::implementations::{BatchContainer, BuilderInput};
+    use crate::IntoOwned;
+
+    use super::{Layout, Update, Vals, Upds, layers::UpdsBuilder};
+
+    /// An immutable collection of update tuples, from a contiguous interval of logical times.
+    #[derive(Debug, Serialize, Deserialize)]
+    #[serde(bound = "
+        L::KeyContainer: Serialize + for<'a> Deserialize<'a>,
+        L::ValContainer: Serialize + for<'a> Deserialize<'a>,
+        L::OffsetContainer: Serialize + for<'a> Deserialize<'a>,
+        L::TimeContainer: Serialize + for<'a> Deserialize<'a>,
+        L::DiffContainer: Serialize + for<'a> Deserialize<'a>,
+    ")]
+    pub struct OrdValStorage<L: Layout> {
+        /// An ordered list of keys.
+        pub keys: L::KeyContainer,
+        /// For each key in `keys`, a list of values.
+        pub vals: Vals<L::OffsetContainer, L::ValContainer>,
+        /// For each val in `vals`, a list of (time, diff) updates.
+        pub upds: Upds<L::OffsetContainer, L::TimeContainer, L::DiffContainer>,
    }
 
    /// An immutable collection of update tuples, from a contiguous interval of logical times.
@@ -145,7 +326,7 @@
        /// The number of updates reflected in the batch.
        ///
        /// We track this separately from `storage` because due to the singleton optimization,
-        /// we may have many more updates than `storage.updates.len()`. It should equal that 
+        /// we may have many more updates than `storage.updates.len()`. It should equal that
        /// length, plus the number of singleton optimizations employed.
        pub updates: usize,
    }
@@ -159,14 +340,14 @@
        type DiffGat<'a> = <L::DiffContainer as BatchContainer>::ReadItem<'a>;
 
        type Cursor = OrdValCursor<L>;
-        fn cursor(&self) -> Self::Cursor { 
+        fn cursor(&self) -> Self::Cursor {
            OrdValCursor {
                key_cursor: 0,
                val_cursor: 0,
                phantom: PhantomData,
            }
        }
-        fn len(&self) -> usize { 
+        fn len(&self) -> usize {
            // Normally this would be `self.updates.len()`, but we have a clever compact encoding.
            // Perhaps we should count such exceptions to the side, to provide a correct accounting.
            self.updates
@@ -186,11 +367,8 @@
            Self {
                storage: OrdValStorage {
                    keys: L::KeyContainer::with_capacity(0),
-                    keys_offs: L::OffsetContainer::with_capacity(0),
-                    vals: L::ValContainer::with_capacity(0),
-                    vals_offs: L::OffsetContainer::with_capacity(0),
-                    times: L::TimeContainer::with_capacity(0),
-                    diffs: L::DiffContainer::with_capacity(0),
+                    vals: Default::default(),
+                    upds: Default::default(),
                },
                description: Description::new(lower, upper, Antichain::from_elem(Self::Time::minimum())),
                updates: 0,
@@ -208,14 +386,8 @@
        result: OrdValStorage<L>,
        /// description
        description: Description<<L::Target as Update>::Time>,
-
-        /// Local stash of updates, to use for consolidation.
-        ///
-        /// We could emulate a `ChangeBatch` here, with related compaction smarts.
-        /// A `ChangeBatch` itself needs an `i64` diff type, which we have not.
-        update_stash: Vec<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton-optimized entries, that we may correctly count the updates.
-        singletons: usize,
+        /// Staging area to consolidate owned times and diffs, before sealing.
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
    }
 
    impl<L: Layout> Merger<OrdValBatch<L>> for OrdValMerger<L>
    where
        OrdValBatch<L>: Batch<Time=<L::Target as Update>::Time>
    {
        fn new(batch1: &OrdValBatch<L>, batch2: &OrdValBatch<L>, compaction_frontier: AntichainRef<<L::Target as Update>::Time>) -> Self {
 
            let description = Description::new(batch1.lower().clone(), batch2.upper().clone(), compaction_frontier.to_owned());
 
            let batch1 = &batch1.storage;
            let batch2 = &batch2.storage;
 
-            let mut storage = OrdValStorage {
-                keys: L::KeyContainer::merge_capacity(&batch1.keys, &batch2.keys),
-                keys_offs: L::OffsetContainer::with_capacity(batch1.keys_offs.len() + batch2.keys_offs.len()),
-                vals: L::ValContainer::merge_capacity(&batch1.vals, &batch2.vals),
-                vals_offs: L::OffsetContainer::with_capacity(batch1.vals_offs.len() + batch2.vals_offs.len()),
-                times: L::TimeContainer::merge_capacity(&batch1.times, &batch2.times),
-                diffs: L::DiffContainer::merge_capacity(&batch1.diffs, &batch2.diffs),
-            };
-
-            // Mark explicit types because type inference fails to resolve it.
-            let keys_offs: &mut L::OffsetContainer = &mut storage.keys_offs;
-            keys_offs.push(0);
-            let vals_offs: &mut L::OffsetContainer = &mut storage.vals_offs;
-            vals_offs.push(0);
-
            OrdValMerger {
                key_cursor1: 0,
                key_cursor2: 0,
-                result: storage,
+                result: OrdValStorage {
+                    keys: L::KeyContainer::merge_capacity(&batch1.keys, &batch2.keys),
+                    vals: Vals::merge_capacity(&batch1.vals, &batch2.vals),
+                    upds: Upds::merge_capacity(&batch1.upds, &batch2.upds),
+                },
                description,
-                update_stash: Vec::new(),
-                singletons: 0,
+                staging: UpdsBuilder::default(),
            }
        }
        fn done(self) -> OrdValBatch<L> {
            OrdValBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description: self.description,
            }
        }
        fn work(&mut self, source1: &OrdValBatch<L>, source2: &OrdValBatch<L>, fuel: &mut isize) {
 
            // An (incomplete) indication of the amount of work we've done so far.
-            let starting_updates = self.result.times.len() + self.singletons;
+            let starting_updates = self.staging.total();
            let mut effort = 0isize;
 
            // While both mergees are still active, perform single-key merges.
            while self.key_cursor1 < source1.storage.keys.len() && self.key_cursor2 < source2.storage.keys.len() && effort < *fuel {
                self.merge_key(&source1.storage, &source2.storage);
                // An (incomplete) accounting of the work we've done.
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
 
-            // Merging is complete, and only copying remains. 
+            // Merging is complete, and only copying remains.
            // Key-by-key copying allows effort interruption, and compaction.
            while self.key_cursor1 < source1.storage.keys.len() && effort < *fuel {
                self.copy_key(&source1.storage, self.key_cursor1);
                self.key_cursor1 += 1;
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
            while self.key_cursor2 < source2.storage.keys.len() && effort < *fuel {
                self.copy_key(&source2.storage, self.key_cursor2);
                self.key_cursor2 += 1;
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
 
            *fuel -= effort;
@@ -308,22 +468,21 @@
        /// The caller should be certain to update the cursor, as this method does not do this.
        fn copy_key(&mut self, source: &OrdValStorage<L>, cursor: usize) {
            // Capture the initial number of values to determine if the merge was ultimately non-empty.
-            let init_vals = self.result.vals.len();
-            let (mut lower, upper) = source.values_for_key(cursor);
+            let init_vals = self.result.vals.vals.len();
+            let (mut lower, upper) = source.vals.bounds(cursor);
            while lower < upper {
                self.stash_updates_for_val(source, lower);
-                if let Some(off) = self.consolidate_updates() {
-                    self.result.vals_offs.push(off);
-                    self.result.vals.push(source.vals.index(lower));
+                if self.staging.seal(&mut self.result.upds) {
+                    self.result.vals.vals.push(source.vals.get_abs(lower));
                }
                lower += 1;
-            } 
+            }
 
            // If we have pushed any values, copy the key as well.
-            if self.result.vals.len() > init_vals {
+            if self.result.vals.vals.len() > init_vals {
                self.result.keys.push(source.keys.index(cursor));
-                self.result.keys_offs.push(self.result.vals.len());
-            } 
+                self.result.vals.offs.push(self.result.vals.vals.len());
+            }
        }
        /// Merge the next key in each of `source1` and `source2` into `self`, updating the appropriate cursors.
        ///
        /// This method only merges a single key. It applies all compaction necessary, and may result in no output
        /// if the updates cancel either directly or after compaction.
        fn merge_key(&mut self, source1: &OrdValStorage<L>, source2: &OrdValStorage<L>) {
            use ::std::cmp::Ordering;
            match source1.keys.index(self.key_cursor1).cmp(&source2.keys.index(self.key_cursor2)) {
-                Ordering::Less => { 
+                Ordering::Less => {
                    self.copy_key(source1, self.key_cursor1);
                    self.key_cursor1 += 1;
                },
                Ordering::Equal => {
                    // Keys are equal; must merge all values from both sources for this one key.
-                    let (lower1, upper1) = source1.values_for_key(self.key_cursor1);
-                    let (lower2, upper2) = source2.values_for_key(self.key_cursor2);
+                    let (lower1, upper1) = source1.vals.bounds(self.key_cursor1);
+                    let (lower2, upper2) = source2.vals.bounds(self.key_cursor2);
                    if let Some(off) = self.merge_vals((source1, lower1, upper1), (source2, lower2, upper2)) {
                        self.result.keys.push(source1.keys.index(self.key_cursor1));
-                        self.result.keys_offs.push(off);
+                        self.result.vals.offs.push(off);
                    }
                    // Increment cursors in either case; the keys are merged.
                    self.key_cursor1 += 1;
@@ -359,43 +518,40 @@
        /// If the compacted result contains values with non-empty updates, the function returns
        /// an offset that should be recorded to indicate the upper extent of the result values.
        fn merge_vals(
-            &mut self, 
-            (source1, mut lower1, upper1): (&OrdValStorage<L>, usize, usize), 
+            &mut self,
+            (source1, mut lower1, upper1): (&OrdValStorage<L>, usize, usize),
            (source2, mut lower2, upper2): (&OrdValStorage<L>, usize, usize),
        ) -> Option<usize> {
            // Capture the initial number of values to determine if the merge was ultimately non-empty.
-            let init_vals = self.result.vals.len();
+            let init_vals = self.result.vals.vals.len();
            while lower1 < upper1 && lower2 < upper2 {
                // We compare values, and fold in updates for the lowest values;
                // if they are non-empty post-consolidation, we write the value.
                // We could multi-way merge and it wouldn't be very complicated.
                use ::std::cmp::Ordering;
-                match source1.vals.index(lower1).cmp(&source2.vals.index(lower2)) {
-                    Ordering::Less => { 
+                match source1.vals.get_abs(lower1).cmp(&source2.vals.get_abs(lower2)) {
+                    Ordering::Less => {
                        // Extend stash by updates, with logical compaction applied.
                        self.stash_updates_for_val(source1, lower1);
-                        if let Some(off) = self.consolidate_updates() {
-                            self.result.vals_offs.push(off);
-                            self.result.vals.push(source1.vals.index(lower1));
+                        if self.staging.seal(&mut self.result.upds) {
+                            self.result.vals.vals.push(source1.vals.get_abs(lower1));
                        }
                        lower1 += 1;
                    },
                    Ordering::Equal => {
                        self.stash_updates_for_val(source1, lower1);
                        self.stash_updates_for_val(source2, lower2);
-                        if let Some(off) = self.consolidate_updates() {
-                            self.result.vals_offs.push(off);
-                            self.result.vals.push(source1.vals.index(lower1));
+                        if self.staging.seal(&mut self.result.upds) {
+                            self.result.vals.vals.push(source1.vals.get_abs(lower1));
                        }
                        lower1 += 1;
                        lower2 += 1;
                    },
-                    Ordering::Greater => { 
+                    Ordering::Greater => {
                        // Extend stash by updates, with logical compaction applied.
                        self.stash_updates_for_val(source2, lower2);
-                        if let Some(off) = self.consolidate_updates() {
-                            self.result.vals_offs.push(off);
-                            self.result.vals.push(source2.vals.index(lower2));
+                        if self.staging.seal(&mut self.result.upds) {
+                            self.result.vals.vals.push(source2.vals.get_abs(lower2));
                        }
                        lower2 += 1;
                    },
@@ -404,24 +560,22 @@
            // Merging is complete, but we may have remaining elements to push.
            while lower1 < upper1 {
                self.stash_updates_for_val(source1, lower1);
-                if let Some(off) = self.consolidate_updates() {
-                    self.result.vals_offs.push(off);
-                    self.result.vals.push(source1.vals.index(lower1));
+                if self.staging.seal(&mut self.result.upds) {
+                    self.result.vals.vals.push(source1.vals.get_abs(lower1));
                }
                lower1 += 1;
            }
            while lower2 < upper2 {
                self.stash_updates_for_val(source2, lower2);
-                if let Some(off) = self.consolidate_updates() {
-                    self.result.vals_offs.push(off);
-                    self.result.vals.push(source2.vals.index(lower2));
+                if self.staging.seal(&mut self.result.upds) {
+                    self.result.vals.vals.push(source2.vals.get_abs(lower2));
                }
                lower2 += 1;
            }
 
            // Values being pushed indicate non-emptiness.
-            if self.result.vals.len() > init_vals {
-                Some(self.result.vals.len())
+            if self.result.vals.vals.len() > init_vals {
+                Some(self.result.vals.vals.len())
            } else {
                None
            }
        }
 
        /// Transfer updates for an indexed value in `source` into `self`, with compaction applied.
        fn stash_updates_for_val(&mut self, source: &OrdValStorage<L>, index: usize) {
-            let (lower, upper) = source.updates_for_value(index);
+            let (lower, upper) = source.upds.bounds(index);
            for i in lower .. upper {
                // NB: Here is where we would need to look back if `lower == upper`.
-                let time = source.times.index(i);
-                let diff = source.diffs.index(i);
+                let (time, diff) = source.upds.get_abs(i);
                use crate::lattice::Lattice;
                let mut new_time: <L::Target as Update>::Time = time.into_owned();
                new_time.advance_by(self.description.since().borrow());
-                self.update_stash.push((new_time, diff.into_owned()));
-            }
-        }
-
-        /// Consolidates `self.update_stash` and produces the offset to record, if any.
-        fn consolidate_updates(&mut self) -> Option<usize> {
-            use crate::consolidation;
-            consolidation::consolidate(&mut self.update_stash);
-            if !self.update_stash.is_empty() {
-                // If there is a single element, equal to a just-prior recorded update,
-                // we push nothing and report an unincremented offset to encode this case.
-                let time_diff = self.result.times.last().zip(self.result.diffs.last());
-                let last_eq = self.update_stash.last().zip(time_diff).map(|((t1, d1), (t2, d2))| {
-                    let t1 = <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(t1);
-                    let d1 = <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(d1);
-                    t1.eq(&t2) && d1.eq(&d2)
-                });
-                if self.update_stash.len() == 1 && last_eq.unwrap_or(false) {
-                    // Just clear out update_stash, as we won't drain it here.
-                    self.update_stash.clear();
-                    self.singletons += 1;
-                }
-                else {
-                    // Conventional; move `update_stash` into `updates`.
-                    for (time, diff) in self.update_stash.drain(..) {
-                        self.result.times.push(time);
-                        self.result.diffs.push(diff);
-                    }
-                }
-                Some(self.result.times.len())
-            } else {
-                None
+                self.staging.push(new_time, diff.into_owned());
            }
        }
    }
@@ -498,17 +620,16 @@
        fn get_val<'a>(&self, storage: &'a Self::Storage) -> Option<Self::Val<'a>> { if self.val_valid(storage) { Some(self.val(storage)) } else { None } }
        fn key<'a>(&self, storage: &'a OrdValBatch<L>) -> Self::Key<'a> { storage.storage.keys.index(self.key_cursor) }
-        fn val<'a>(&self, storage: &'a OrdValBatch<L>) -> Self::Val<'a> { storage.storage.vals.index(self.val_cursor) }
+        fn val<'a>(&self, storage: &'a OrdValBatch<L>) -> Self::Val<'a> { storage.storage.vals.get_abs(self.val_cursor) }
        fn map_times<L2: FnMut(Self::TimeGat<'_>, Self::DiffGat<'_>)>(&mut self, storage: &OrdValBatch<L>, mut logic: L2) {
-            let (lower, upper) = storage.storage.updates_for_value(self.val_cursor);
+            let (lower, upper) = storage.storage.upds.bounds(self.val_cursor);
            for index in lower .. upper {
-                let time = storage.storage.times.index(index);
-                let diff = storage.storage.diffs.index(index);
+                let (time, diff) = storage.storage.upds.get_abs(index);
                logic(time, diff);
            }
        }
        fn key_valid(&self, storage: &OrdValBatch<L>) -> bool { self.key_cursor < storage.storage.keys.len() }
-        fn val_valid(&self, storage: &OrdValBatch<L>) -> bool { self.val_cursor < storage.storage.values_for_key(self.key_cursor).1 }
+        fn val_valid(&self, storage: &OrdValBatch<L>) -> bool { self.val_cursor < storage.storage.vals.bounds(self.key_cursor).1 }
        fn step_key(&mut self, storage: &OrdValBatch<L>){
            self.key_cursor += 1;
            if self.key_valid(storage) {
@@ -525,13 +646,13 @@
            }
        }
        fn step_val(&mut self, storage: &OrdValBatch<L>) {
-            self.val_cursor += 1; 
+            self.val_cursor += 1;
            if !self.val_valid(storage) {
-                self.val_cursor = storage.storage.values_for_key(self.key_cursor).1;
+                self.val_cursor = storage.storage.vals.bounds(self.key_cursor).1;
            }
        }
        fn seek_val(&mut self, storage: &OrdValBatch<L>, val: Self::Val<'_>) {
-            self.val_cursor += storage.storage.vals.advance(self.val_cursor, storage.storage.values_for_key(self.key_cursor).1, |x| <L::ValContainer as BatchContainer>::reborrow(x).lt(&<L::ValContainer as BatchContainer>::reborrow(val)));
+            self.val_cursor += storage.storage.vals.vals.advance(self.val_cursor, storage.storage.vals.bounds(self.key_cursor).1, |x| <L::ValContainer as BatchContainer>::reborrow(x).lt(&<L::ValContainer as BatchContainer>::reborrow(val)));
        }
        fn rewind_keys(&mut self, storage: &OrdValBatch<L>) {
            self.key_cursor = 0;
@@ -540,7 +661,7 @@
            }
        }
        fn rewind_vals(&mut self, storage: &OrdValBatch<L>) {
-            self.val_cursor = storage.storage.values_for_key(self.key_cursor).0;
+            self.val_cursor = storage.storage.vals.bounds(self.key_cursor).0;
        }
    }
 
@@ -550,47 +671,10 @@
        ///
        /// This is public to allow container implementors to set and inspect their container.
        pub result: OrdValStorage<L>,
-        singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton optimizations we performed.
-        ///
-        /// This number allows us to correctly gauge the total number of updates reflected in a batch,
-        /// even though `updates.len()` may be much shorter than this amount.
-        singletons: usize,
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
        _marker: PhantomData<CI>,
    }
 
-    impl<L: Layout, CI> OrdValBuilder<L, CI> {
-        /// Pushes a single update, which may set `self.singleton` rather than push.
-        ///
-        /// This operation is meant to be equivalent to `self.result.updates.push((time, diff))`.
-        /// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
-        /// to encode a singleton update with an "absent" update: repeating the most recent offset.
-        /// This otherwise invalid state encodes "look back one element".
-        ///
-        /// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
-        /// previously pushed update exactly. In that case, we do not push the update into `updates`.
-        /// The update tuple is retained in `self.singleton` in case we see another update and need
-        /// to recover the singleton to push it into `updates` to join the second update.
-        fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
-            // If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
-            if self.result.times.last().map(|t| t == <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time)) == Some(true) &&
-               self.result.diffs.last().map(|d| d == <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff)) == Some(true)
-            {
-                assert!(self.singleton.is_none());
-                self.singleton = Some((time, diff));
-            }
-            else {
-                // If we have pushed a single element, we need to copy it out to meet this one.
-                if let Some((time, diff)) = self.singleton.take() {
-                    self.result.times.push(time);
-                    self.result.diffs.push(diff);
-                }
-                self.result.times.push(time);
-                self.result.diffs.push(diff);
-            }
-        }
-    }
-
    impl<L, CI> Builder for OrdValBuilder<L, CI>
    where
        L: for<'a> Layout<
@@ -607,18 +691,13 @@
        type Output = OrdValBatch<L>;
 
        fn with_capacity(keys: usize, vals: usize, upds: usize) -> Self {
-            // We don't introduce zero offsets as they will be introduced by the first `push` call.
-            Self { 
+            Self {
                result: OrdValStorage {
                    keys: L::KeyContainer::with_capacity(keys),
-                    keys_offs: L::OffsetContainer::with_capacity(keys + 1),
-                    vals: L::ValContainer::with_capacity(vals),
-                    vals_offs: L::OffsetContainer::with_capacity(vals + 1),
-                    times: L::TimeContainer::with_capacity(upds),
-                    diffs: L::DiffContainer::with_capacity(upds),
+                    vals: Vals::with_capacity(keys + 1, vals),
+                    upds: Upds::with_capacity(vals + 1, upds),
                },
-                singleton: None,
-                singletons: 0,
+                staging: UpdsBuilder::default(),
                _marker: PhantomData,
            }
        }
@@ -627,25 +706,30 @@
        fn push(&mut self, chunk: &mut Self::Input) {
            for item in chunk.drain() {
                let (key, val, time, diff) = CI::into_parts(item);
+
+                // Pre-load the first update.
+                if self.result.keys.is_empty() {
+                    self.result.vals.vals.push(val);
+                    self.result.keys.push(key);
+                    self.staging.push(time, diff);
+                }
                // Perhaps this is a continuation of an already received key.
-                if self.result.keys.last().map(|k| CI::key_eq(&key, k)).unwrap_or(false) {
+                else if self.result.keys.last().map(|k| CI::key_eq(&key, k)).unwrap_or(false) {
                    // Perhaps this is a continuation of an already received value.
-                    if self.result.vals.last().map(|v| CI::val_eq(&val, v)).unwrap_or(false) {
-                        self.push_update(time, diff);
+                    if self.result.vals.vals.last().map(|v| CI::val_eq(&val, v)).unwrap_or(false) {
+                        self.staging.push(time, diff);
                    } else {
                        // New value; complete representation of prior value.
-                        self.result.vals_offs.push(self.result.times.len());
-                        if self.singleton.take().is_some() { self.singletons += 1; }
-                        self.push_update(time, diff);
-                        self.result.vals.push(val);
+                        self.staging.seal(&mut self.result.upds);
+                        self.staging.push(time, diff);
+                        self.result.vals.vals.push(val);
                    }
                } else {
                    // New key; complete representation of prior key.
-                    self.result.vals_offs.push(self.result.times.len());
-                    if self.singleton.take().is_some() { self.singletons += 1; }
-                    self.result.keys_offs.push(self.result.vals.len());
-                    self.push_update(time, diff);
-                    self.result.vals.push(val);
+                    self.staging.seal(&mut self.result.upds);
+                    self.staging.push(time, diff);
+                    self.result.vals.offs.push(self.result.vals.vals.len());
+                    self.result.vals.vals.push(val);
                    self.result.keys.push(key);
                }
            }
        }
 
        #[inline(never)]
        fn done(mut self, description: Description<<L::Target as Update>::Time>) -> OrdValBatch<L> {
-            // Record the final offsets
-            self.result.vals_offs.push(self.result.times.len());
-            // Remove any pending singleton, and if it was set increment our count.
-            if self.singleton.take().is_some() { self.singletons += 1; }
-            self.result.keys_offs.push(self.result.vals.len());
+            self.staging.seal(&mut self.result.upds);
+            self.result.vals.offs.push(self.result.vals.vals.len());
            OrdValBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description,
            }
        }
@@ -671,7 +752,7 @@
            for mut chunk in chain.drain(..) {
                builder.push(&mut chunk);
            }
-            
+
            builder.done(description)
        }
    }
@@ -689,41 +770,21 @@
    use crate::trace::implementations::{BatchContainer, BuilderInput};
    use crate::IntoOwned;
 
-    use super::{Layout, Update};
+    use super::{Layout, Update, Upds, layers::UpdsBuilder};
 
    /// An immutable collection of update tuples, from a contiguous interval of logical times.
    #[derive(Debug, Serialize, Deserialize)]
+    #[serde(bound = "
+        L::KeyContainer: Serialize + for<'a> Deserialize<'a>,
+        L::OffsetContainer: Serialize + for<'a> Deserialize<'a>,
+        L::TimeContainer: Serialize + for<'a> Deserialize<'a>,
+        L::DiffContainer: Serialize + for<'a> Deserialize<'a>,
+    ")]
    pub struct OrdKeyStorage<L: Layout> {
        /// An ordered list of keys, corresponding to entries in `keys_offs`.
        pub keys: L::KeyContainer,
-        /// Offsets used to provide indexes from keys to updates.
-        ///
-        /// This list has a special representation that any empty range indicates the singleton
-        /// element just before the range, as if the start were decremented by one. The empty
-        /// range is otherwise an invalid representation, and we borrow it to compactly encode
-        /// single common update values (e.g. in a snapshot, the minimal time and a diff of one).
-        ///
-        /// The length of this list is one longer than `keys`, so that we can avoid bounds logic.
-        pub keys_offs: L::OffsetContainer,
-        /// Concatenated ordered lists of update times, bracketed by offsets in `vals_offs`.
-        pub times: L::TimeContainer,
-        /// Concatenated ordered lists of update diffs, bracketed by offsets in `vals_offs`.
-        pub diffs: L::DiffContainer,
-    }
-
-    impl<L: Layout> OrdKeyStorage<L> {
-        /// Lower and upper bounds in `self.vals` corresponding to the key at `index`.
-        fn updates_for_key(&self, index: usize) -> (usize, usize) {
-            let mut lower = self.keys_offs.index(index);
-            let upper = self.keys_offs.index(index+1);
-            // We use equal lower and upper to encode "singleton update; just before here".
-            // It should only apply when there is a prior element, so `lower` should be greater than zero.
-            if lower == upper {
-                assert!(lower > 0);
-                lower -= 1;
-            }
-            (lower, upper)
-        }
+        /// For each key in `keys`, a list of (time, diff) updates.
+        pub upds: Upds<L::OffsetContainer, L::TimeContainer, L::DiffContainer>,
    }
 
    /// An immutable collection of update tuples, from a contiguous interval of logical times.
@@ -751,7 +812,7 @@
    }
 
    impl<L: Layout> BatchReader for OrdKeyBatch<L> {
-        
+
        type Key<'a> = <L::KeyContainer as BatchContainer>::ReadItem<'a>;
        type Val<'a> = &'a ();
        type Time = <L::Target as Update>::Time;
@@ -787,9 +848,7 @@
            Self {
                storage: OrdKeyStorage {
                    keys: L::KeyContainer::with_capacity(0),
-                    keys_offs: L::OffsetContainer::with_capacity(0),
-                    times: L::TimeContainer::with_capacity(0),
-                    diffs: L::DiffContainer::with_capacity(0),
+                    upds: Upds::default(),
                },
                description: Description::new(lower, upper, Antichain::from_elem(Self::Time::minimum())),
                updates: 0,
@@ -809,12 +868,7 @@
        description: Description<<L::Target as Update>::Time>,
 
        /// Local stash of updates, to use for consolidation.
-        ///
-        /// We could emulate a `ChangeBatch` here, with related compaction smarts.
-        /// A `ChangeBatch` itself needs an `i64` diff type, which we have not.
-        update_stash: Vec<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton-optimized entries, that we may correctly count the updates.
-        singletons: usize,
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
    }
 
    impl<L: Layout> Merger<OrdKeyBatch<L>> for OrdKeyMerger<L>
    where
@@ -835,28 +889,20 @@
            let batch1 = &batch1.storage;
            let batch2 = &batch2.storage;
 
-            let mut storage = OrdKeyStorage {
-                keys: L::KeyContainer::merge_capacity(&batch1.keys, &batch2.keys),
-                keys_offs: L::OffsetContainer::with_capacity(batch1.keys_offs.len() + batch2.keys_offs.len()),
-                times: L::TimeContainer::merge_capacity(&batch1.times, &batch2.times),
-                diffs: L::DiffContainer::merge_capacity(&batch1.diffs, &batch2.diffs),
-            };
-
-            let keys_offs: &mut L::OffsetContainer = &mut storage.keys_offs;
-            keys_offs.push(0);
-
            OrdKeyMerger {
                key_cursor1: 0,
                key_cursor2: 0,
-                result: storage,
+                result: OrdKeyStorage {
+                    keys: L::KeyContainer::merge_capacity(&batch1.keys, &batch2.keys),
+                    upds: Upds::merge_capacity(&batch1.upds, &batch2.upds),
+                },
                description,
-                update_stash: Vec::new(),
-                singletons: 0,
+                staging: UpdsBuilder::default(),
            }
        }
        fn done(self) -> OrdKeyBatch<L> {
            OrdKeyBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description: self.description,
            }
        }
        fn work(&mut self, source1: &OrdKeyBatch<L>, source2: &OrdKeyBatch<L>, fuel: &mut isize) {
 
            // An (incomplete) indication of the amount of work we've done so far.
-            let starting_updates = self.result.times.len() + self.singletons;
+            let starting_updates = self.staging.total();
            let mut effort = 0isize;
 
            // While both mergees are still active, perform single-key merges.
            while self.key_cursor1 < source1.storage.keys.len() && self.key_cursor2 < source2.storage.keys.len() && effort < *fuel {
                self.merge_key(&source1.storage, &source2.storage);
                // An (incomplete) accounting of the work we've done.
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
 
            // Merging is complete, and only copying remains.
@@ -879,12 +925,12 @@
            while self.key_cursor1 < source1.storage.keys.len() && effort < *fuel {
                self.copy_key(&source1.storage, self.key_cursor1);
                self.key_cursor1 += 1;
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
            while self.key_cursor2 < source2.storage.keys.len() && effort < *fuel {
                self.copy_key(&source2.storage, self.key_cursor2);
                self.key_cursor2 += 1;
-                effort = (self.result.times.len() + self.singletons - starting_updates) as isize;
+                effort = (self.staging.total() - starting_updates) as isize;
            }
 
            *fuel -= effort;
@@ -897,13 +943,12 @@
        ///
        /// The method extracts the key in `source` at `cursor`, and merges it into `self`.
        /// If the result does not wholly cancel, the key will be present in `self` with the
-        /// compacted values and updates. 
-        /// 
+        /// compacted values and updates.
+        ///
        /// The caller should be certain to update the cursor, as this method does not do this.
        fn copy_key(&mut self, source: &OrdKeyStorage<L>, cursor: usize) {
            self.stash_updates_for_key(source, cursor);
-            if let Some(off) = self.consolidate_updates() {
-                self.result.keys_offs.push(off);
+            if self.staging.seal(&mut self.result.upds) {
                self.result.keys.push(source.keys.index(cursor));
            }
        }
@@ -914,7 +959,7 @@
        fn merge_key(&mut self, source1: &OrdKeyStorage<L>, source2: &OrdKeyStorage<L>) {
            use ::std::cmp::Ordering;
            match source1.keys.index(self.key_cursor1).cmp(&source2.keys.index(self.key_cursor2)) {
-                Ordering::Less => { 
+                Ordering::Less => {
                    self.copy_key(source1, self.key_cursor1);
                    self.key_cursor1 += 1;
                },
@@ -922,8 +967,7 @@
                    // Keys are equal; must merge all updates from both sources for this one key.
                    self.stash_updates_for_key(source1, self.key_cursor1);
                    self.stash_updates_for_key(source2, self.key_cursor2);
-                    if let Some(off) = self.consolidate_updates() {
-                        self.result.keys_offs.push(off);
+                    if self.staging.seal(&mut self.result.upds) {
                        self.result.keys.push(source1.keys.index(self.key_cursor1));
                    }
                    // Increment cursors in either case; the keys are merged.
@@ -939,46 +983,14 @@
        /// Transfer updates for an indexed value in `source` into `self`, with compaction applied.
        fn stash_updates_for_key(&mut self, source: &OrdKeyStorage<L>, index: usize) {
-            let (lower, upper) = source.updates_for_key(index);
+            let (lower, upper) = source.upds.bounds(index);
            for i in lower .. upper {
                // NB: Here is where we would need to look back if `lower == upper`.
-                let time = source.times.index(i);
-                let diff = source.diffs.index(i);
+                let (time, diff) = source.upds.get_abs(i);
                use crate::lattice::Lattice;
                let mut new_time = time.into_owned();
                new_time.advance_by(self.description.since().borrow());
-                self.update_stash.push((new_time, diff.into_owned()));
-            }
-        }
-
-        /// Consolidates `self.update_stash` and produces the offset to record, if any.
-        fn consolidate_updates(&mut self) -> Option<usize> {
-            use crate::consolidation;
-            consolidation::consolidate(&mut self.update_stash);
-            if !self.update_stash.is_empty() {
-                // If there is a single element, equal to a just-prior recorded update,
-                // we push nothing and report an unincremented offset to encode this case.
-                let time_diff = self.result.times.last().zip(self.result.diffs.last());
-                let last_eq = self.update_stash.last().zip(time_diff).map(|((t1, d1), (t2, d2))| {
-                    let t1 = <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(t1);
-                    let d1 = <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(d1);
-                    t1.eq(&t2) && d1.eq(&d2)
-                });
-                if self.update_stash.len() == 1 && last_eq.unwrap_or(false) {
-                    // Just clear out update_stash, as we won't drain it here.
-                    self.update_stash.clear();
-                    self.singletons += 1;
-                }
-                else {
-                    // Conventional; move `update_stash` into `updates`.
-                    for (time, diff) in self.update_stash.drain(..) {
-                        self.result.times.push(time);
-                        self.result.diffs.push(diff);
-                    }
-                }
-                Some(self.result.times.len())
-            } else {
-                None
+                self.staging.push(new_time, diff.into_owned());
            }
        }
    }
@@ -1009,10 +1021,9 @@
        fn key<'a>(&self, storage: &'a Self::Storage) -> Self::Key<'a> { storage.storage.keys.index(self.key_cursor) }
        fn val<'a>(&self, _storage: &'a Self::Storage) -> &'a () { &() }
        fn map_times<L2: FnMut(Self::TimeGat<'_>, Self::DiffGat<'_>)>(&mut self, storage: &Self::Storage, mut logic: L2) {
-            let (lower, upper) = storage.storage.updates_for_key(self.key_cursor);
+            let (lower, upper) = storage.storage.upds.bounds(self.key_cursor);
            for index in lower .. upper {
-                let time = storage.storage.times.index(index);
-                let diff = storage.storage.diffs.index(index);
+                let (time, diff) = storage.storage.upds.get_abs(index);
                logic(time, diff);
            }
        }
@@ -1054,47 +1065,10 @@
        ///
        /// This is public to allow container implementors to set and inspect their container.
        pub result: OrdKeyStorage<L>,
-        singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
-        /// Counts the number of singleton optimizations we performed.
-        ///
-        /// This number allows us to correctly gauge the total number of updates reflected in a batch,
-        /// even though `updates.len()` may be much shorter than this amount.
-        singletons: usize,
+        staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
        _marker: PhantomData<CI>,
    }
 
-    impl<L: Layout, CI> OrdKeyBuilder<L, CI> {
-        /// Pushes a single update, which may set `self.singleton` rather than push.
-        ///
-        /// This operation is meant to be equivalent to `self.result.updates.push((time, diff))`.
-        /// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
-        /// to encode a singleton update with an "absent" update: repeating the most recent offset.
-        /// This otherwise invalid state encodes "look back one element".
-        ///
-        /// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
-        /// previously pushed update exactly. In that case, we do not push the update into `updates`.
-        /// The update tuple is retained in `self.singleton` in case we see another update and need
-        /// to recover the singleton to push it into `updates` to join the second update.
-        fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
-            // If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
-            let t1 = <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time);
-            let d1 = <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff);
-            if self.result.times.last().map(|t| t == t1).unwrap_or(false) && self.result.diffs.last().map(|d| d == d1).unwrap_or(false) {
-                assert!(self.singleton.is_none());
-                self.singleton = Some((time, diff));
-            }
-            else {
-                // If we have pushed a single element, we need to copy it out to meet this one.
-                if let Some((time, diff)) = self.singleton.take() {
-                    self.result.times.push(time);
-                    self.result.diffs.push(diff);
-                }
-                self.result.times.push(time);
-                self.result.diffs.push(diff);
-            }
-        }
-    }
-
    impl<L, CI> Builder for OrdKeyBuilder<L, CI>
    where
        L: for<'a> Layout<KeyContainer: PushInto<CI::Key<'a>>>,
@@ -1108,16 +1082,12 @@
        type Output = OrdKeyBatch<L>;
 
        fn with_capacity(keys: usize, _vals: usize, upds: usize) -> Self {
-            // We don't introduce zero offsets as they will be introduced by the first `push` call.
-            Self { 
+            Self {
                result: OrdKeyStorage {
                    keys: L::KeyContainer::with_capacity(keys),
-                    keys_offs: L::OffsetContainer::with_capacity(keys + 1),
-                    times: L::TimeContainer::with_capacity(upds),
-                    diffs: L::DiffContainer::with_capacity(upds),
+                    upds: Upds::with_capacity(keys+1, upds),
                },
-                singleton: None,
-                singletons: 0,
+                staging: UpdsBuilder::default(),
                _marker: PhantomData,
            }
        }
@@ -1126,15 +1096,16 @@
        fn push(&mut self, chunk: &mut Self::Input) {
            for item in chunk.drain() {
                let (key, _val, time, diff) = CI::into_parts(item);
+                if self.result.keys.is_empty() {
+                    self.result.keys.push(key);
+                    self.staging.push(time, diff);
+                }
                // Perhaps this is a continuation of an already received key.
-                if self.result.keys.last().map(|k| CI::key_eq(&key, k)).unwrap_or(false) {
-                    self.push_update(time, diff);
+                else if self.result.keys.last().map(|k| CI::key_eq(&key, k)).unwrap_or(false) {
+                    self.staging.push(time, diff);
                } else {
-                    // New key; complete representation of prior key.
-                    self.result.keys_offs.push(self.result.times.len());
-                    // Remove any pending singleton, and if it was set increment our count.
-                    if self.singleton.take().is_some() { self.singletons += 1; }
-                    self.push_update(time, diff);
+                    self.staging.seal(&mut self.result.upds);
+                    self.staging.push(time, diff);
                    self.result.keys.push(key);
                }
            }
        }
 
@@ -1142,12 +1113,9 @@
        #[inline(never)]
        fn done(mut self, description: Description<<L::Target as Update>::Time>) -> OrdKeyBatch<L> {
-            // Record the final offsets
-            self.result.keys_offs.push(self.result.times.len());
-            // Remove any pending singleton, and if it was set increment our count.
-            if self.singleton.take().is_some() { self.singletons += 1; }
+            self.staging.seal(&mut self.result.upds);
            OrdKeyBatch {
-                updates: self.result.times.len() + self.singletons,
+                updates: self.staging.total(),
                storage: self.result,
                description,
            }
        }
@@ -1159,7 +1127,7 @@
            for mut chunk in chain.drain(..) {
                builder.push(&mut chunk);
            }
-            
+
            builder.done(description)
        }
    }
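
Note on the offset encoding. The `layers` module above stores each trie layer as concatenated lists plus an offset container with one more entry than there are lists, and it reuses an empty offset range to mean "repeat the single element just before this range". The following is a self-contained sketch of that scheme, using a toy `Vec`-based layer rather than the patch's `Vals`/`Upds` containers (all names here are illustrative, not the library's API):

// A toy layer: lists of `T` bracketed by offsets, mirroring the diff's
// `offs`/`vals` pair. `offs.len()` is always the number of lists plus one.
struct Layer<T> {
    offs: Vec<usize>,
    vals: Vec<T>,
}

impl<T> Layer<T> {
    fn new() -> Self { Layer { offs: vec![0], vals: Vec::new() } }
    /// Seal the current list by recording where it ends.
    fn seal(&mut self) { self.offs.push(self.vals.len()); }
    /// Half-open bounds of list `index`, decoding the empty-range singleton.
    fn bounds(&self, index: usize) -> (usize, usize) {
        let (mut lower, upper) = (self.offs[index], self.offs[index + 1]);
        if lower == upper {
            assert!(lower > 0); // only valid when a prior element exists
            lower -= 1;         // "look back one element"
        }
        (lower, upper)
    }
}

fn main() {
    // Two keys, where the second key's update list repeats the first's single update.
    let mut upds = Layer::new();
    upds.vals.push((0u64, 1i64)); // key 0: one update (time 0, diff +1)
    upds.seal();
    // Key 1 has the same single update: push nothing and repeat the offset.
    upds.seal();

    assert_eq!(upds.bounds(0), (0, 1));
    assert_eq!(upds.bounds(1), (0, 1)); // empty range decoded as the prior element
    println!("key 0 -> {:?}, key 1 -> {:?}", upds.bounds(0), upds.bounds(1));
}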
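
A second sketch, again with hypothetical `Vec`-based stand-ins rather than the patch's `UpdsBuilder<T, D>`, shows why `total` is tracked separately from the physical length of the times array: a consolidated singleton equal to the previous update is compressed away, leaving `times.len()` smaller than the logical update count that `done` must report.

use std::collections::BTreeMap;

struct Staging {
    stash: Vec<(u64, i64)>, // (time, diff) pairs awaiting consolidation
    total: usize,           // logical updates sealed, including compressed ones
}

impl Staging {
    fn new() -> Self { Staging { stash: Vec::new(), total: 0 } }
    fn push(&mut self, time: u64, diff: i64) { self.stash.push((time, diff)); }

    /// Consolidate the stash and seal it into `(offs, times, diffs)`.
    /// Returns true when the consolidated updates were non-empty.
    fn seal(&mut self, offs: &mut Vec<usize>, times: &mut Vec<u64>, diffs: &mut Vec<i64>) -> bool {
        // Sum diffs by time and drop zeros; this stands in for differential's
        // `consolidation::consolidate`.
        let mut sums: BTreeMap<u64, i64> = BTreeMap::new();
        for (t, d) in self.stash.drain(..) { *sums.entry(t).or_insert(0) += d; }
        let updates: Vec<_> = sums.into_iter().filter(|&(_, d)| d != 0).collect();
        if updates.is_empty() { return false; }
        self.total += updates.len();
        // Singleton compression: a single update equal to the most recent one
        // is encoded by repeating the previous offset and pushing nothing.
        let repeats_last = updates.len() == 1
            && times.last() == Some(&updates[0].0)
            && diffs.last() == Some(&updates[0].1);
        if !repeats_last {
            for (t, d) in updates { times.push(t); diffs.push(d); }
        }
        offs.push(times.len());
        true
    }
}

fn main() {
    let (mut offs, mut times, mut diffs) = (vec![0], Vec::new(), Vec::new());
    let mut staging = Staging::new();

    staging.push(3, 1); staging.push(3, 1);        // consolidates to (3, +2)
    staging.seal(&mut offs, &mut times, &mut diffs);
    staging.push(3, 2);                            // equals the prior update: compressed
    staging.seal(&mut offs, &mut times, &mut diffs);

    assert_eq!(times.len(), 1);    // only one physical update stored
    assert_eq!(staging.total, 2);  // but two logical updates accounted for
    assert_eq!(offs, vec![0, 1, 1]);
}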