Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 38 additions & 119 deletions differential-dataflow/examples/columnar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ pub mod dd_builder {
use differential_dataflow::trace::implementations::Layout;
use differential_dataflow::trace::implementations::Update;
use differential_dataflow::trace::implementations::BatchContainer;
use differential_dataflow::trace::implementations::ord_neu::{OrdValBatch, val_batch::OrdValStorage, OrdKeyBatch};
use differential_dataflow::trace::implementations::ord_neu::{OrdValBatch, val_batch::OrdValStorage, OrdKeyBatch, Vals, Upds, layers::UpdsBuilder};
use differential_dataflow::trace::implementations::ord_neu::key_batch::OrdKeyStorage;
use crate::Column;

Expand All @@ -597,44 +597,7 @@ pub mod dd_builder {
///
/// This is public to allow container implementors to set and inspect their container.
pub result: OrdValStorage<L>,
singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
/// Counts the number of singleton optimizations we performed.
///
/// This number allows us to correctly gauge the total number of updates reflected in a batch,
/// even though `updates.len()` may be much shorter than this amount.
singletons: usize,
}

impl<L: Layout> OrdValBuilder<L> {
/// Pushes a single update, which may set `self.singleton` rather than push.
///
/// This operation is meant to be equivalent to `self.results.updates.push((time, diff))`.
/// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
/// to encode a singleton update with an "absert" update: repeating the most recent offset.
/// This otherwise invalid state encodes "look back one element".
///
/// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
/// previously pushed update exactly. In that case, we do not push the update into `updates`.
/// The update tuple is retained in `self.singleton` in case we see another update and need
/// to recover the singleton to push it into `updates` to join the second update.
fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
// If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
if self.result.times.last().map(|t| t == <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time)) == Some(true) &&
self.result.diffs.last().map(|d| d == <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff)) == Some(true)
{
assert!(self.singleton.is_none());
self.singleton = Some((time, diff));
}
else {
// If we have pushed a single element, we need to copy it out to meet this one.
if let Some((time, diff)) = self.singleton.take() {
self.result.times.push(time);
self.result.diffs.push(diff);
}
self.result.times.push(time);
self.result.diffs.push(diff);
}
}
staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
}

// The layout `L` determines the key, val, time, and diff types.
Expand All @@ -656,18 +619,13 @@ pub mod dd_builder {
type Output = OrdValBatch<L>;

fn with_capacity(keys: usize, vals: usize, upds: usize) -> Self {
// We don't introduce zero offsets as they will be introduced by the first `push` call.
Self {
result: OrdValStorage {
keys: L::KeyContainer::with_capacity(keys),
keys_offs: L::OffsetContainer::with_capacity(keys + 1),
vals: L::ValContainer::with_capacity(vals),
vals_offs: L::OffsetContainer::with_capacity(vals + 1),
times: L::TimeContainer::with_capacity(upds),
diffs: L::DiffContainer::with_capacity(upds),
vals: Vals::with_capacity(keys + 1, vals),
upds: Upds::with_capacity(vals + 1, upds),
},
singleton: None,
singletons: 0,
staging: UpdsBuilder::default(),
}
}

Expand All @@ -689,39 +647,40 @@ pub mod dd_builder {
let time = <<L::TimeContainer as BatchContainer>::Owned as Columnar>::into_owned(time);
let diff = <<L::DiffContainer as BatchContainer>::Owned as Columnar>::into_owned(diff);

// Pre-load the first update.
if self.result.keys.is_empty() {
self.result.vals.vals.push(&val);
self.result.keys.push(&key);
self.staging.push(time, diff);
}
// Perhaps this is a continuation of an already received key.
if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
else if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
// Perhaps this is a continuation of an already received value.
if self.result.vals.last().map(|v| <<L::ValContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&val).eq(&v)).unwrap_or(false) {
self.push_update(time, diff);
if self.result.vals.vals.last().map(|v| <<L::ValContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&val).eq(&v)).unwrap_or(false) {
self.staging.push(time, diff);
} else {
// New value; complete representation of prior value.
self.result.vals_offs.push(self.result.times.len());
if self.singleton.take().is_some() { self.singletons += 1; }
self.push_update(time, diff);
self.result.vals.push(&val);
self.staging.seal(&mut self.result.upds);
self.staging.push(time, diff);
self.result.vals.vals.push(&val);
}
} else {
// New key; complete representation of prior key.
self.result.vals_offs.push(self.result.times.len());
if self.singleton.take().is_some() { self.singletons += 1; }
self.result.keys_offs.push(self.result.vals.len());
self.push_update(time, diff);
self.result.vals.push(&val);
self.staging.seal(&mut self.result.upds);
self.staging.push(time, diff);
self.result.vals.offs.push(self.result.vals.len());
self.result.vals.vals.push(&val);
self.result.keys.push(&key);
}
}
}

#[inline(never)]
fn done(mut self, description: Description<Self::Time>) -> OrdValBatch<L> {
// Record the final offsets
self.result.vals_offs.push(self.result.times.len());
// Remove any pending singleton, and if it was set increment our count.
if self.singleton.take().is_some() { self.singletons += 1; }
self.result.keys_offs.push(self.result.vals.len());
self.staging.seal(&mut self.result.upds);
self.result.vals.offs.push(self.result.vals.len());
OrdValBatch {
updates: self.result.times.len() + self.singletons,
updates: self.staging.total(),
storage: self.result,
description,
}
Expand All @@ -745,44 +704,7 @@ pub mod dd_builder {
///
/// This is public to allow container implementors to set and inspect their container.
pub result: OrdKeyStorage<L>,
singleton: Option<(<L::Target as Update>::Time, <L::Target as Update>::Diff)>,
/// Counts the number of singleton optimizations we performed.
///
/// This number allows us to correctly gauge the total number of updates reflected in a batch,
/// even though `updates.len()` may be much shorter than this amount.
singletons: usize,
}

impl<L: Layout> OrdKeyBuilder<L> {
/// Pushes a single update, which may set `self.singleton` rather than push.
///
/// This operation is meant to be equivalent to `self.results.updates.push((time, diff))`.
/// However, for "clever" reasons it does not do this. Instead, it looks for opportunities
/// to encode a singleton update with an "absert" update: repeating the most recent offset.
/// This otherwise invalid state encodes "look back one element".
///
/// When `self.singleton` is `Some`, it means that we have seen one update and it matched the
/// previously pushed update exactly. In that case, we do not push the update into `updates`.
/// The update tuple is retained in `self.singleton` in case we see another update and need
/// to recover the singleton to push it into `updates` to join the second update.
fn push_update(&mut self, time: <L::Target as Update>::Time, diff: <L::Target as Update>::Diff) {
// If a just-pushed update exactly equals `(time, diff)` we can avoid pushing it.
if self.result.times.last().map(|t| t == <<L::TimeContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&time)) == Some(true) &&
self.result.diffs.last().map(|d| d == <<L::DiffContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&diff)) == Some(true)
{
assert!(self.singleton.is_none());
self.singleton = Some((time, diff));
}
else {
// If we have pushed a single element, we need to copy it out to meet this one.
if let Some((time, diff)) = self.singleton.take() {
self.result.times.push(time);
self.result.diffs.push(diff);
}
self.result.times.push(time);
self.result.diffs.push(diff);
}
}
staging: UpdsBuilder<L::TimeContainer, L::DiffContainer>,
}

// The layout `L` determines the key, val, time, and diff types.
Expand All @@ -804,16 +726,12 @@ pub mod dd_builder {
type Output = OrdKeyBatch<L>;

fn with_capacity(keys: usize, _vals: usize, upds: usize) -> Self {
// We don't introduce zero offsets as they will be introduced by the first `push` call.
Self {
result: OrdKeyStorage {
keys: L::KeyContainer::with_capacity(keys),
keys_offs: L::OffsetContainer::with_capacity(keys + 1),
times: L::TimeContainer::with_capacity(upds),
diffs: L::DiffContainer::with_capacity(upds),
upds: Upds::with_capacity(keys + 1, upds),
},
singleton: None,
singletons: 0,
staging: UpdsBuilder::default(),
}
}

Expand All @@ -834,27 +752,28 @@ pub mod dd_builder {
let time = <<L::TimeContainer as BatchContainer>::Owned as Columnar>::into_owned(time);
let diff = <<L::DiffContainer as BatchContainer>::Owned as Columnar>::into_owned(diff);

// Pre-load the first update.
if self.result.keys.is_empty() {
self.result.keys.push(&key);
self.staging.push(time, diff);
}
// Perhaps this is a continuation of an already received key.
if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
self.push_update(time, diff);
else if self.result.keys.last().map(|k| <<L::KeyContainer as BatchContainer>::ReadItem<'_> as IntoOwned>::borrow_as(&key).eq(&k)).unwrap_or(false) {
self.staging.push(time, diff);
} else {
// New key; complete representation of prior key.
self.result.keys_offs.push(self.result.times.len());
if self.singleton.take().is_some() { self.singletons += 1; }
self.push_update(time, diff);
self.staging.seal(&mut self.result.upds);
self.staging.push(time, diff);
self.result.keys.push(&key);
}
}
}

#[inline(never)]
fn done(mut self, description: Description<Self::Time>) -> OrdKeyBatch<L> {
// Record the final offsets
self.result.keys_offs.push(self.result.times.len());
// Remove any pending singleton, and if it was set increment our count.
if self.singleton.take().is_some() { self.singletons += 1; }
self.staging.seal(&mut self.result.upds);
OrdKeyBatch {
updates: self.result.times.len() + self.singletons,
updates: self.staging.total(),
storage: self.result,
description,
}
Expand Down
Loading