Factor batch from online #455
Changes from all commits: 464dc72, e3e3906, 7fe4b50, c293b1a, 3100a54, 73c2562, 2fef9d7, 18cb363, da5fbd3, e0937fa, 61d4d83, 43496db, 4a05a55
```diff
@@ -21,8 +21,7 @@ import com.twitter.algebird.{Semigroup, SummingQueue}
 import com.twitter.storehaus.algebra.Mergeable
 import com.twitter.bijection.Injection
 
-import com.twitter.summingbird.online.{Externalizer, AsyncCache}
-import com.twitter.summingbird.batch.{BatchID, Timestamp}
+import com.twitter.summingbird.online.{FlatMapOperation, Externalizer, AsyncCache, CacheBuilder}
 import com.twitter.summingbird.online.option._
 import com.twitter.summingbird.option.CacheSize
```
```diff
@@ -50,34 +49,34 @@ import com.twitter.summingbird.option.CacheSize
  * @author Ashu Singhal
  */
 
-class Summer[Key, Value: Semigroup, S, D](
-  @transient storeSupplier: () => Mergeable[(Key, BatchID), Value],
+class Summer[Key, Value: Semigroup, Event, S, D](
+  @transient storeSupplier: () => Mergeable[Key, Value],
+  @transient flatMapOp: FlatMapOperation[(Key, (Option[Value], Value)), Event],
```
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is new, right? Adding on a flat map operation directly? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this rather than make a combinator on AsyncBase directly? Can't we glue two together? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Combining two async's? the return types of apply/tick in AsyncBase make this quite tricky. Having all our bolts include FM operations seems like it might be handy ? (standard, and we can plan downstream maps/option map's etc.. into the back of a summer?). I can look more at this, not entirely sure what the types of the second AsyncBase would need to look like here for this to work |
||
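For readers following this thread: the change routes each merged result through a FlatMapOperation before emitting, rather than composing two AsyncBase stages. A minimal sketch of that shape, using an illustrative stand-in trait (not Summingbird's actual definitions):

```scala
import com.twitter.util.Future

// Illustrative stand-in for the FlatMapOperation in the diff,
// not Summingbird's real trait.
trait FlatMapOperation[-In, +Out] extends Serializable {
  def apply(in: In): Future[TraversableOnce[Out]]
}

object FlatMapOperation {
  // Lift a pure function; a downstream map/optionMap can be planned
  // into the back of the Summer this way instead of a separate bolt.
  def apply[In, Out](fn: In => TraversableOnce[Out]): FlatMapOperation[In, Out] =
    new FlatMapOperation[In, Out] {
      def apply(in: In) = Future.value(fn(in))
    }
}

// With Event as the output type, each merge result (k, (before, delta))
// feeds straight into the operation:
val render: FlatMapOperation[(String, (Option[Long], Long)), String] =
  FlatMapOperation { case (k, (before, delta)) =>
    List(s"$k: ${before.getOrElse(0L)} + $delta")
  }
```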
```diff
   @transient successHandler: OnlineSuccessHandler,
   @transient exceptionHandler: OnlineExceptionHandler,
-  cacheBuilder: (Semigroup[(List[S], Timestamp, Value)]) => AsyncCache[(Key, BatchID), (List[S], Timestamp, Value)],
+  cacheBuilder: CacheBuilder[Key, (List[S], Value)],
   maxWaitingFutures: MaxWaitingFutures,
   maxWaitingTime: MaxFutureWaitTime,
   maxEmitPerExec: MaxEmitPerExecute,
   includeSuccessHandler: IncludeSuccessHandler,
-  pDecoder: Injection[((Key, BatchID), (Timestamp, Value)), D],
-  pEncoder: Injection[(Timestamp, (Key, (Option[Value], Value))), D]) extends
-    AsyncBase[((Key, BatchID), (Timestamp, Value)), (Timestamp, (Key, (Option[Value], Value))), S, D](
+  pDecoder: Injection[(Key, Value), D],
+  pEncoder: Injection[Event, D]) extends
+    AsyncBase[(Key, Value), Event, S, D](
       maxWaitingFutures,
       maxWaitingTime,
       maxEmitPerExec) {
 
+  val lockedOp = Externalizer(flatMapOp)
   val encoder = pEncoder
   val decoder = pDecoder
 
   val storeBox = Externalizer(storeSupplier)
   lazy val store = storeBox.get.apply
 
   // See MaxWaitingFutures for a todo around removing this.
-  lazy val sCache: AsyncCache[(Key, BatchID), (List[S], Timestamp, Value)] = cacheBuilder(implicitly[Semigroup[(List[S], Timestamp, Value)]])
+  lazy val sCache: AsyncCache[Key, (List[S], Value)] = cacheBuilder(implicitly[Semigroup[(List[S], Value)]])
 
   val exceptionHandlerBox = Externalizer(exceptionHandler.handlerFn.lift)
   val successHandlerBox = Externalizer(successHandler)
 
   var successHandlerOpt: Option[OnlineSuccessHandler] = null
 
   override def init {
```
```diff
@@ -90,24 +89,20 @@ class Summer[Key, Value: Semigroup, S, D](
     exceptionHandlerBox.get.apply(error)
   }
 
-  private def handleResult(kvs: Map[(Key, BatchID), (List[S], Timestamp, Value)])
-    : Iterable[(List[S], Future[TraversableOnce[(Timestamp, (Key, (Option[Value], Value)))]])] = {
-    store.multiMerge(kvs.mapValues(_._3)).map{ case (innerKb, beforeF) =>
-      val (tups, stamp, delta) = kvs(innerKb)
-      val (k, _) = innerKb
-      (tups, beforeF.map(before => List((stamp, (k, (before, delta)))))
-        .onSuccess { _ => successHandlerOpt.get.handlerFn.apply() } )
-    }
-    .toList // force, but order does not matter, so we could optimize this
-  }
+  private def handleResult(kvs: Map[Key, (List[S], Value)]): TraversableOnce[(List[S], Future[TraversableOnce[Event]])] =
+    store.multiMerge(kvs.mapValues(_._2)).iterator.map { case (k, beforeF) =>
```
Reviewer: This is lazy (an iterator). Can you add a comment that this is okay because AsyncBase will always go through the whole list exactly once to put the results onto Storm (the emit phase)? But I'm worried that the success handler is lost if there is no downstream output. How will it get called in that case?

Author: I think this is just a bug; I don't think we can (or want to) guarantee that it's forced to materialize downstream. I've added a toList to force it at the end of the function (keeping the .iterator here so we don't materialize the intermediate list).
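The laziness concern in miniature: callbacks attached inside Iterator.map never run unless something consumes the iterator, which is why the function now ends with a forcing .toList. A self-contained illustration in plain Scala (not the Summingbird code itself):

```scala
var calls = 0
val results = Iterator(1, 2, 3).map { i =>
  calls += 1 // stands in for attaching the onSuccess handler
  i * 2
}
assert(calls == 0)          // lazy: nothing has run yet
val forced = results.toList // the trailing .toList in handleResult does this
assert(calls == 3)          // side effects now run exactly once per element
```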
```diff
+      val (tups, delta) = kvs(k)
+      (tups, beforeF.flatMap { before =>
+        lockedOp.get.apply((k, (before, delta)))
+      }.onSuccess { _ => successHandlerOpt.get.handlerFn.apply() } )
+    }.toList
 
   override def tick = sCache.tick.map(handleResult(_))
 
-  override def apply(state: S,
-    tsIn: ((Key, BatchID), (Timestamp, Value))) = {
-    val (kb, (ts, v)) = tsIn
-    sCache.insert(List(kb -> (List(state), ts, v))).map(handleResult(_))
+  override def apply(state: S, tup: (Key, Value)) = {
+    val (k, v) = tup
+    sCache.insert(List(k -> (List(state), v))).map(handleResult(_))
   }
 
-  override def cleanup { Await.result(store.close) }
+  override def cleanup = Await.result(store.close)
 }
```
Reviewer: Where is the batch added now?
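For context on that question: after this refactor the executor is batch-agnostic, so presumably the batch is folded into the key by the platform layer that constructs the Summer. A hypothetical sketch of that instantiation (the alias below is illustrative, not code from this PR):

```scala
import com.twitter.summingbird.batch.BatchID

object BatchedWiring {
  // Hypothetical: instantiating the now-generic Key as (K, BatchID) recovers
  // the old batched behavior, while the online executor itself no longer
  // needs to know BatchID exists.
  type BatchedSummer[K, V, Event, S, D] = Summer[(K, BatchID), V, Event, S, D]
}
```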