Merged
3 changes: 1 addition & 2 deletions project/Build.scala
@@ -195,8 +195,7 @@ object SummingbirdBuild extends Build {
      withCross("com.twitter" %% "util-core" % utilVersion)
    )
  ).dependsOn(
-    summingbirdCore % "test->test;compile->compile",
-    summingbirdBatch
+    summingbirdCore % "test->test;compile->compile"
  )

lazy val summingbirdStorm = module("storm").settings(
@@ -27,3 +27,7 @@ trait AsyncCache[Key, Value] {
  def insert(vals: TraversableOnce[(Key, Value)]): Future[Map[Key, Value]]
  def cleanup: Future[Unit] = Future.Unit
}
+
+trait CacheBuilder[Key, Value] extends Serializable {
+  def apply(sg: Semigroup[Value]): AsyncCache[Key, Value]
+}
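
The new CacheBuilder replaces a bare function of type Semigroup[Value] => AsyncCache[Key, Value]; giving it a name and extending Serializable makes the ship-to-the-worker requirement explicit. A minimal sketch of how an executor consumes one; the ExampleExecutor name is illustrative, not part of this PR:

    import com.twitter.algebird.Semigroup
    import com.twitter.summingbird.online.{AsyncCache, CacheBuilder}

    // Hypothetical executor fragment: the CacheBuilder travels as a Serializable
    // constructor argument, and the concrete AsyncCache is only built lazily on the
    // worker, once the value Semigroup is in scope.
    class ExampleExecutor[K, V: Semigroup](cacheBuilder: CacheBuilder[K, V]) extends Serializable {
      lazy val cache: AsyncCache[K, V] = cacheBuilder(implicitly[Semigroup[V]])
    }
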
@@ -69,6 +69,12 @@ class FunctionFlatMapOperation[T, U](@transient fm: T => TraversableOnce[U])
  def apply(t: T) = Future.value(boxed.get(t))
}

+class GenericFlatMapOperation[T, U](@transient fm: T => Future[TraversableOnce[U]])
+  extends FlatMapOperation[T, U] {
+  val boxed = Externalizer(fm)
+  def apply(t: T) = boxed.get(t)
+}
+
class FunctionKeyFlatMapOperation[K1, K2, V](@transient fm: K1 => TraversableOnce[K2])
  extends FlatMapOperation[(K1, V), (K2, V)] {
  val boxed = Externalizer(fm)
@@ -91,6 +97,9 @@ object FlatMapOperation {
  def apply[T, U](fm: T => TraversableOnce[U]): FlatMapOperation[T, U] =
    new FunctionFlatMapOperation(fm)

+  def generic[T, U](fm: T => Future[TraversableOnce[U]]): FlatMapOperation[T, U] =
+    new GenericFlatMapOperation(fm)
+
  def keyFlatMap[K1, K2, V](fm: K1 => TraversableOnce[K2]): FlatMapOperation[(K1, V), (K2, V)] =
    new FunctionKeyFlatMapOperation(fm)

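
FlatMapOperation.generic lets a flatMap step return a Future, so asynchronous work such as a storehaus lookup composes without blocking inside the bolt. A minimal sketch under assumed inputs; the store, key type, and countryCounts name are illustrative, not from this PR:

    import com.twitter.util.Future
    import com.twitter.storehaus.ReadableStore
    import com.twitter.summingbird.online.FlatMapOperation

    // Hypothetical enrichment step: asynchronously resolve a user id to a country
    // and emit (country, 1L) pairs; missing users simply emit nothing.
    def countryCounts(store: ReadableStore[Long, String]): FlatMapOperation[Long, (String, Long)] =
      FlatMapOperation.generic { userId: Long =>
        store.get(userId).map {
          case Some(country) => List((country, 1L))
          case None          => Nil
        }
      }
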
@@ -35,9 +35,13 @@ import org.slf4j.{LoggerFactory, Logger}
 */

object MultiTriggerCache {
-  def builder[Key, Value](cacheSize: CacheSize, valueCombinerCacheSize: ValueCombinerCacheSize, flushFrequency: FlushFrequency, softMemoryFlush: SoftMemoryFlushPercent, poolSize: AsyncPoolSize) =
-    {(sg: Semigroup[Value]) =>
-      new MultiTriggerCache[Key, Value](cacheSize, valueCombinerCacheSize, flushFrequency, softMemoryFlush, poolSize)(sg) }
+  def builder[Key, Value](cacheSize: CacheSize, valueCombinerCacheSize: ValueCombinerCacheSize,
+    flushFrequency: FlushFrequency, softMemoryFlush: SoftMemoryFlushPercent,
+    poolSize: AsyncPoolSize): CacheBuilder[Key, Value] =
+    new CacheBuilder[Key, Value] {
+      def apply(sg: Semigroup[Value]) =
+        new MultiTriggerCache[Key, Value](cacheSize, valueCombinerCacheSize, flushFrequency, softMemoryFlush, poolSize)(sg)
+    }
}

case class MultiTriggerCache[Key, Value](cacheSizeOpt: CacheSize, valueCombinerCacheSize: ValueCombinerCacheSize, flushFrequency: FlushFrequency, softMemoryFlush: SoftMemoryFlushPercent, poolSize: AsyncPoolSize)
@@ -32,8 +32,10 @@ import org.slf4j.{LoggerFactory, Logger}
 */
object SummingQueueCache {
  def builder[Key, Value](cacheSize: CacheSize, flushFrequency: FlushFrequency) =
-    {(sg: Semigroup[Value]) =>
-      new SummingQueueCache[Key, Value](cacheSize, flushFrequency)(sg) }
+    new CacheBuilder[Key, Value] {
+      def apply(sg: Semigroup[Value]) =
+        new SummingQueueCache[Key, Value](cacheSize, flushFrequency)(sg)
+    }
}

case class SummingQueueCache[Key, Value](cacheSizeOpt: CacheSize, flushFrequency: FlushFrequency)
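
For orientation, constructing and applying one of these builders could look like the sketch below. The argument shapes (an Int for CacheSize, a Duration for FlushFrequency) and the package of SummingQueueCache are assumptions, not taken from this diff:

    import com.twitter.algebird.Semigroup
    import com.twitter.conversions.time._
    import com.twitter.summingbird.option.CacheSize
    import com.twitter.summingbird.online.option.FlushFrequency
    import com.twitter.summingbird.online.{AsyncCache, CacheBuilder, SummingQueueCache}

    // The builder is created once on the submitting host and shipped with the topology;
    // the cache itself is only built where a Semigroup for the value type is in scope.
    // (CacheSize/FlushFrequency argument shapes assumed here.)
    val builder: CacheBuilder[String, Long] =
      SummingQueueCache.builder[String, Long](CacheSize(1000), FlushFrequency(5.seconds))

    val cache: AsyncCache[String, Long] = builder(implicitly[Semigroup[Long]])
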
@@ -23,7 +23,6 @@ import scala.util.{Try, Success, Failure}
import java.util.concurrent.TimeoutException
import org.slf4j.{LoggerFactory, Logger}

-
abstract class AsyncBase[I,O,S,D](maxWaitingFutures: MaxWaitingFutures, maxWaitingTime: MaxFutureWaitTime, maxEmitPerExec: MaxEmitPerExecute) extends Serializable with OperationContainer[I,O,S,D] {

@transient protected lazy val logger: Logger = LoggerFactory.getLogger(getClass)
@@ -21,9 +21,8 @@ import com.twitter.bijection.Injection
import com.twitter.util.Future

import com.twitter.summingbird.online.Externalizer
-import com.twitter.summingbird.batch.{ Batcher, BatchID, Timestamp}

-import com.twitter.summingbird.online.{FlatMapOperation, AsyncCache}
+import com.twitter.summingbird.online.{FlatMapOperation, AsyncCache, CacheBuilder}
import com.twitter.summingbird.option.CacheSize
import com.twitter.summingbird.online.option.{
  MaxWaitingFutures,
@@ -42,46 +41,43 @@ import com.twitter.summingbird.online.option.{

class FinalFlatMap[Event, Key, Value, S, D](
  @transient flatMapOp: FlatMapOperation[Event, (Key, Value)],
-  cacheBuilder: (Semigroup[(List[InputState[S]], Timestamp, Value)]) => AsyncCache[(Key, BatchID), (List[InputState[S]], Timestamp, Value)],
+  cacheBuilder: CacheBuilder[Key, (List[InputState[S]], Value)],
  maxWaitingFutures: MaxWaitingFutures,
  maxWaitingTime: MaxFutureWaitTime,
  maxEmitPerExec: MaxEmitPerExecute,
-  pDecoder: Injection[(Timestamp, Event), D],
-  pEncoder: Injection[((Key, BatchID), (Timestamp, Value)), D]
+  pDecoder: Injection[Event, D],
+  pEncoder: Injection[(Key, Value), D]
  )
-  (implicit monoid: Semigroup[Value], batcher: Batcher)
-  extends AsyncBase[(Timestamp, Event), ((Key, BatchID), (Timestamp, Value)), InputState[S], D](maxWaitingFutures,
+  (implicit monoid: Semigroup[Value])
+  extends AsyncBase[Event, (Key, Value), InputState[S], D](maxWaitingFutures,
    maxWaitingTime,
    maxEmitPerExec) {
  val encoder = pEncoder
  val decoder = pDecoder

  val lockedOp = Externalizer(flatMapOp)

-  lazy val sCache: AsyncCache[(Key, BatchID), (List[InputState[S]], Timestamp, Value)] = cacheBuilder(implicitly[Semigroup[(List[InputState[S]], Timestamp, Value)]])
+  lazy val sCache: AsyncCache[Key, (List[InputState[S]], Value)] = cacheBuilder(implicitly[Semigroup[(List[InputState[S]], Value)]])


-  private def formatResult(outData: Map[(Key, BatchID), (List[InputState[S]], Timestamp, Value)])
-                       : Iterable[(List[InputState[S]], Future[TraversableOnce[((Key, BatchID), (Timestamp, Value))]])] = {
-    outData.toList.map{ case ((key, batchID), (tupList, ts, value)) =>
-      (tupList, Future.value(List(((key, batchID), (ts, value)))))
+  private def formatResult(outData: Map[Key, (List[InputState[S]], Value)])
+                       : Iterable[(List[InputState[S]], Future[TraversableOnce[(Key, Value)]])] = {
+    outData.toList.map{ case (key, (tupList, value)) =>
+      (tupList, Future.value(List((key, value))))
    }
  }

-  override def tick: Future[Iterable[(List[InputState[S]], Future[TraversableOnce[((Key, BatchID), (Timestamp, Value))]])]] = {
+  override def tick: Future[Iterable[(List[InputState[S]], Future[TraversableOnce[(Key, Value)]])]] = {
    sCache.tick.map(formatResult(_))
  }

  def cache(state: InputState[S],
-            time: Timestamp,
-            items: TraversableOnce[(Key, Value)]): Future[Iterable[(List[InputState[S]], Future[TraversableOnce[((Key, BatchID), (Timestamp, Value))]])]] = {
+            items: TraversableOnce[(Key, Value)]): Future[Iterable[(List[InputState[S]], Future[TraversableOnce[(Key, Value)]])]] = {

-    val batchID = batcher.batchOf(time)
    val itemL = items.toList
    if(itemL.size > 0) {
      state.fanOut(itemL.size - 1) // Since input state starts at a 1

Collaborator: where is the batch added now?

-      sCache.insert(itemL.map{case (k, v) => (k, batchID) -> (List(state), time, v)}).map(formatResult(_))
+      sCache.insert(itemL.map{case (k, v) => k -> (List(state), v)}).map(formatResult(_))
    }
    else { // Here we handle mapping to nothing, option map et. al
      Future.value(
@@ -93,8 +89,8 @@ class FinalFlatMap[Event, Key, Value, S, D](
  }

  override def apply(state: InputState[S],
-                     timeIn: (Timestamp, Event)) =
-    lockedOp.get.apply(timeIn._2).map { cache(state, timeIn._1, _) }.flatten
+                     tup: Event) =
+    lockedOp.get.apply(tup).map { cache(state, _) }.flatten

  override def cleanup {
    lockedOp.get.close
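
FinalFlatMap now caches (List[InputState[S]], Value) pairs keyed only by Key and merges pending entries with the implicit tuple Semigroup. A toy illustration of why that merge does the right thing; the values are made up:

    import com.twitter.algebird.Semigroup

    // Algebird combines tuples component-wise: the List semigroup concatenates, so every
    // input tuple's state is kept (for acking later) while the values are summed.
    // (toy values)
    val merged = Semigroup.plus((List("tupleA"), 3L), (List("tupleB"), 4L))
    // merged == (List("tupleA", "tupleB"), 7L)
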
@@ -19,7 +19,6 @@ package com.twitter.summingbird.online.executor
import com.twitter.util.Future

import com.twitter.bijection.Injection
-import com.twitter.summingbird.batch.Timestamp
import com.twitter.summingbird.online.Externalizer
import com.twitter.summingbird.online.FlatMapOperation
import com.twitter.summingbird.online.option.{
@@ -34,9 +33,9 @@ class IntermediateFlatMap[T,U,S,D](
  maxWaitingFutures: MaxWaitingFutures,
  maxWaitingTime: MaxFutureWaitTime,
  maxEmitPerExec: MaxEmitPerExecute,
-  pDecoder: Injection[(Timestamp, T), D],
-  pEncoder: Injection[(Timestamp, U), D]
-  ) extends AsyncBase[(Timestamp, T), (Timestamp, U), S, D](maxWaitingFutures, maxWaitingTime, maxEmitPerExec) {
+  pDecoder: Injection[T, D],
+  pEncoder: Injection[U, D]
+  ) extends AsyncBase[T, U, S, D](maxWaitingFutures, maxWaitingTime, maxEmitPerExec) {

val encoder = pEncoder
val decoder = pDecoder
@@ -45,9 +44,9 @@


  override def apply(state: S,
-                     timeT: (Timestamp, T)): Future[Iterable[(List[S], Future[TraversableOnce[(Timestamp, U)]])]] =
-    lockedOp.get.apply(timeT._2).map { res =>
-      List((List(state), Future.value(res.map((timeT._1, _)))))
+                     tup: T): Future[Iterable[(List[S], Future[TraversableOnce[U]])]] =
+    lockedOp.get.apply(tup).map { res =>
+      List((List(state), Future.value(res)))
    }

override def cleanup { lockedOp.get.close }
@@ -18,7 +18,6 @@ package com.twitter.summingbird.online.executor

import scala.util.Try
import com.twitter.bijection.Injection
-import com.twitter.summingbird.batch.Timestamp

trait OperationContainer[Input, Output, State, WireFmt] {
def decoder: Injection[Input, WireFmt]
@@ -21,8 +21,7 @@ import com.twitter.algebird.{Semigroup, SummingQueue}
import com.twitter.storehaus.algebra.Mergeable
import com.twitter.bijection.Injection

-import com.twitter.summingbird.online.{Externalizer, AsyncCache}
-import com.twitter.summingbird.batch.{BatchID, Timestamp}
+import com.twitter.summingbird.online.{FlatMapOperation, Externalizer, AsyncCache, CacheBuilder}
import com.twitter.summingbird.online.option._
import com.twitter.summingbird.option.CacheSize

@@ -50,34 +49,34 @@ import com.twitter.summingbird.option.CacheSize
 * @author Ashu Singhal
 */

-class Summer[Key, Value: Semigroup, S, D](
-  @transient storeSupplier: () => Mergeable[(Key,BatchID), Value],
+class Summer[Key, Value: Semigroup, Event, S, D](
+  @transient storeSupplier: () => Mergeable[Key, Value],
+  @transient flatMapOp: FlatMapOperation[(Key, (Option[Value], Value)), Event],

Collaborator: This is new, right? Adding on a flat map operation directly?

Collaborator: Why do we need this rather than making a combinator on AsyncBase directly? Can't we glue two together?

Collaborator (author): Combining two AsyncBases? The return types of apply/tick in AsyncBase make this quite tricky. Having all our bolts include flat-map operations seems like it might be handy (standard, and we can plan downstream maps, optionMaps, etc. into the back of a summer?). I can look more at this; I'm not entirely sure what the types of the second AsyncBase would need to look like here for this to work.

  @transient successHandler: OnlineSuccessHandler,
  @transient exceptionHandler: OnlineExceptionHandler,
-  cacheBuilder: (Semigroup[(List[S], Timestamp, Value)]) => AsyncCache[(Key, BatchID), (List[S], Timestamp, Value)],
+  cacheBuilder: CacheBuilder[Key, (List[S], Value)],
  maxWaitingFutures: MaxWaitingFutures,
  maxWaitingTime: MaxFutureWaitTime,
  maxEmitPerExec: MaxEmitPerExecute,
  includeSuccessHandler: IncludeSuccessHandler,
-  pDecoder: Injection[((Key, BatchID), (Timestamp, Value)), D],
-  pEncoder: Injection[(Timestamp, (Key, (Option[Value], Value))), D]) extends
-    AsyncBase[((Key, BatchID), (Timestamp, Value)), (Timestamp, (Key, (Option[Value], Value))), S, D](
+  pDecoder: Injection[(Key, Value), D],
+  pEncoder: Injection[Event, D]) extends
+    AsyncBase[(Key, Value), Event, S, D](
      maxWaitingFutures,
      maxWaitingTime,
      maxEmitPerExec) {

+  val lockedOp = Externalizer(flatMapOp)
  val encoder = pEncoder
  val decoder = pDecoder

  val storeBox = Externalizer(storeSupplier)
  lazy val store = storeBox.get.apply

  // See MaxWaitingFutures for a todo around removing this.
-  lazy val sCache: AsyncCache[(Key, BatchID), (List[S], Timestamp, Value)] = cacheBuilder(implicitly[Semigroup[(List[S], Timestamp, Value)]])
+  lazy val sCache: AsyncCache[Key, (List[S], Value)] = cacheBuilder(implicitly[Semigroup[(List[S], Value)]])

val exceptionHandlerBox = Externalizer(exceptionHandler.handlerFn.lift)
val successHandlerBox = Externalizer(successHandler)

var successHandlerOpt: Option[OnlineSuccessHandler] = null

override def init {
@@ -90,24 +89,20 @@ class Summer[Key, Value: Semigroup, S, D](
    exceptionHandlerBox.get.apply(error)
  }

-  private def handleResult(kvs: Map[(Key, BatchID), (List[S], Timestamp, Value)])
-    : Iterable[(List[S], Future[TraversableOnce[(Timestamp, (Key, (Option[Value], Value)))]])] = {
-    store.multiMerge(kvs.mapValues(_._3)).map{ case (innerKb, beforeF) =>
-      val (tups, stamp, delta) = kvs(innerKb)
-      val (k, _) = innerKb
-      (tups, beforeF.map(before => List((stamp, (k, (before, delta)))))
-        .onSuccess { _ => successHandlerOpt.get.handlerFn.apply() } )
-    }
-    .toList // force, but order does not matter, so we could optimize this
-  }
+  private def handleResult(kvs: Map[Key, (List[S], Value)]): TraversableOnce[(List[S], Future[TraversableOnce[Event]])] =
+    store.multiMerge(kvs.mapValues(_._2)).iterator.map { case (k, beforeF) =>

Collaborator: This is lazy (an iterator). Can you add a comment that this is okay because the AsyncBase will always go through the whole list exactly once to put the results onto Storm (the emit phase)? But I'm worried that the success handler is lost if there is no downstream output. How will it get called then?

Collaborator (author): I think this is just a bug; I don't think we can or want to guarantee that it's forced to materialize downstream. I've added a toList to force it at the end of the function. (Keeping the iterator here so we don't materialize the intermediate list.)

+      val (tups, delta) = kvs(k)
+      (tups, beforeF.flatMap { before =>
+        lockedOp.get.apply((k, (before, delta)))
+      }.onSuccess { _ => successHandlerOpt.get.handlerFn.apply() } )
+    }.toList

override def tick = sCache.tick.map(handleResult(_))

-  override def apply(state: S,
-    tsIn: ((Key, BatchID), (Timestamp, Value))) = {
-    val (kb, (ts, v)) = tsIn
-    sCache.insert(List(kb -> (List(state), ts, v))).map(handleResult(_))
+  override def apply(state: S, tup: (Key, Value)) = {
+    val (k, v) = tup
+    sCache.insert(List(k -> (List(state), v))).map(handleResult(_))
}

-  override def cleanup { Await.result(store.close) }
+  override def cleanup = Await.result(store.close)
}
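
Regarding the laziness thread above: a map over an Iterator runs its body, and therefore registers each onSuccess callback, only when the iterator is consumed, which is why handleResult now ends in toList. A toy illustration, not summingbird code:

    import com.twitter.util.Future

    // Toy example: side effects inside Iterator.map run only when elements are demanded.
    def results(keys: List[String]): Iterator[Future[String]] =
      keys.iterator.map { k =>
        Future.value(k).onSuccess(_ => println("handled " + k))
      }

    val lazyIt = results(List("a", "b")) // nothing printed yet
    lazyIt.toList                        // both callbacks registered (and fired) here
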
@@ -20,7 +20,6 @@ import com.twitter.algebird.{MapAlgebra, Semigroup}
import com.twitter.storehaus.{ ReadableStore, JMapStore }
import com.twitter.storehaus.algebra.MergeableStore
import com.twitter.summingbird._
-import com.twitter.summingbird.batch.{BatchID, Batcher}
import com.twitter.summingbird.memory._
import com.twitter.summingbird.planner._
import com.twitter.util.Future