Merged
@@ -0,0 +1,122 @@
/*
Copyright 2014 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.twitter.summingbird.online

import com.twitter.algebird.Semigroup
import com.twitter.summingbird.option.CacheSize
import com.twitter.util.{Future, FuturePool}
import com.twitter.summingbird.online.option.{FlushFrequency, SoftMemoryFlushPercent}
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import java.util.concurrent._
import org.slf4j.{LoggerFactory, Logger}

object BackgroundCompactionCache {
  def builder[Key, Value](cacheSize: CacheSize, flushFrequency: FlushFrequency, softMemoryFlush: SoftMemoryFlushPercent): CacheBuilder[Key, Value] =
    new CacheBuilder[Key, Value] {
      def apply(sg: Semigroup[Value]) =
        BackgroundCompactionCache(cacheSize, flushFrequency, softMemoryFlush)(sg)
    }

  def apply[Key, Value](cacheSize: CacheSize,
                        flushFrequency: FlushFrequency,
                        softMemoryFlush: SoftMemoryFlushPercent)
                       (implicit sg: Semigroup[Value]): AsyncCache[Key, Value] = {
    cacheSize.size.map { _ =>
      new NonEmptyBackgroundCompactionCache[Key, Value](cacheSize, flushFrequency, softMemoryFlush)(sg)
    }.getOrElse(new EmptyBackgroundCompactionCache[Key, Value]()(sg))
  }
}
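// Note the dispatch in apply above: a CacheSize whose size is None selects the
// no-op EmptyBackgroundCompactionCache, so disabled caching costs nothing; any
// concrete size selects the queue-backed implementation below.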

/** Mixes time- and memory-pressure flush triggers into an AsyncCache. */
private[summingbird] trait WithFlushConditions[Key, Value] extends AsyncCache[Key, Value] {
  protected var lastDump: Long = System.currentTimeMillis
  protected def softMemoryFlush: SoftMemoryFlushPercent
  protected def flushFrequency: FlushFrequency

  protected def timedOut = (System.currentTimeMillis - lastDump) >= flushFrequency.get.inMilliseconds
  protected lazy val runtime = Runtime.getRuntime

  protected def didFlush { lastDump = System.currentTimeMillis }

  // Percentage of max heap currently in use, compared against the soft threshold.
  protected def memoryWaterMark = {
    val used = ((runtime.totalMemory - runtime.freeMemory).toDouble * 100) / runtime.maxMemory
    used > softMemoryFlush.get
  }

  def tick: Future[Map[Key, Value]] = {
    if (timedOut || memoryWaterMark) {
      forceTick
    } else {
      Future.value(Map.empty)
    }
  }
}
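// To make the memoryWaterMark arithmetic concrete (illustrative numbers, not
// from this patch): with maxMemory = 1024 MB, totalMemory = 900 MB, and
// freeMemory = 80 MB, used = (900 - 80) * 100 / 1024 ≈ 80.1, so a
// SoftMemoryFlushPercent of 80 would trigger a flush on the next tick.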

private[summingbird] trait ParallelCleanup[Key, Value] extends AsyncCache[Key, Value] {
  protected def executor: ExecutorService
  protected lazy val futurePool = FuturePool(executor)

  override def cleanup = {
    // Stop accepting new work, give in-flight compactions up to 10 seconds to
    // finish, then delegate to any other cleanup in the stack.
    Future {
      executor.shutdown
      executor.awaitTermination(10, TimeUnit.SECONDS)
    }.flatMap(_ => super.cleanup)
  }
}

class EmptyBackgroundCompactionCache[Key, Value](implicit semigroup: Semigroup[Value])
    extends AsyncCache[Key, Value] {
  def forceTick: Future[Map[Key, Value]] = Future.value(Map.empty)
  def tick: Future[Map[Key, Value]] = Future.value(Map.empty)
  // Nothing is buffered: each insert is summed immediately and handed straight back.
  def insert(vals: TraversableOnce[(Key, Value)]): Future[Map[Key, Value]] =
    Future.value(Semigroup.sumOption(vals.map(Map(_))).getOrElse(Map.empty))
}


class NonEmptyBackgroundCompactionCache[Key, Value](cacheSizeOpt: CacheSize,
                                                    override val flushFrequency: FlushFrequency,
                                                    override val softMemoryFlush: SoftMemoryFlushPercent)
                                                   (implicit semigroup: Semigroup[Value])
    extends AsyncCache[Key, Value] with ParallelCleanup[Key, Value] with WithFlushConditions[Key, Value] {

  protected override val executor = Executors.newFixedThreadPool(2)
  protected val logger: Logger = LoggerFactory.getLogger(getClass)
  protected val cacheSize = cacheSizeOpt.size.get

  private val queue: ArrayBlockingQueue[Map[Key, Value]] = new ArrayBlockingQueue[Map[Key, Value]](cacheSize, true)

  override def forceTick: Future[Map[Key, Value]] = {
    didFlush // bumps timeout on the flush conditions
    val toSum = ListBuffer[Map[Key, Value]]()
    queue.drainTo(toSum.asJava)
    futurePool {
      Semigroup.sumOption(toSum).getOrElse(Map.empty)
    }
  }

  def insert(vals: TraversableOnce[(Key, Value)]): Future[Map[Key, Value]] = {
    val curData = Semigroup.sumOption(vals.map(Map(_))).getOrElse(Map.empty)
    if (!queue.offer(curData)) {
      // Queue is full: compact everything queued so far and merge in the new data.
      forceTick.map { flushRes =>
        Semigroup.plus(flushRes, curData)
      }
    } else {
      Future.value(Map.empty)
    }
  }
}
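For a sense of how these pieces behave together, here is a small standalone driver (my own illustration, not part of this patch: the keys, sizes, and 60-second frequency are made up, and the Semigroup instances resolve implicitly from Algebird). It exercises the same invariant the ScalaCheck properties later in this diff check: between insert's spills and forceTick's flush, nothing is lost or duplicated.

import com.twitter.algebird.Semigroup
import com.twitter.summingbird.online.BackgroundCompactionCache
import com.twitter.summingbird.online.option.{FlushFrequency, SoftMemoryFlushPercent}
import com.twitter.summingbird.option.CacheSize
import com.twitter.util.{Await, Duration}

object CompactionCacheDemo {
  def main(args: Array[String]): Unit = {
    val cache = BackgroundCompactionCache[Int, Int](
      CacheSize(2),
      FlushFrequency(Duration.fromSeconds(60)), // long enough that only capacity, not time, triggers a flush here
      SoftMemoryFlushPercent(80.0F))

    // insert pre-sums its arguments (1 -> 10 and 1 -> 5 merge immediately) and
    // returns whatever it had to spill to make room, possibly nothing.
    val spilled = Await.result(cache.insert(Seq(1 -> 10, 1 -> 5, 2 -> 7)))

    // forceTick drains and sums everything still queued.
    val flushed = Await.result(cache.forceTick)

    // However the data was split between the two, their semigroup sum equals
    // the direct sum of the inputs.
    assert(Semigroup.plus(spilled, flushed) == Map(1 -> 15, 2 -> 7))

    Await.ready(cache.cleanup)
  }
}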
@@ -79,6 +79,7 @@ class Summer[Key, Value: Semigroup, Event, S, D](

override def init {
super.init
+ store.toString // Do the lazy evaluation now so we can connect before tuples arrive.
successHandlerOpt = if (includeSuccessHandler.get) Some(successHandlerBox.get) else None
}
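The added store.toString line is a cheap way to force lazy initialization during init, before the first tuple arrives. A minimal sketch of the idiom (hypothetical names, not from this codebase):

class Example {
  // Deferred until first touch; stands in for an expensive store connection.
  lazy val store: AnyRef = openExpensiveConnection()

  def init() {
    store.toString // touching the lazy val opens the connection now, not on first use
  }

  private def openExpensiveConnection(): AnyRef = new Object
}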

@@ -0,0 +1,94 @@
/*
Copyright 2013 Twitter, Inc.

Contributor: 2014?


Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.twitter.summingbird.online

import com.twitter.summingbird.online.option._
import com.twitter.summingbird.option._
import com.twitter.summingbird.planner._
import com.twitter.summingbird.memory.Memory
import com.twitter.algebird.{MapAlgebra, Semigroup}
import com.twitter.util.{Future, Await}
import scala.collection.mutable.{Map => MMap}
import org.scalacheck._
import Gen._
import Arbitrary._
import org.scalacheck.Prop._
import scala.util.Random
import com.twitter.util.Duration

object BackgroundCompactionCacheProperties extends Properties("BackgroundCompactionCache") {

  implicit def arbFlushFreq = Arbitrary {
    Gen.choose(1, 4000).map { x: Int => FlushFrequency(Duration.fromMilliseconds(x)) }
  }

  implicit def arbCacheSize = Arbitrary {
    Gen.choose(0, 10).map { x => CacheSize(x) }
  }

  def sample[T: Arbitrary]: T = Arbitrary.arbitrary[T].sample.get

property("Summing with and without the cache should match") = forAll { inputs: List[List[(Int, Int)]] =>
val cache = BackgroundCompactionCache[Int, Int](sample[CacheSize], sample[FlushFrequency], SoftMemoryFlushPercent(80.0F))
val reference = MapAlgebra.sumByKey(inputs.flatten)
val resA = Await.result(Future.collect(inputs.map(cache.insert(_)))).map(_.toList).flatten
val resB = Await.result(cache.forceTick)
val other = MapAlgebra.sumByKey(resA.toList ++ resB.toList)
val res = Equiv[Map[Int, Int]].equiv(
reference,
other
)
Await.ready(cache.cleanup)
res
}

property("Input Set must not get duplicates") = forAll { (ids: Set[Int], inputs: List[List[(Int, Int)]]) =>
val cache = BackgroundCompactionCache[Int, (List[Int], Int)](sample[CacheSize], sample[FlushFrequency], SoftMemoryFlushPercent(80.0F))
val idList = (ids ++ Set(1)).toList
var refCount = MMap[Int, Int]()
val realInputs = inputs.map{ iList =>
iList.map{ case (k, v) =>
val id = idList(Random.nextInt(idList.size))
refCount += (id -> (refCount.getOrElse(id, 0) + 1))
(k, (List(id), v))
}
}.toList

val reference = MapAlgebra.sumByKey(realInputs.flatten).mapValues(tupV => (tupV._1.sorted, tupV._2))
val resA = realInputs.map(cache.insert(_)).map(Await.result(_)).map(_.toList).flatten
val resB = Await.result(cache.forceTick)
val other = MapAlgebra.sumByKey(resA.toList ++ resB.toList).mapValues(tupV => (tupV._1.sorted, tupV._2))
Await.ready(cache.cleanup)

val equiv = Equiv[Map[Int, (List[Int], Int)]].equiv(
reference,
other
)
if(equiv) {
val postFreq = MapAlgebra.sumByKey(other.map(_._2._1).flatten.map((_, 1)))
Equiv[Map[Int, Int]].equiv(
refCount.toMap,
postFreq
)
} else {
equiv
}
}
}
@@ -67,9 +67,9 @@ object StormTestRun {
try {
val cluster = new LocalCluster()
cluster.submitTopology("test topology", plannedTopology.config, plannedTopology.topology)
- Thread.sleep(4000)
+ Thread.sleep(4500)
cluster.killTopology("test topology")
- Thread.sleep(1000)
+ Thread.sleep(1500)
Contributor: flaky test fix? Is there a better way?

Collaborator (author): Nah, not with the Storm local-cluster modes as they are, alas. We can't have both tick tuples and Storm's run-the-test-until-everything-is-acked feature. It was still flaky with that, but less so, and it gave us less code coverage.

cluster.shutdown
} finally {
System.setSecurityManager(oldSecManager)
@@ -26,7 +26,7 @@ import com.twitter.summingbird._
import com.twitter.summingbird.chill._
import com.twitter.summingbird.batch.{BatchID, Batcher, Timestamp}
import com.twitter.summingbird.storm.option.{AckOnEntry, AnchorTuples}
- import com.twitter.summingbird.online.{MultiTriggerCache, SummingQueueCache, CacheBuilder}
+ import com.twitter.summingbird.online.{BackgroundCompactionCache, SummingQueueCache, CacheBuilder}
import com.twitter.summingbird.online.executor.InputState
import com.twitter.summingbird.online.option.{IncludeSuccessHandler, MaxWaitingFutures, MaxFutureWaitTime}
import com.twitter.summingbird.option.CacheSize
@@ -123,8 +123,7 @@ case class FlatMapBoltProvider(storm: Storm, stormDag: Dag[Storm], node: StormNode

val valueCombinerCrushSize = getOrElse(DEFAULT_VALUE_COMBINER_CACHE_SIZE)
logger.info("[{}] valueCombinerCrushSize : {}", nodeName, valueCombinerCrushSize.get)
- MultiTriggerCache.builder[K, V](cacheSize, valueCombinerCrushSize, flushFrequency,
-   softMemoryFlush, asyncPoolSize)
+ BackgroundCompactionCache.builder[K, V](cacheSize, flushFrequency, softMemoryFlush)
} else {
SummingQueueCache.builder[K, V](cacheSize, flushFrequency)
}
@@ -34,7 +34,7 @@ import com.twitter.summingbird.viz.VizGraph
import com.twitter.summingbird.chill._
import com.twitter.summingbird.batch.{BatchID, Batcher, Timestamp}
import com.twitter.summingbird.storm.option.{AckOnEntry, AnchorTuples}
- import com.twitter.summingbird.online.{MultiTriggerCache, SummingQueueCache}
+ import com.twitter.summingbird.online.{MultiTriggerCache, BackgroundCompactionCache, SummingQueueCache}
import com.twitter.summingbird.online.executor.InputState
import com.twitter.summingbird.online.option.{IncludeSuccessHandler, MaxWaitingFutures, MaxFutureWaitTime}
import com.twitter.summingbird.option.CacheSize
@@ -292,8 +292,7 @@ abstract class Storm(options: Map[String, Options], transformConfig: Summingbird

val valueCombinerCrushSize = getOrElse(stormDag, node, DEFAULT_VALUE_COMBINER_CACHE_SIZE)
logger.info("[{}] valueCombinerCrushSize : {}", nodeName, valueCombinerCrushSize.get)

MultiTriggerCache.builder[ExecutorKeyType, (List[InputState[Tuple]], ExecutorValueType)](cacheSize, valueCombinerCrushSize, flushFrequency, softMemoryFlush, asyncPoolSize)
} else {
SummingQueueCache.builder[ExecutorKeyType, (List[InputState[Tuple]], ExecutorValueType)](cacheSize, flushFrequency)
}