Thanks to visit codestin.com
Credit goes to github.com

Skip to content
This repository was archived by the owner on Jan 20, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* @author Julien Le Dem
*
*/

public class JMemory {

private final JobId jobId;
Expand Down Expand Up @@ -77,8 +78,8 @@ public static <T> Sink<Memory, Function1<T, Void>, T> sink(JSink<T> sink) {
* @param service
* @return the corresponding Service to use in JProducer.lookup
*/
public static <K,V> Service<Memory, Function1<K, Option<V>>, K, V> service(Function<K, Option<V>> service) {
return new Service<Memory, Function1<K, Option<V>>, K, V>(JProducerImpl.toScala(service));
// Wraps the given map in a JMemoryService and exposes it as a Memory Service.
// The explicit <K, V> on the JMemoryService constructor avoids a raw-type
// construction (and the unchecked conversion that came with it).
public static <K,V> Service<Memory, JMemoryService<K, V>, K, V> service(Map<K, V> service) {
    return new Service<Memory, JMemoryService<K, V>, K, V>(new JMemoryService<K, V>(service));
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.twitter.summingbird.memory.javaapi;

import com.twitter.summingbird.memory.MemoryService;
import java.util.Map;
import scala.Option;

public class JMemoryService<K, V> implements MemoryService<K, V> {

private Map<K, V> serviceMap;

public JMemoryService(Map<K, V> m) {
serviceMap = m;
}

public Option<V> get(K key) {
return Option.apply(serviceMap.get(key));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,18 @@ public class TestJMemory {
private static final Integer[] LENGTH = { 3, 3, 5 };
private static final String[] LESS_THAN_4 = { "one", "two" };
private static final String[] FLATTENED = { "o", "e", "t", "o", "th", "ee" };
private static final HashMap<String, Integer> SERVICE;
static
{
SERVICE = new HashMap<String, Integer>();
SERVICE.put("one", 3);
SERVICE.put("two", 3);
SERVICE.put("three", 5);
}

private static final JProducer<Memory, String> SOURCE = source(asList(INPUT));

private static final Service<Memory, Function1<String, Option<Integer>>, String, Integer> LENGTH_SERVICE = service(new Function<String, Option<Integer>>() {
private static final Service<Memory, JMemoryService<String, Integer>, String, Integer> LENGTH_SERVICE = service(new HashMap<String, Integer>(SERVICE) {
public Option<Integer> apply(String p) {
return new Some<Integer>(p.length());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ object Producer {
case OptionMappedProducer(producer, _) => List(producer)
case FlatMappedProducer(producer, _) => List(producer)
case KeyFlatMappedProducer(producer, _) => List(producer)
case ValueFlatMappedProducer(producer, _) => List(producer)
case WrittenProducer(producer, _) => List(producer)
case LeftJoinedProducer(producer, _) => List(producer)
case Summer(producer, _, _) => List(producer)
Expand All @@ -99,6 +100,7 @@ object Producer {
case OptionMappedProducer(_, _) => false
case FlatMappedProducer(_, _) => false
case KeyFlatMappedProducer(_, _) => false
case ValueFlatMappedProducer(_, _) => false
case WrittenProducer(_, _) => false
case LeftJoinedProducer(_, _) => false
case Summer(_, _, _) => false
Expand Down Expand Up @@ -255,7 +257,7 @@ sealed trait KeyedProducer[P <: Platform[P], K, V] extends Producer[P, (K, V)] {

/** Builds a new KeyedProducer by applying a partial function to the values of those elements of this producer on which the function is defined. */
def collectValues[V2](pf: PartialFunction[V, V2]): KeyedProducer[P, K, V2] =
IdentityKeyedProducer(collect { case (k, v) if pf.isDefinedAt(v) => (k, pf(v)) })
flatMapValues { v => if (pf.isDefinedAt(v)) Iterator(pf(v)) else Iterator.empty }

/**
* Prefer this to filter or flatMap/flatMapKeys if you are filtering.
Expand All @@ -273,7 +275,7 @@ sealed trait KeyedProducer[P <: Platform[P], K, V] extends Producer[P, (K, V)] {
* the partition.
*/
def filterValues(pred: V => Boolean): KeyedProducer[P, K, V] =
IdentityKeyedProducer(filter { case (_, v) => pred(v) })
flatMapValues { v => if (pred(v)) Iterator(v) else Iterator.empty }

/**
* Prefer to call this method to flatMap if you are expanding only keys.
Expand All @@ -284,7 +286,7 @@ sealed trait KeyedProducer[P <: Platform[P], K, V] extends Producer[P, (K, V)] {

/** Prefer this to a raw map as this may be optimized to avoid a key reshuffle */
def flatMapValues[U](fn: V => TraversableOnce[U]): KeyedProducer[P, K, U] =
IdentityKeyedProducer(flatMap { case (k, v) => fn(v).map((k, _)) })
ValueFlatMappedProducer(this, fn)

/** Projects every (key, value) pair of this producer onto its key. */
def keys: Producer[P, K] = map { pair => pair._1 }
Expand Down Expand Up @@ -315,7 +317,7 @@ sealed trait KeyedProducer[P <: Platform[P], K, V] extends Producer[P, (K, V)] {

/** Prefer this to a raw map as this may be optimized to avoid a key reshuffle */
def mapValues[U](fn: V => U): KeyedProducer[P, K, U] =
IdentityKeyedProducer(map { case (k, v) => (k, fn(v)) })
flatMapValues { v => Iterator(fn(v)) }

/**
* emits a KeyedProducer with a value that is the store value, just BEFORE a merge,
Expand All @@ -336,7 +338,10 @@ sealed trait KeyedProducer[P <: Platform[P], K, V] extends Producer[P, (K, V)] {
// Projects every (key, value) pair of this producer onto its value.
def values: Producer[P, V] = map { pair => pair._2 }
}

case class KeyFlatMappedProducer[P <: Platform[P], K, V, K2](producer: KeyedProducer[P, K, V], fn: K => TraversableOnce[K2]) extends KeyedProducer[P, K2, V]
case class KeyFlatMappedProducer[P <: Platform[P], K, V, K2](producer: Producer[P, (K, V)], fn: K => TraversableOnce[K2]) extends KeyedProducer[P, K2, V]

/**
 * Graph node representing a flatMap over only the values of a stream of `(K, V)` pairs:
 * it carries the upstream `producer` and the value-expanding function `fn`, and is typed
 * as a [[KeyedProducer]] with the same key type `K` and the new value type `V2`.
 *
 * @param producer upstream producer of key/value pairs
 * @param fn function turning one value into zero or more new values
 */
case class ValueFlatMappedProducer[P <: Platform[P], K, V, V2](producer: Producer[P, (K, V)],
fn: V => TraversableOnce[V2]) extends KeyedProducer[P, K, V2]

/**
 * Wraps a producer of `(K, V)` pairs so that it is typed as a [[KeyedProducer]] over `K` and `V`;
 * it carries no function of its own.
 *
 * @param producer the producer of key/value pairs being wrapped
 */
case class IdentityKeyedProducer[P <: Platform[P], K, V](producer: Producer[P, (K, V)]) extends KeyedProducer[P, K, V]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ class ConcurrentMemory(implicit jobID: JobId = JobId("default.concurrent.memory.
.orElse(FlatMapFusion)
.orElse(RemoveNames)
.orElse(RemoveIdentityKeyed)
.orElse(ValueFlatMapToFlatMap)

val deps = Dependants(optimize(prod, ourRule))
val heads = deps.nodes.collect { case s @ Source(_) => s }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,23 @@ package com.twitter.summingbird.memory
import com.twitter.algebird.Monoid
import com.twitter.summingbird._
import com.twitter.summingbird.option.JobId
import com.twitter.summingbird.planner.DagOptimizer
import collection.mutable.{ Map => MutableMap }

/** Companion object of the `Memory` platform class; holds the implicit source conversion. */
object Memory {
/**
 * Implicit conversion from a `TraversableOnce[T]` to a `Producer[Memory, T]`,
 * built by calling `Producer.source` with the traversable.
 *
 * @param traversable the elements to expose as a source
 * @param mf manifest for `T`, required implicitly by this conversion
 */
implicit def toSource[T](traversable: TraversableOnce[T])(implicit mf: Manifest[T]): Producer[Memory, T] =
Producer.source[Memory, T](traversable)
}

/**
 * Key/value lookup abstraction, contravariant in the key type and covariant in the value type.
 *
 * @tparam K type of keys accepted by `get`
 * @tparam V type of values returned by `get`
 */
trait MemoryService[-K, +V] {
/**
 * Looks up `k`.
 *
 * @param k the key to look up
 * @return an `Option` of the value for `k` (presumably `None` when there is no value for the key;
 *         the exact behavior is up to the implementation)
 */
def get(k: K): Option[V]
}

class Memory(implicit jobID: JobId = JobId("default.memory.jobId")) extends Platform[Memory] {
type Source[T] = TraversableOnce[T]
type Store[K, V] = MutableMap[K, V]
type Sink[-T] = (T => Unit)
type Service[-K, +V] = (K => Option[V])
type Service[-K, +V] = MemoryService[K, V]
type Plan[T] = Stream[T]

private type Prod[T] = Producer[Memory, T]
Expand Down Expand Up @@ -83,7 +88,7 @@ class Memory(implicit jobID: JobId = JobId("default.memory.jobId")) extends Plat
case LeftJoinedProducer(producer, service) =>
val (s, m) = toStream(producer, jamfs)
val joined = s.map {
case (k, v) => (k, (v, service(k)))
case (k, v) => (k, (v, service.get(k)))
}
(joined, m)

Expand Down Expand Up @@ -111,7 +116,12 @@ class Memory(implicit jobID: JobId = JobId("default.memory.jobId")) extends Plat
MemoryStatProvider.registerCounters(jobID, registeredCounters)
SummingbirdRuntimeStats.addPlatformStatProvider(MemoryStatProvider)
}
toStream(prod, Map.empty)._1

val dagOptimizer = new DagOptimizer[Memory] {}
val memoryTail = dagOptimizer.optimize(prod, dagOptimizer.ValueFlatMapToFlatMap)
val memoryDag = memoryTail.asInstanceOf[TailProducer[Memory, T]]

toStream(memoryDag, Map.empty)._1
}

def run(iter: Stream[_]) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,18 @@ trait DagOptimizer[P <: Platform[P]] {
// Builds an AlsoProducer from two producers; the first one is downcast to a TailProducer,
// the second is passed through unchanged.
protected def mkAlso[T, U]: (Prod[T], Prod[U]) => Prod[U] =
  (ensure, result) => AlsoProducer(ensure.asInstanceOf[TailProducer[P, T]], result)
// Builds an AlsoTailProducer from two producers; both sides are downcast to TailProducer.
protected def mkAlsoTail[T, U]: (Prod[T], Prod[U]) => Prod[U] =
  (ensure, result) => {
    val ensureTail = ensure.asInstanceOf[TailProducer[P, T]]
    val resultTail = result.asInstanceOf[TailProducer[P, U]]
    new AlsoTailProducer(ensureTail, resultTail)
  }
// Builds a MergedProducer out of two producers of the same element type.
protected def mkMerge[T]: (Prod[T], Prod[T]) => Prod[T] =
  (lhs, rhs) => MergedProducer(lhs, rhs)
// Returns a function that wraps a producer in a NamedProducer carrying `name`.
protected def mkNamed[T](name: String): (Prod[T] => Prod[T]) =
  upstream => NamedProducer(upstream, name)
// Returns a function that wraps a producer in a TPNamedProducer carrying `name`;
// the producer is downcast to a TailProducer first.
protected def mkTPNamed[T](name: String): (Prod[T] => Prod[T]) =
  upstream => {
    val tail = upstream.asInstanceOf[TailProducer[P, T]]
    new TPNamedProducer(tail, name)
  }
// Returns a function that wraps a producer of pairs in an IdentityKeyedProducer.
protected def mkIdentKey[K, V]: (Prod[(K, V)] => Prod[(K, V)]) =
  upstream => IdentityKeyedProducer(upstream)
Expand All @@ -50,6 +56,9 @@ trait DagOptimizer[P <: Platform[P]] {
// Returns a function that wraps a producer of pairs in a KeyFlatMappedProducer using `fn` on keys.
protected def mkKeyFM[T, U, V](fn: T => TraversableOnce[U]): (Prod[(T, V)] => Prod[(U, V)]) =
  upstream => KeyFlatMappedProducer(upstream, fn)
// Returns a function that wraps a producer of pairs in a ValueFlatMappedProducer using `fn` on values.
protected def mkValueFM[K, U, V](fn: U => TraversableOnce[V]): (Prod[(K, U)] => Prod[(K, V)]) =
  upstream => ValueFlatMappedProducer(upstream, fn)
// Returns a function that wraps a producer in a WrittenProducer targeting `sink`.
protected def mkWritten[T, U >: T](sink: P#Sink[U]): (Prod[T] => Prod[T]) =
  upstream => WrittenProducer[P, T, U](upstream, sink)
Expand Down Expand Up @@ -90,6 +99,12 @@ trait DagOptimizer[P <: Platform[P]] {
val lit = BinaryLit[R, T, T, N](l1, l2, mkAlso)
(h2 + (a -> lit), lit)
}
// Converts an AlsoTailProducer to a binary literal: first its `ensure` side, then its
// `result` side (threading the map returned by each toLiteral call into the next),
// and finally records the new literal for `a` in the returned map.
def alsoTail[R](a: AlsoTailProducer[P, R, T]): (M, L[T]) = {
  val (afterEnsure, ensureLit) = toLiteral(hm, a.ensure)
  val (afterResult, resultLit) = toLiteral(afterEnsure, a.result)
  val binary = BinaryLit[R, T, T, N](ensureLit, resultLit, mkAlsoTail)
  (afterResult + (a -> binary), binary)
}
def merge(m: MergedProducer[P, T]): (M, L[T]) = {
val (h1, l1) = toLiteral(hm, m.left)
val (h2, l2) = toLiteral(h1, m.right)
Expand All @@ -101,6 +116,11 @@ trait DagOptimizer[P <: Platform[P]] {
val lit = UnaryLit[T, T, N](l1, mkNamed(n.id))
(h1 + (n -> lit), lit)
}
// Converts a TPNamedProducer to a unary literal over its inner producer, rebuilt with
// mkTPNamed and the node's id, and records the literal for `n` in the returned map.
def namedTP(n: TPNamedProducer[P, T]): (M, L[T]) = {
  val (updated, innerLit) = toLiteral(hm, n.producer)
  val wrapped = UnaryLit[T, T, N](innerLit, mkTPNamed(n.id))
  (updated + (n -> wrapped), wrapped)
}
def ikp[K, V](ik: IdentityKeyedProducer[P, K, V]): (M, L[(K, V)]) = {
val (h1, l1) = toLiteral(hm, ik.producer)
val lit = UnaryLit[(K, V), (K, V), N](l1, mkIdentKey)
Expand All @@ -121,6 +141,11 @@ trait DagOptimizer[P <: Platform[P]] {
val lit = UnaryLit[(K, V), (K2, V), N](l1, mkKeyFM(kf.fn))
(h1 + (kf -> lit), lit)
}
// Converts a ValueFlatMappedProducer to a unary literal over its inner producer, rebuilt
// with mkValueFM and the node's function, and records the literal for `kf` in the returned map.
def vfm[K, V, V2](kf: ValueFlatMappedProducer[P, K, V, V2]): (M, L[(K, V2)]) = {
  val (updated, innerLit) = toLiteral(hm, kf.producer)
  val wrapped = UnaryLit[(K, V), (K, V2), N](innerLit, mkValueFM(kf.fn))
  (updated + (kf -> wrapped), wrapped)
}
def writer[T1 <: T, U >: T1](w: WrittenProducer[P, T1, U]): (M, L[T]) = {
val (h1, l1) = toLiteral(hm, w.producer)
val lit = UnaryLit[T1, T, N](l1, mkWritten[T1, U](w.sink))
Expand Down Expand Up @@ -148,8 +173,10 @@ trait DagOptimizer[P <: Platform[P]] {
case None =>
prod match {
case s @ Source(_) => source(s)
case a: AlsoTailProducer[_, _, _] => alsoTail(a.asInstanceOf[AlsoTailProducer[P, _, T]])
case a @ AlsoProducer(_, _) => also(a)
case m @ MergedProducer(l, r) => merge(m)
case n: TPNamedProducer[_, _] => namedTP(n.asInstanceOf[TPNamedProducer[P, T]])
case n @ NamedProducer(producer, name) => named(n)
case w @ WrittenProducer(producer, sink) => writer(w)
case fm @ FlatMappedProducer(producer, fn) => flm(fm)
Expand All @@ -158,6 +185,7 @@ trait DagOptimizer[P <: Platform[P]] {
// but I can't convince scala of this without the cast.
case ik @ IdentityKeyedProducer(producer) => cast(ikp(ik))
case kf @ KeyFlatMappedProducer(producer, fn) => cast(kfm(kf))
case vf @ ValueFlatMappedProducer(producer, fn) => cast(vfm(vf))
case j @ LeftJoinedProducer(producer, srv) => cast(joined(j))
case s @ Summer(producer, store, sg) => cast(summer(s))
}
Expand Down Expand Up @@ -255,6 +283,18 @@ trait DagOptimizer[P <: Platform[P]] {
cast(in.flatMap { case (k, v) => fn(k).map((_, v)) })
}
}
/**
 * Rule that rewrites a ValueFlatMappedProducer into a plain flatMap over its input:
 * for each (key, value) pair, `fn` is applied to the value and every result is paired
 * with the original key.
 *
 * Use this rule if you can't optimize ValueFlatMaps.
 */
object ValueFlatMapToFlatMap extends PartialRule[Prod] {
def applyWhere[T](on: ExpressionDag[Prod]) = {
// TODO: we need to case class here to not lose the irreducible which may be named
case ValueFlatMappedProducer(in, fn) =>
// we know that (K, V) <: T due to the case match, but scala can't see it
def cast[K, V](p: Prod[(K, V)]): Prod[T] = p.asInstanceOf[Prod[T]]
// Expand each value with fn and re-attach the untouched key to every result.
cast(in.flatMap { case (k, v) => fn(v).map((k, _)) })
}
}

/**
* Combine flatMaps followed by optionMap into a single operation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ import org.scalacheck.Arbitrary._
import org.scalacheck.Prop._
import scala.util.Random

import scala.collection.mutable.{ Map => MMap }
import scala.collection.mutable.{ Map => MMap, HashMap => MHashMap }

object DependantsTest extends Properties("Dependants") {
import TestGraphGenerators._
import MemoryArbitraries._
implicit def testStore: Memory#Store[Int, Int] = MMap[Int, Int]()
implicit def testService: Memory#Service[Int, Int] = new MHashMap[Int, Int]() with MemoryService[Int, Int]
implicit def sink1: Memory#Sink[Int] = ((_) => Unit)
implicit def sink2: Memory#Sink[(Int, Int)] = ((_) => Unit)
implicit val arbSource1: Arbitrary[Producer[Memory, Int]] = Arbitrary(Gen.listOfN(100, Arbitrary.arbitrary[Int]).map(Producer.source[Memory, Int](_)))
implicit val arbSource2: Arbitrary[KeyedProducer[Memory, Int, Int]] = Arbitrary(Gen.listOfN(100, Arbitrary.arbitrary[(Int, Int)]).map(Producer.source[Memory, (Int, Int)](_)))

implicit def genProducer: Arbitrary[Producer[Memory, _]] = Arbitrary(oneOf(genProd1, genProd2, summed))

Expand Down Expand Up @@ -150,7 +150,7 @@ object DependantsTest extends Properties("Dependants") {
}

property("transitiveDependantsTillOutput finds outputs as a subset of dependants") =
forAll { (prod: Producer[Memory, Any]) =>
forAll { (prod: Producer[Memory, _]) =>
val dependants = Dependants(prod)
dependants.nodes.forall { n =>
val output = dependants.transitiveDependantsTillOutput(n).collect {
Expand All @@ -162,7 +162,7 @@ object DependantsTest extends Properties("Dependants") {
}

property("transitiveDependantsTillOutput is a subset of writers dependencies") =
forAll { (prod: Producer[Memory, Any]) =>
forAll { (prod: Producer[Memory, _]) =>
val dependants = Dependants(prod)
dependants.nodes.forall { n =>
val depTillWrite = dependants.transitiveDependantsTillOutput(n)
Expand All @@ -174,7 +174,7 @@ object DependantsTest extends Properties("Dependants") {
}
}

property("transitiveDependantsTillOutput finds no children of outputs") = forAll { (prod: Producer[Memory, Any]) =>
property("transitiveDependantsTillOutput finds no children of outputs") = forAll { (prod: Producer[Memory, _]) =>
val dependants = Dependants(prod)
dependants.nodes.forall { n =>
val tillWrite = dependants.transitiveDependantsTillOutput(n)
Expand Down
Loading