From d7fbf17945ff88f5415b0b8945c0f29869e63829 Mon Sep 17 00:00:00 2001 From: Kevin Scaldeferri Date: Wed, 19 Mar 2014 10:51:16 -0700 Subject: [PATCH] add variations on pipeFactory to allow a map function before time extraction --- .../scalding/ScaldingPlatform.scala | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/summingbird-scalding/src/main/scala/com/twitter/summingbird/scalding/ScaldingPlatform.scala b/summingbird-scalding/src/main/scala/com/twitter/summingbird/scalding/ScaldingPlatform.scala index e33f57e6b..6202f446d 100644 --- a/summingbird-scalding/src/main/scala/com/twitter/summingbird/scalding/ScaldingPlatform.scala +++ b/summingbird-scalding/src/main/scala/com/twitter/summingbird/scalding/ScaldingPlatform.scala @@ -161,7 +161,27 @@ object Scalding { */ def pipeFactory[T](factory: (DateRange) => Mappable[T]) (implicit timeOf: TimeExtractor[T]): PipeFactory[T] = - StateWithError[(Interval[Timestamp], Mode), List[FailureReason], FlowToPipe[T]]{ + optionMappedPipeFactory(factory)(t => Some(t)) + + /** + * Like pipeFactory, but allows the output of the factory to be mapped; the time extractor is resolved for the mapped type U, not for the factory's element type T. + * + * Useful when using TextLine, for example, where the lines need to be + * parsed before you can extract the timestamps. + */ + def mappedPipeFactory[T,U](factory: (DateRange) => Mappable[T])(fn: T => U) + (implicit timeOf: TimeExtractor[U]): PipeFactory[U] = + optionMappedPipeFactory(factory)(t => Some(fn(t))) + + /** + * Like pipeFactory, but allows the output of the factory to be mapped to an optional value; inputs for which fn returns None are dropped, and the timestamp is extracted from the value inside Some. + * + * Useful when using TextLine, for example, where the lines need to be + * parsed before you can extract the timestamps (fn can return None for a line that should be skipped). 
+ */ + def optionMappedPipeFactory[T,U](factory: (DateRange) => Mappable[T])(fn: T => Option[U]) + (implicit timeOf: TimeExtractor[U]): PipeFactory[U] = + StateWithError[(Interval[Timestamp], Mode), List[FailureReason], FlowToPipe[U]]{ (timeMode: (Interval[Timestamp], Mode)) => { val (timeSpan, mode) = timeMode @@ -173,8 +193,10 @@ object Scalding { ((newIntr, mode), Reader { (fdM: (FlowDef, Mode)) => TypedPipe.from(mappable)(fdM._1, fdM._2) .flatMap { t => - val time = Timestamp(timeOf(t)) - if(newIntr(time)) Some((time, t)) else None + fn(t).flatMap { mapped => + val time = Timestamp(timeOf(mapped)) + if(newIntr(time)) Some((time, mapped)) else None + } } }) }