3737from builtins import chr
3838from builtins import object
3939
40+ from past .builtins import unicode as past_unicode
4041from past .builtins import long
41- from past .builtins import unicode
4242
4343from apache_beam .coders import observable
4444from apache_beam .utils import windowed_value
7171# pylint: enable=wrong-import-order, wrong-import-position, ungrouped-imports
7272
7373
74+ _TIME_SHIFT = 1 << 63
75+ MIN_TIMESTAMP_micros = MIN_TIMESTAMP .micros
76+ MAX_TIMESTAMP_micros = MAX_TIMESTAMP .micros
77+
78+
7479class CoderImpl (object ):
7580 """For internal use only; no backwards-compatibility guarantees."""
7681
@@ -216,7 +221,7 @@ def __init__(self, coder, step_label):
216221 self ._step_label = step_label
217222
218223 def _check_safe (self , value ):
219- if isinstance (value , (bytes , unicode , long , int , float )):
224+ if isinstance (value , (bytes , past_unicode , long , int , float )):
220225 pass
221226 elif value is None :
222227 pass
@@ -321,10 +326,10 @@ def encode_to_stream(self, value, stream, nested):
321326 elif t is bytes :
322327 stream .write_byte (BYTES_TYPE )
323328 stream .write (value , nested )
324- elif t is unicode :
325- text_value = value # for typing
329+ elif t is past_unicode :
330+ unicode_value = value # for typing
326331 stream .write_byte (UNICODE_TYPE )
327- stream .write (text_value .encode ('utf-8' ), nested )
332+ stream .write (unicode_value .encode ('utf-8' ), nested )
328333 elif t is list or t is tuple or t is set :
329334 stream .write_byte (
330335 LIST_TYPE if t is list else TUPLE_TYPE if t is tuple else SET_TYPE )
@@ -413,37 +418,47 @@ def estimate_size(self, unused_value, nested=False):
413418 return 8
414419
415420
421+ IntervalWindow = None
422+
423+
416424class IntervalWindowCoderImpl (StreamCoderImpl ):
417425 """For internal use only; no backwards-compatibility guarantees."""
418426
419427 # TODO: Fn Harness only supports millis. Is this important enough to fix?
420428 def _to_normal_time (self , value ):
421429 """Convert "lexicographically ordered unsigned" to signed."""
422- return value - ( 1 << 63 )
430+ return value - _TIME_SHIFT
423431
424432 def _from_normal_time (self , value ):
425433 """Convert signed to "lexicographically ordered unsigned"."""
426- return value + ( 1 << 63 )
434+ return value + _TIME_SHIFT
427435
428436 def encode_to_stream (self , value , out , nested ):
429- span_micros = value .end .micros - value .start .micros
437+ typed_value = value
438+ span_millis = (typed_value ._end_micros // 1000
439+ - typed_value ._start_micros // 1000 )
430440 out .write_bigendian_uint64 (
431- self ._from_normal_time (value . end . micros // 1000 ))
432- out .write_var_int64 (span_micros // 1000 )
441+ self ._from_normal_time (typed_value . _end_micros // 1000 ))
442+ out .write_var_int64 (span_millis )
433443
434444 def decode_from_stream (self , in_ , nested ):
435- end_millis = self ._to_normal_time (in_ .read_bigendian_uint64 ())
436- start_millis = end_millis - in_ .read_var_int64 ()
437- from apache_beam .transforms .window import IntervalWindow
438- ret = IntervalWindow (start = Timestamp (micros = start_millis * 1000 ),
439- end = Timestamp (micros = end_millis * 1000 ))
440- return ret
445+ global IntervalWindow
446+ if IntervalWindow is None :
447+ from apache_beam .transforms .window import IntervalWindow
448+ typed_value = IntervalWindow (None , None )
449+ typed_value ._end_micros = (
450+ 1000 * self ._to_normal_time (in_ .read_bigendian_uint64 ()))
451+ typed_value ._start_micros = (
452+ typed_value ._end_micros - 1000 * in_ .read_var_int64 ())
453+ return typed_value
441454
442455 def estimate_size (self , value , nested = False ):
443456 # An IntervalWindow is context-insensitive, with a timestamp (8 bytes)
444457 # and a varint timespan.
445- span = value .end .micros - value .start .micros
446- return 8 + get_varint_size (span // 1000 )
458+ typed_value = value
459+ span_millis = (typed_value ._end_micros // 1000
460+ - typed_value ._start_micros // 1000 )
461+ return 8 + get_varint_size (span_millis )
447462
448463
449464class TimestampCoderImpl (StreamCoderImpl ):
@@ -647,10 +662,11 @@ def encode_to_stream(self, value, out, nested):
647662 # -1 to indicate that the length is not known.
648663 out .write_bigendian_int32 (- 1 )
649664 buffer = create_OutputStream ()
665+ target_buffer_size = self ._DEFAULT_BUFFER_SIZE
650666 prev_index = index = - 1
651667 for index , elem in enumerate (value ):
652668 self ._elem_coder .encode_to_stream (elem , buffer , True )
653- if out .size () > self . _DEFAULT_BUFFER_SIZE :
669+ if buffer .size () > target_buffer_size :
654670 out .write_var_int64 (index - prev_index )
655671 out .write (buffer .get ())
656672 prev_index = index
@@ -739,25 +755,31 @@ class PaneInfoEncoding(object):
739755 TWO_INDICES = 2
740756
741757
758+ # These are cdef'd to ints to optimize the common case.
759+ PaneInfoTiming_UNKNOWN = windowed_value .PaneInfoTiming .UNKNOWN
760+ PaneInfoEncoding_FIRST = PaneInfoEncoding .FIRST
761+
762+
742763class PaneInfoCoderImpl (StreamCoderImpl ):
743764 """For internal use only; no backwards-compatibility guarantees.
744765
745766 Coder for a PaneInfo descriptor."""
746767
747768 def _choose_encoding (self , value ):
748- if ((value .index == 0 and value .nonspeculative_index == 0 ) or
749- value .timing == windowed_value . PaneInfoTiming . UNKNOWN ):
750- return PaneInfoEncoding . FIRST
751- elif (value .index == value .nonspeculative_index or
752- value .timing == windowed_value .PaneInfoTiming .EARLY ):
769+ if ((value ._index == 0 and value ._nonspeculative_index == 0 ) or
770+ value ._timing == PaneInfoTiming_UNKNOWN ):
771+ return PaneInfoEncoding_FIRST
772+ elif (value ._index == value ._nonspeculative_index or
773+ value ._timing == windowed_value .PaneInfoTiming .EARLY ):
753774 return PaneInfoEncoding .ONE_INDEX
754775 else :
755776 return PaneInfoEncoding .TWO_INDICES
756777
757778 def encode_to_stream (self , value , out , nested ):
758- encoding_type = self ._choose_encoding (value )
759- out .write_byte (value .encoded_byte | (encoding_type << 4 ))
760- if encoding_type == PaneInfoEncoding .FIRST :
779+ pane_info = value # cast
780+ encoding_type = self ._choose_encoding (pane_info )
781+ out .write_byte (pane_info ._encoded_byte | (encoding_type << 4 ))
782+ if encoding_type == PaneInfoEncoding_FIRST :
761783 return
762784 elif encoding_type == PaneInfoEncoding .ONE_INDEX :
763785 out .write_var_int64 (value .index )
@@ -772,7 +794,7 @@ def decode_from_stream(self, in_stream, nested):
772794 base = windowed_value ._BYTE_TO_PANE_INFO [encoded_first_byte & 0xF ]
773795 assert base is not None
774796 encoding_type = encoded_first_byte >> 4
775- if encoding_type == PaneInfoEncoding . FIRST :
797+ if encoding_type == PaneInfoEncoding_FIRST :
776798 return base
777799 elif encoding_type == PaneInfoEncoding .ONE_INDEX :
778800 index = in_stream .read_var_int64 ()
@@ -811,11 +833,11 @@ class WindowedValueCoderImpl(StreamCoderImpl):
811833 # byte representation of timestamps.
812834 def _to_normal_time (self , value ):
813835 """Convert "lexicographically ordered unsigned" to signed."""
814- return value - ( 1 << 63 )
836+ return value - _TIME_SHIFT
815837
816838 def _from_normal_time (self , value ):
817839 """Convert signed to "lexicographically ordered unsigned"."""
818- return value + ( 1 << 63 )
840+ return value + _TIME_SHIFT
819841
820842 def __init__ (self , value_coder , timestamp_coder , window_coder ):
821843 # TODO(lcwik): Remove the timestamp coder field
@@ -849,16 +871,12 @@ def decode_from_stream(self, in_stream, nested):
849871 # were indeed MIN/MAX timestamps.
850872 # TODO(BEAM-1524): Clean this up once we have a BEAM wide consensus on
851873 # precision of timestamps.
852- if timestamp == - (abs (MIN_TIMESTAMP . micros ) // 1000 ):
853- timestamp = MIN_TIMESTAMP . micros
854- elif timestamp == ( MAX_TIMESTAMP . micros // 1000 ) :
855- timestamp = MAX_TIMESTAMP . micros
874+ if timestamp <= - (abs (MIN_TIMESTAMP_micros ) // 1000 ):
875+ timestamp = MIN_TIMESTAMP_micros
876+ elif timestamp >= MAX_TIMESTAMP_micros // 1000 :
877+ timestamp = MAX_TIMESTAMP_micros
856878 else :
857879 timestamp *= 1000
858- if timestamp > MAX_TIMESTAMP .micros :
859- timestamp = MAX_TIMESTAMP .micros
860- if timestamp < MIN_TIMESTAMP .micros :
861- timestamp = MIN_TIMESTAMP .micros
862880
863881 windows = self ._windows_coder .decode_from_stream (in_stream , True )
864882 # Read PaneInfo encoded byte.
0 commit comments