Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{
"https://github.com/apache/beam/pull/35951": "triggering sideinput test"
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 1
}
2 changes: 2 additions & 0 deletions sdks/python/apache_beam/coders/coder_impl.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,15 @@ cdef class FastPrimitivesCoderImpl(StreamCoderImpl):
cdef CoderImpl iterable_coder_impl
cdef object requires_deterministic_step_label
cdef bint warn_deterministic_fallback
cdef bint force_use_dill

@cython.locals(dict_value=dict, int_value=libc.stdint.int64_t,
unicode_value=unicode)
cpdef encode_to_stream(self, value, OutputStream stream, bint nested)
@cython.locals(t=int)
cpdef decode_from_stream(self, InputStream stream, bint nested)
cdef encode_special_deterministic(self, value, OutputStream stream)
cdef encode_type_2_67_0(self, t, OutputStream stream)
cdef encode_type(self, t, OutputStream stream)
cdef decode_type(self, InputStream stream)

Expand Down
61 changes: 56 additions & 5 deletions sdks/python/apache_beam/coders/coder_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@
from typing import Tuple
from typing import Type

import dill
import numpy as np
from fastavro import parse_schema
from fastavro import schemaless_reader
from fastavro import schemaless_writer

from apache_beam.coders import observable
from apache_beam.coders.avro_record import AvroRecord
from apache_beam.internal import cloudpickle_pickler
from apache_beam.typehints.schemas import named_tuple_from_schema
from apache_beam.utils import proto_utils
from apache_beam.utils import windowed_value
Expand All @@ -71,6 +71,11 @@
except ImportError:
dataclasses = None # type: ignore

try:
import dill
except ImportError:
dill = None

if TYPE_CHECKING:
import proto
from apache_beam.transforms import userstate
Expand Down Expand Up @@ -354,14 +359,30 @@ def decode(self, value):
_ITERABLE_LIKE_TYPES = set() # type: Set[Type]


def _verify_dill_compat():
  """Raise RuntimeError unless a compatible dill (exactly 0.3.1.1) is present.

  Called on the legacy (<=2.67.0 update-compatibility) type-encoding path,
  which pickles types with dill and therefore requires the pinned version.

  Raises:
    RuntimeError: if dill is not installed or has the wrong version.
  """
  base_error = (
      "This pipeline runs with the pipeline option "
      "--update_compatibility_version=2.67.0 or earlier. "
      "When running with this option on SDKs 2.68.0 or "
      "later, you must ensure dill==0.3.1.1 is installed.")
  if not dill:
    raise RuntimeError(base_error + " Dill is not installed.")
  if dill.__version__ != "0.3.1.1":
    # Original message had an unterminated quote; close it properly.
    raise RuntimeError(
        base_error + f" Found dill version '{dill.__version__}'.")


class FastPrimitivesCoderImpl(StreamCoderImpl):
"""For internal use only; no backwards-compatibility guarantees."""
def __init__(
    self,
    fallback_coder_impl,
    requires_deterministic_step_label=None,
    force_use_dill=False):
  """Set up the primitive coder with its fallback and compatibility flags.

  Args:
    fallback_coder_impl: coder impl used for values this coder cannot
      encode directly.
    requires_deterministic_step_label: step label when deterministic
      encoding is required (None otherwise).
    force_use_dill: when True, use the legacy dill-based type encoding
      for <=2.67.0 update compatibility.
  """
  # Flags first, then the coder impls that depend on self.
  self.requires_deterministic_step_label = requires_deterministic_step_label
  self.warn_deterministic_fallback = True
  self.force_use_dill = force_use_dill
  self.fallback_coder_impl = fallback_coder_impl
  self.iterable_coder_impl = IterableCoderImpl(self)

@staticmethod
def register_iterable_like_type(t):
Expand Down Expand Up @@ -525,10 +546,23 @@ def _deterministic_encoding_error_msg(self, value):
"please provide a type hint for the input of '%s'" %
(value, type(value), self.requires_deterministic_step_label))

def encode_type(self, t, stream):
def encode_type_2_67_0(self, t, stream):
  """Encode a type with <=2.67.0 update compatibility (dill-based)."""
  # Legacy path requires the pinned dill version.
  _verify_dill_compat()
  pickled = dill.dumps(t)
  stream.write(pickled, True)

def encode_type(self, t, stream):
  """Encode a type, choosing the legacy dill or cloudpickle representation."""
  if self.force_use_dill:
    # Legacy (<=2.67.0 update-compatibility) representation.
    return self.encode_type_2_67_0(t, stream)
  pickled = cloudpickle_pickler.dumps(
      t, config=cloudpickle_pickler.NO_DYNAMIC_CLASS_TRACKING_CONFIG)
  stream.write(pickled, True)

def decode_type(self, stream):
  """Decode a type written by encode_type / encode_type_2_67_0."""
  data = stream.read_all(True)
  if self.force_use_dill:
    # Legacy (<=2.67.0 update-compatibility) representation.
    return _unpickle_type_2_67_0(data)
  return _unpickle_type(data)

def decode_from_stream(self, stream, nested):
Expand Down Expand Up @@ -589,19 +623,35 @@ def decode_from_stream(self, stream, nested):
_unpickled_types = {} # type: Dict[bytes, type]


def _unpickle_type(bs):
def _unpickle_type_2_67_0(bs):
  """Decode a dill-pickled type with <=2.67.0 update compatibility.

  Results are memoized in the module-level _unpickled_types cache, keyed
  by the pickled bytes.

  Args:
    bs: dill-pickled bytes for a type.

  Returns:
    The unpickled type.
  """
  t = _unpickled_types.get(bs, None)
  if t is None:
    _verify_dill_compat()
    t = _unpickled_types[bs] = dill.loads(bs)
    # Fix unpicklable anonymous named tuples for Python 3.6.
    if t.__base__ is tuple and hasattr(t, '_fields'):
      try:
        pickle.loads(pickle.dumps(t))
      except pickle.PicklingError:
        # Removed the stale duplicate assignment (left over from the diff)
        # that pointed at _unpickle_named_tuple; the legacy path must round
        # trip through _unpickle_named_tuple_2_67_0.
        t.__reduce__ = lambda self: (
            _unpickle_named_tuple_2_67_0, (bs, tuple(self)))
  return t


def _unpickle_named_tuple_2_67_0(bs, items):
  """Rebuild a named-tuple instance from its dill-pickled type and fields."""
  tuple_type = _unpickle_type_2_67_0(bs)
  return tuple_type(*items)


def _unpickle_type(bs):
  """Decode a cloudpickle-pickled type, memoizing by the pickled bytes."""
  t = _unpickled_types.get(bs, None)
  if t is None:
    t = cloudpickle_pickler.loads(bs)
    _unpickled_types[bs] = t
  return t


def _unpickle_named_tuple(bs, items):
  """Rebuild a named-tuple instance from its pickled type and field values."""
  tuple_type = _unpickle_type(bs)
  return tuple_type(*items)

Expand Down Expand Up @@ -837,6 +887,7 @@ def decode_from_stream(self, in_, nested):
if IntervalWindow is None:
from apache_beam.transforms.window import IntervalWindow
# instantiating with None is not part of the public interface
# pylint: disable=too-many-function-args
typed_value = IntervalWindow(None, None) # type: ignore[arg-type]
typed_value._end_micros = (
1000 * self._to_normal_time(in_.read_bigendian_uint64()))
Expand Down
72 changes: 70 additions & 2 deletions sdks/python/apache_beam/coders/coders.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,44 @@ def _create_impl(self):
cloudpickle_pickler.dumps, cloudpickle_pickler.loads)


class DeterministicFastPrimitivesCoderV2(FastCoder):
  """Throws runtime errors when encoding non-deterministic values.

  Unlike the legacy DeterministicFastPrimitivesCoder, this variant encodes
  types with cloudpickle (force_use_dill=False), i.e. the >=2.68.0 behavior.
  """
  def __init__(self, coder, step_label):
    self._underlying_coder = coder
    self._step_label = step_label

  def _create_impl(self):
    return coder_impl.FastPrimitivesCoderImpl(
        self._underlying_coder.get_impl(),
        requires_deterministic_step_label=self._step_label,
        force_use_dill=False)

  def is_deterministic(self):
    # type: () -> bool
    return True

  def is_kv_coder(self):
    # type: () -> bool
    return True

  def key_coder(self):
    return self

  def value_coder(self):
    return self

  def to_type_hint(self):
    return Any

  def to_runner_api_parameter(self, context):
    # type: (Optional[PipelineContext]) -> Tuple[str, Any, Sequence[Coder]]
    # No dedicated URN: serialize the whole coder via pickling.
    return (
        python_urns.PICKLED_CODER,
        google.protobuf.wrappers_pb2.BytesValue(value=serialize_coder(self)),
        ())


class DeterministicFastPrimitivesCoder(FastCoder):
"""Throws runtime errors when encoding non-deterministic values."""
def __init__(self, coder, step_label):
Expand All @@ -920,7 +958,8 @@ def __init__(self, coder, step_label):
def _create_impl(self):
  # Diff residue left both the old and new keyword lines here (a duplicate
  # requires_deterministic_step_label and an unbalanced paren); keep only
  # the new form. force_use_dill=True keeps encoded bytes compatible with
  # SDKs <= 2.67.0.
  return coder_impl.FastPrimitivesCoderImpl(
      self._underlying_coder.get_impl(),
      requires_deterministic_step_label=self._step_label,
      force_use_dill=True)

def is_deterministic(self):
# type: () -> bool
Expand All @@ -940,6 +979,34 @@ def to_type_hint(self):
return Any


def _should_force_use_dill():
  """Return True iff update compatibility (< 2.68.0) requires the dill path.

  Returns:
    False when no --update_compatibility_version is set, or when it is
    2.68.0 or later; True otherwise.

  Raises:
    RuntimeError: if the legacy path is required but dill==0.3.1.1 is not
      installed.
  """
  from apache_beam.coders import typecoders
  from apache_beam.transforms.util import is_v1_prior_to_v2
  update_compat_version = typecoders.registry.update_compatibility_version

  if not update_compat_version:
    return False

  if not is_v1_prior_to_v2(v1=update_compat_version, v2="2.68.0"):
    return False

  try:
    import dill
    # Explicit check instead of `assert`, which is stripped under -O and
    # would silently skip the version validation.
    if dill.__version__ != "0.3.1.1":
      raise RuntimeError(f"found dill version '{dill.__version__}'")
  except Exception as e:
    raise RuntimeError(
        "This pipeline runs with the pipeline option "
        "--update_compatibility_version=2.67.0 or earlier. When running with "
        "this option on SDKs 2.68.0 or later, you must ensure dill==0.3.1.1 "
        f"is installed. Error {e}") from e
  return True


def _update_compatible_deterministic_fast_primitives_coder(coder, step_label):
  """Choose between the legacy (dill) and V2 (cloudpickle) coders.

  The legacy coder is only used when update compatibility with
  SDKs < 2.68.0 is requested via pipeline options.
  """
  if not _should_force_use_dill():
    return DeterministicFastPrimitivesCoderV2(coder, step_label)
  return DeterministicFastPrimitivesCoder(coder, step_label)


class FastPrimitivesCoder(FastCoder):
"""Encodes simple primitives (e.g. str, int) efficiently.

Expand All @@ -960,7 +1027,8 @@ def as_deterministic_coder(self, step_label, error_message=None):
if self.is_deterministic():
return self
else:
return DeterministicFastPrimitivesCoder(self, step_label)
return _update_compatible_deterministic_fast_primitives_coder(
self, step_label)

def to_type_hint(self):
  """Primitive coders accept arbitrary values, so the type hint is Any."""
  return Any
Expand Down
Loading
Loading