Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ci/cpp-python-msvc-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ popd

pushd python

pip install pickle5
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This install call is redundant with the one below.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, no, the second one is in a distinct virtualenv where we install the wheel we just built.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I got confused by the free-standing pip install here with no other packages. This is then just because we have no conda package for it yet?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, but it's very quick to compile anyway and there are no non-Python dependencies.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW we did add a package to conda-forge. Though it's true this is quite fast to build.

ref: https://github.com/conda-forge/pickle5-feedstock


set PYARROW_CXXFLAGS=/WX
set PYARROW_CMAKE_GENERATOR=%GENERATOR%
set PYARROW_BUNDLE_ARROW_CPP=ON
Expand Down Expand Up @@ -167,6 +169,6 @@ pip install %WHEEL_PATH% || exit /B
python -c "import pyarrow" || exit /B
python -c "import pyarrow.parquet" || exit /B

pip install pandas pytest pytest-faulthandler
pip install pandas pickle5 pytest pytest-faulthandler

py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B
3 changes: 3 additions & 0 deletions ci/travis_script_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ pushd $ARROW_PYTHON_DIR

# Other stuff pip install
pip install -q -r requirements.txt
if [ "$PYTHON_VERSION" == "3.6" ]; then
pip install -q pickle5
fi
if [ "$ARROW_TRAVIS_COVERAGE" == "1" ]; then
export PYARROW_GENERATE_COVERAGE=1
pip install -q coverage
Expand Down
10 changes: 5 additions & 5 deletions python/pyarrow/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def frombytes(o):
def unichar(s):
return unichr(s)
else:
import pickle as builtin_pickle
try:
import pickle5 as builtin_pickle
except ImportError:
import pickle as builtin_pickle

unicode_type = str
def lzip(*x):
Expand Down Expand Up @@ -142,10 +145,7 @@ def unichar(s):
try:
import cloudpickle as pickle
except ImportError:
try:
import cPickle as pickle
except ImportError:
import pickle
pickle = builtin_pickle

def encode_file_path(path):
import os
Expand Down
9 changes: 6 additions & 3 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# arrow::ipc

from libc.stdlib cimport malloc, free
from pyarrow.compat import frombytes, tobytes, encode_file_path
from pyarrow.compat import builtin_pickle, frombytes, tobytes, encode_file_path
from io import BufferedIOBase, UnsupportedOperation

import re
Expand Down Expand Up @@ -810,8 +810,11 @@ cdef class Buffer:
else:
return NotImplemented

def __reduce__(self):
return py_buffer, (self.to_pybytes(),)
def __reduce_ex__(self, protocol):
# Pickle support: pick how this buffer is reduced based on the pickle
# protocol number passed in. Both branches rebuild through py_buffer.
if protocol >= 5:
# Protocol 5 and above: wrap the buffer itself in a PickleBuffer rather
# than converting it to bytes first (presumably so the pickler can pass
# the memory out-of-band per PEP 574 -- confirm).
return py_buffer, (builtin_pickle.PickleBuffer(self),)
else:
# Older protocols: reduce via the bytes object from to_pybytes().
return py_buffer, (self.to_pybytes(),)

def to_pybytes(self):
return cp.PyBytes_FromStringAndSize(
Expand Down
38 changes: 34 additions & 4 deletions python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@

import collections
import datetime
import pickle
import pytest
import struct
import sys

import numpy as np
import pandas as pd
import pandas.util.testing as tm
import pickle
try:
import pickle5
except ImportError:
pickle5 = None

import pyarrow as pa
from pyarrow.pandas_compat import get_logical_type
Expand Down Expand Up @@ -633,7 +637,7 @@ def test_cast_date64_to_int():
assert result.equals(expected)


@pytest.mark.parametrize(
pickle_test_parametrize = pytest.mark.parametrize(
('data', 'typ'),
[
([True, False, True, True], pa.bool_()),
Expand All @@ -647,12 +651,38 @@ def test_cast_date64_to_int():
pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())]))
]
)


@pickle_test_parametrize
def test_array_pickle(data, typ):
# Round-trip an array built from each parametrized (data, typ) pair through
# pickle and check that the unpickled result equals the original.
# Allocate here so that we don't have any Arrow data allocated.
# This is needed to ensure that allocator tests can be reliable.
array = pa.array(data, type=typ)
# NOTE(review): this text is a rendered diff; the next two lines look like
# the pre-change default-protocol round-trip that the loop below replaces --
# confirm against the real file.
result = pickle.loads(pickle.dumps(array))
assert array.equals(result)
# Round-trip once per protocol number, 0 through pickle.HIGHEST_PROTOCOL.
for proto in range(0, pickle.HIGHEST_PROTOCOL + 1):
result = pickle.loads(pickle.dumps(array, proto))
assert array.equals(result)


@pickle_test_parametrize
def test_array_pickle5(data, typ):
    """Check zero-copy pickling of arrays with protocol 5 (PEP 574).

    The array is pickled with a ``buffer_callback`` so its buffers are
    collected out-of-band, then unpickled from those same buffers. The
    result must equal the original and its buffers must report the same
    addresses as the original's, i.e. no data was copied.

    Skipped when neither the ``pickle5`` backport nor a stdlib ``pickle``
    with protocol 5 is available.
    """
    picklemod = pickle5 or pickle
    if pickle5 is None and picklemod.HIGHEST_PROTOCOL < 5:
        pytest.skip("need pickle5 package or Python 3.8+")

    array = pa.array(data, type=typ)
    addresses = [buf.address if buf is not None else 0
                 for buf in array.buffers()]

    # Use the highest protocol of the module actually being exercised.
    # With the pickle5 backport on an older Python, the stdlib
    # pickle.HIGHEST_PROTOCOL is below 5, which would make this range empty
    # and let the test pass without checking anything.
    for proto in range(5, picklemod.HIGHEST_PROTOCOL + 1):
        buffers = []
        pickled = picklemod.dumps(array, proto,
                                  buffer_callback=buffers.append)
        result = picklemod.loads(pickled, buffers=buffers)
        assert array.equals(result)

        result_addresses = [buf.address if buf is not None else 0
                            for buf in result.buffers()]
        assert result_addresses == addresses


@pytest.mark.parametrize(
Expand Down