Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8633d23

Browse files
committed
BUG: Fix read_fwf with compressed files.
In Python 3, files opened with `gzip` and `bz2` both now return `bytes` rather than `str` when read, so we need to check each line for `bytes` and decode it as necessary.
1 parent e461793 commit 8633d23

File tree

3 files changed

+41
-5
lines changed

3 files changed

+41
-5
lines changed

doc/source/release.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,8 @@ Bug Fixes
369369
- Bug in ``iloc`` with a slice index failing (:issue:`4771`)
370370
- Incorrect error message with no colspecs or width in ``read_fwf``. (:issue:`4774`)
371371
- Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, :issue:`4550`)
372+
- Fixed bug with reading compressed files with ``read_fwf`` in Python 3.
373+
(:issue:`3963`)
372374

373375
pandas 0.12.0
374376
-------------

pandas/io/parsers.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1937,11 +1937,20 @@ def __init__(self, f, colspecs, filler, thousands=None):
19371937
isinstance(colspec[1], int) ):
19381938
raise AssertionError()
19391939

1940-
def next(self):
1941-
line = next(self.f)
1942-
# Note: 'colspecs' is a sequence of half-open intervals.
1943-
return [line[fromm:to].strip(self.filler or ' ')
1944-
for (fromm, to) in self.colspecs]
1940+
if compat.PY3:
1941+
def next(self):
1942+
line = next(self.f)
1943+
if isinstance(line, bytes):
1944+
line = line.decode('utf-8')
1945+
# Note: 'colspecs' is a sequence of half-open intervals.
1946+
return [line[fromm:to].strip(self.filler or ' ')
1947+
for (fromm, to) in self.colspecs]
1948+
else:
1949+
def next(self):
1950+
line = next(self.f)
1951+
# Note: 'colspecs' is a sequence of half-open intervals.
1952+
return [line[fromm:to].strip(self.filler or ' ')
1953+
for (fromm, to) in self.colspecs]
19451954

19461955
# Iterator protocol in Python 3 uses __next__()
19471956
__next__ = next

pandas/io/tests/test_parsers.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2028,6 +2028,31 @@ def test_fwf_regression(self):
20282028
res = df.loc[:,c]
20292029
self.assert_(len(res))
20302030

2031+
def test_fwf_compression(self):
2032+
try:
2033+
import gzip
2034+
import bz2
2035+
except ImportError:
2036+
raise nose.SkipTest("Need gzip and bz2 to run this test")
2037+
2038+
data = """1111111111
2039+
2222222222
2040+
3333333333""".strip()
2041+
widths = [5, 5]
2042+
names = ['one', 'two']
2043+
expected = read_fwf(StringIO(data), widths=widths, names=names)
2044+
if compat.PY3:
2045+
data = bytes(data, encoding='utf-8')
2046+
for comp_name, compresser in [('gzip', gzip.GzipFile),
2047+
('bz2', bz2.BZ2File)]:
2048+
with tm.ensure_clean() as path:
2049+
tmp = compresser(path, mode='wb')
2050+
tmp.write(data)
2051+
tmp.close()
2052+
result = read_fwf(path, widths=widths, names=names,
2053+
compression=comp_name)
2054+
tm.assert_frame_equal(result, expected)
2055+
20312056
def test_verbose_import(self):
20322057
text = """a,b,c,d
20332058
one,1,2,3

0 commit comments

Comments
 (0)