BUG: Fix read_fwf with compressed files.

jtratner · jtratner · commit 8633d23f14fd · 2013-09-09T00:31:32.000-04:00
In Python 3, file objects opened via `gzip` and `bz2` yield `bytes` lines rather
than `str`, so the fixed-width reader must check for `bytes` and decode as necessary.
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -369,6 +369,8 @@ Bug Fixes
   - Bug in ``iloc`` with a slice index failing (:issue:`4771`)
   - Incorrect error message with no colspecs or width in ``read_fwf``. (:issue:`4774`)
   - Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, :issue:`4550`)
+  - Fixed bug with reading compressed files with ``read_fwf`` in Python 3.
+    (:issue:`3963`)
 
 pandas 0.12.0
 -------------
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -1937,11 +1937,20 @@ def __init__(self, f, colspecs, filler, thousands=None):
                        isinstance(colspec[1], int) ):
                 raise AssertionError()
 
-    def next(self):
-        line = next(self.f)
-        # Note: 'colspecs' is a sequence of half-open intervals.
-        return [line[fromm:to].strip(self.filler or ' ')
-                for (fromm, to) in self.colspecs]
+    if compat.PY3:
+        def next(self):
+            line = next(self.f)
+            if isinstance(line, bytes):
+                line = line.decode('utf-8')
+            # Note: 'colspecs' is a sequence of half-open intervals.
+            return [line[fromm:to].strip(self.filler or ' ')
+                    for (fromm, to) in self.colspecs]
+    else:
+        def next(self):
+            line = next(self.f)
+            # Note: 'colspecs' is a sequence of half-open intervals.
+            return [line[fromm:to].strip(self.filler or ' ')
+                    for (fromm, to) in self.colspecs]
 
     # Iterator protocol in Python 3 uses __next__()
     __next__ = next
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -2028,6 +2028,31 @@ def test_fwf_regression(self):
             res = df.loc[:,c]
             self.assert_(len(res))
 
+    def test_fwf_compression(self):
+        try:
+            import gzip
+            import bz2
+        except ImportError:
+            raise nose.SkipTest("Need gzip and bz2 to run this test")
+
+        data = """1111111111
+        2222222222
+        3333333333""".strip()
+        widths = [5, 5]
+        names = ['one', 'two']
+        expected = read_fwf(StringIO(data), widths=widths, names=names)
+        if compat.PY3:
+            data = bytes(data, encoding='utf-8')
+        for comp_name, compresser in [('gzip', gzip.GzipFile),
+                                      ('bz2', bz2.BZ2File)]:
+            with tm.ensure_clean() as path:
+                tmp = compresser(path, mode='wb')
+                tmp.write(data)
+                tmp.close()
+                result = read_fwf(path, widths=widths, names=names,
+                                  compression=comp_name)
+                tm.assert_frame_equal(result, expected)
+
     def test_verbose_import(self):
         text = """a,b,c,d
 one,1,2,3