From bfb0830283211acc98548f054c8ac32f9a7f7b44 Mon Sep 17 00:00:00 2001
From: Andrew Liu <andrewlliu@gmail.com>
Date: Sun, 22 Sep 2019 11:02:16 -0700
Subject: [PATCH 1/2] ENH: add quoting support

---
 numpy/lib/_iotools.py | 31 +++++++++++++++++++++++++++++--
 numpy/lib/npyio.py    | 12 ++++++++++--
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index c392929fd879..793205209631 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -210,12 +210,19 @@ def autostrip(self, method):
         return lambda input: [_.strip() for _ in method(input)]
     #
 
-    def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None):
+    def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None, quoter=None):
         delimiter = _decode_line(delimiter)
         comments = _decode_line(comments)
+        quoter = _decode_line(quoter)
 
         self.comments = comments
 
+        # Quoter is a single character or None; any other value is ignored
+        if (quoter is None) or (isinstance(quoter, basestring) and len(quoter) == 1):
+            self.quoter = quoter or None
+        else:
+            self.quoter = None
+
         # Delimiter is a character
         if (delimiter is None) or isinstance(delimiter, basestring):
             delimiter = delimiter or None
@@ -246,7 +253,27 @@ def _delimited_splitter(self, line):
         line = line.strip(" \r\n")
         if not line:
             return []
-        return line.split(self.delimiter)
+
+        if self.quoter is None:
+            return line.split(self.delimiter)
+        else:
+            out = []
+            isQuoted = False
+            chars = list(line)
+            word = ''
+
+            for char in chars:
+                if char == self.quoter:
+                    isQuoted = not isQuoted
+                else if char == self.delimiter and not isQuoted:
+                    out.append(word)
+                else:
+                    word += char
+
+            if word:
+                out.append(word)
+            
+            return out
     #
 
     def _fixedwidth_splitter(self, line):
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index e57a6dd47b98..a77c56fd89d1 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1564,7 +1564,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
                replace_space='_', autostrip=False, case_sensitive=True,
                defaultfmt="f%i", unpack=None, usemask=False, loose=True,
-               invalid_raise=True, max_rows=None, encoding='bytes'):
+               invalid_raise=True, max_rows=None, encoding='bytes', quoter=None):
     """
     Load data from a text file, with missing values handled as specified.
 
@@ -1662,6 +1662,13 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
         .. versionadded:: 1.14.0
 
+    quoter : str, optional
+        The single character used for quoting values. By default, it is
+        assumed that the values are not quoted. If an invalid value is
+        provided, quoter defaults to None.
+
+        .. versionadded:: 1.18.0
+
     Returns
     -------
     out : ndarray
@@ -1780,7 +1787,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     with fid_ctx:
         split_line = LineSplitter(delimiter=delimiter, comments=comments,
-                                  autostrip=autostrip, encoding=encoding)
+                                  autostrip=autostrip, encoding=encoding,
+                                  quoter=quoter)
         validate_names = NameValidator(excludelist=excludelist,
                                        deletechars=deletechars,
                                        case_sensitive=case_sensitive,

From 20f868f9630454dd323cb5563ca45b1e85e8288c Mon Sep 17 00:00:00 2001
From: Andrew Liu <andrewlliu@gmail.com>
Date: Sun, 22 Sep 2019 13:28:49 -0700
Subject: [PATCH 2/2] ENH: support quoting and add tests

---
 numpy/lib/_iotools.py      | 16 ++++++++++------
 numpy/lib/tests/test_io.py | 12 ++++++++++++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 793205209631..9ce17f7eea21 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -258,23 +258,27 @@ def _delimited_splitter(self, line):
             return line.split(self.delimiter)
         else:
             out = []
+            index = 0
             isQuoted = False
-            chars = list(line)
             word = ''
 
-            for char in chars:
+            while index < len(line):
+                char = line[index]
                 if char == self.quoter:
-                    isQuoted = not isQuoted
-                else if char == self.delimiter and not isQuoted:
+                    if len(word) == 0 and not isQuoted:
+                        isQuoted = True
+                    else:
+                        isQuoted = False
+                elif char == self.delimiter and not isQuoted:
                     out.append(word)
+                    word = ''
                 else:
                     word += char
-
+                index += 1
             if word:
                 out.append(word)
             
             return out
-    #
 
     def _fixedwidth_splitter(self, line):
         if self.comments is not None:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 6ee17c830f5c..74c0080de378 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2420,6 +2420,18 @@ def test_genfromtxt(self):
             data = np.genfromtxt(path)
             assert_array_equal(a, data)
 
+    def test_genfromtxt_quoter(self):
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            # "This is my text, that has a comma inside","Other value","3"
+            # "Another text, with coma","More text, with comma",5
+            with path.open('w') as f:
+                a = u"\"This is my text, that has a comma inside\",\"Other value\",\"3\"\n\"Another text, with coma\",\"More text, with comma\",5"
+                f.write(a)
+
+            data = np.genfromtxt(path, delimiter=',', quoter='"', encoding=None, dtype=None)
+            assert_equal(data.shape, (2,))
+
     def test_ndfromtxt(self):
         # Test outputting a standard ndarray
         with temppath(suffix='.txt') as path: