diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index c392929fd879..9ce17f7eea21 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -210,12 +210,20 @@ def autostrip(self, method):
         return lambda input: [_.strip() for _ in method(input)]
     #
 
-    def __init__(self, delimiter=None, comments='#', autostrip=True, encoding=None):
+    def __init__(self, delimiter=None, comments='#', autostrip=True,
+                 encoding=None, quoter=None):
         delimiter = _decode_line(delimiter)
         comments = _decode_line(comments)
+        quoter = _decode_line(quoter)
 
         self.comments = comments
 
+        # Quoter is a single character or None; any other value is ignored
+        if isinstance(quoter, basestring) and len(quoter) == 1:
+            self.quoter = quoter
+        else:
+            self.quoter = None
+
         # Delimiter is a character
         if (delimiter is None) or isinstance(delimiter, basestring):
             delimiter = delimiter or None
@@ -246,8 +254,55 @@ def _delimited_splitter(self, line):
         line = line.strip(" \r\n")
         if not line:
             return []
-        return line.split(self.delimiter)
-    #
+
+        if self.quoter is None:
+            return line.split(self.delimiter)
+
+        # Quote-aware split: a delimiter found inside a quoted section is
+        # part of the field, and the quoting characters are not.
+        quoter = self.quoter
+        delimiter = self.delimiter
+        # A delimiter may be longer than one character
+        width = len(delimiter) if delimiter is not None else 0
+        fields = []
+        chars = []
+        in_quotes = False
+        # Whitespace splitting only: has the current field begun?
+        started = False
+        index = 0
+        while index < len(line):
+            char = line[index]
+            if char == quoter:
+                # A quoter opens a quoted section only at the start of a
+                # field; anywhere else it closes any open section. The
+                # quoter itself is never part of the field.
+                in_quotes = not chars and not in_quotes
+                started = True
+            elif in_quotes:
+                chars.append(char)
+            elif delimiter is None:
+                # No delimiter: split on whitespace runs like str.split()
+                if not char.isspace():
+                    chars.append(char)
+                    started = True
+                elif started:
+                    fields.append(''.join(chars))
+                    chars = []
+                    started = False
+            elif line.startswith(delimiter, index):
+                fields.append(''.join(chars))
+                chars = []
+                index += width
+                continue
+            else:
+                chars.append(char)
+            index += 1
+        # Keep a trailing empty field ("a,b," has three fields), as
+        # str.split() does; whitespace splitting yields an empty field
+        # only when it was quoted.
+        if delimiter is not None or started:
+            fields.append(''.join(chars))
+        return fields
 
     def _fixedwidth_splitter(self, line):
         if self.comments is not None:
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index e57a6dd47b98..a77c56fd89d1 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1564,7 +1564,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
                replace_space='_', autostrip=False, case_sensitive=True,
                defaultfmt="f%i", unpack=None, usemask=False, loose=True,
-               invalid_raise=True, max_rows=None, encoding='bytes'):
+               invalid_raise=True, max_rows=None, encoding='bytes',
+               quoter=None):
     """
     Load data from a text file, with missing values handled as specified.
 
@@ -1662,6 +1663,14 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
         .. versionadded:: 1.14.0
 
+    quoter : str, optional
+        The character used to quote fields. A delimiter found inside a
+        quoted field is treated as part of that field. By default, it is
+        assumed that the values are not quoted. A value that is not a
+        single character is ignored, as if None had been given.
+
+        .. versionadded:: 1.18.0
+
     Returns
     -------
     out : ndarray
@@ -1780,7 +1789,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     with fid_ctx:
         split_line = LineSplitter(delimiter=delimiter, comments=comments,
-                                  autostrip=autostrip, encoding=encoding)
+                                  autostrip=autostrip, encoding=encoding,
+                                  quoter=quoter)
         validate_names = NameValidator(excludelist=excludelist,
                                        deletechars=deletechars,
                                        case_sensitive=case_sensitive,
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 6ee17c830f5c..74c0080de378 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -2420,6 +2420,26 @@ def test_genfromtxt(self):
             data = np.genfromtxt(path)
             assert_array_equal(a, data)
 
+    def test_genfromtxt_quoter(self):
+        # Delimiters inside quoted fields must not split the field
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            # "This is my text, that has a comma inside","Other value","3"
+            # "Another text, with coma","More text, with comma",5
+            with path.open('w') as f:
+                f.write(u'"This is my text, that has a comma inside",'
+                        u'"Other value","3"\n'
+                        u'"Another text, with coma",'
+                        u'"More text, with comma",5')
+
+            data = np.genfromtxt(path, delimiter=',', quoter='"',
+                                 encoding=None, dtype=None)
+            assert_equal(data.shape, (2,))
+            assert_equal(data['f0'][0],
+                         u"This is my text, that has a comma inside")
+            assert_equal(data['f1'][1], u"More text, with comma")
+            assert_equal(data['f2'], [3, 5])
+
     def test_ndfromtxt(self):
         # Test outputting a standard ndarray
         with temppath(suffix='.txt') as path: