numpy · mattip · Feb 8, 2022 · Oct 20, 2021 · Dec 8, 2021 · Dec 8, 2021
diff --git a/doc/release/upcoming_changes/20580.compatibility.rst b/doc/release/upcoming_changes/20580.compatibility.rst
@@ -0,0 +1,33 @@
+``np.loadtxt`` has recieved several changes  
+-------------------------------------------
+
+The row counting of `numpy.loadtxt` was fixed.  ``loadtxt`` ignores fully
+empty lines in the file, but counted them towards ``max_rows``.
+When ``max_rows`` is used and the file contains empty lines, these will now
+not be counted.  Previously, it was possible that the result contained fewer
+than ``max_rows`` rows even though more data was available to be read.
+If the old behaviour is required, ``itertools.islice`` may be used::
+
+    import itertools
+    lines = itertools.islice(open("file"), 0, max_rows)
+    result = np.loadtxt(lines, ...)
+
+While generally much faster and improved, `numpy.loadtxt` may now fail to
+converter certain strings to numbers that were previously successfully read.
+The most important cases for this are:
+
+* Parsing floating point values such as ``1.0`` into integers will now fail
+* Parsing hexadecimal floats such as ``0x3p3`` will fail
+* An ``_`` was previously accepted as a thousands delimiter ``100_000``.
+  This will now result in an error.
+
+If you experience these limitations, they can all be worked around by passing
+appropriate ``converters=``.  NumPy now supports passing a single converter
+to be used for all columns to make this more convenient.
+For example, ``converters=float.fromhex`` can read hexadecimal float numbers
+and ``converters=int`` will be able to read ``100_000``.
+
+Further, the error messages have been generally improved.  However, this means
+that error types may differ.  In particularly, a ``ValueError`` is now always
+raised when parsing of a single entry fails.
+
diff --git a/doc/release/upcoming_changes/20580.new_feature.rst b/doc/release/upcoming_changes/20580.new_feature.rst
@@ -0,0 +1,8 @@
+``np.loadtxt`` now supports quote character and single converter function
+-------------------------------------------------------------------------
+`numpy.loadtxt` now supports an additional ``quotechar`` keyword argument
+which is not set by default.  Using ``quotechar='"'`` will read quoted fields
+as used by the Excel CSV dialect.
+
+Further, it is now possible to pass a single callable rather than a dictionary
+for the ``converters`` argument.
diff --git a/doc/release/upcoming_changes/20580.performance.rst b/doc/release/upcoming_changes/20580.performance.rst
@@ -0,0 +1,4 @@
+Faster ``np.loadtxt``
+---------------------
+`numpy.loadtxt` is now generally much faster than previously as most of it
+is now implemented in C.
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
@@ -868,6 +868,7 @@ def gl_if_msvc(build_cmd):
             join('src', 'multiarray', 'typeinfo.h'),
             join('src', 'multiarray', 'usertypes.h'),
             join('src', 'multiarray', 'vdot.h'),
+            join('src', 'multiarray', 'textreading', 'readtext.h'),
             join('include', 'numpy', 'arrayobject.h'),
             join('include', 'numpy', '_neighborhood_iterator_imp.h'),
             join('include', 'numpy', 'npy_endian.h'),
@@ -955,6 +956,14 @@ def gl_if_msvc(build_cmd):
             join('src', 'npysort', 'selection.c.src'),
             join('src', 'common', 'npy_binsearch.h'),
             join('src', 'npysort', 'binsearch.cpp'),
+            join('src', 'multiarray', 'textreading', 'conversions.c'),
+            join('src', 'multiarray', 'textreading', 'field_types.c'),
+            join('src', 'multiarray', 'textreading', 'growth.c'),
+            join('src', 'multiarray', 'textreading', 'readtext.c'),
+            join('src', 'multiarray', 'textreading', 'rows.c'),
+            join('src', 'multiarray', 'textreading', 'stream_pyobject.c'),
+            join('src', 'multiarray', 'textreading', 'str_to_int.c'),
+            join('src', 'multiarray', 'textreading', 'tokenize.c.src'),
             ]
 
     #######################################################################

diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
@@ -993,6 +993,17 @@ PyArray_PyIntAsIntp(PyObject *o)
 }
 
 
+NPY_NO_EXPORT int
+PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val)
+{
+    *val = PyArray_PyIntAsIntp(o);
+    if (error_converting(*val)) {
+        return NPY_FAIL;
+    }
+    return NPY_SUCCEED;
+}
+
+
 /*
  * PyArray_IntpFromIndexSequence
  * Returns the number of dimensions or -1 if an error occurred.

diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h
@@ -6,6 +6,9 @@
 NPY_NO_EXPORT int
 PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq);
 
+NPY_NO_EXPORT int
+PyArray_IntpFromPyIntConverter(PyObject *o, npy_intp *val);
+
 NPY_NO_EXPORT int
 PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq);
 

diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
@@ -69,6 +69,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 
 #include "get_attr_string.h"
 #include "experimental_public_dtype_api.h"  /* _get_experimental_dtype_api */
+#include "textreading/readtext.h"  /* _readtext_from_file_object */
 
 #include "npy_dlpack.h"
 
@@ -4456,6 +4457,8 @@ static struct PyMethodDef array_module_methods[] = {
         METH_VARARGS | METH_KEYWORDS, NULL},
     {"_get_experimental_dtype_api", (PyCFunction)_get_experimental_dtype_api,
         METH_O, NULL},
+    {"_load_from_filelike", (PyCFunction)_load_from_filelike,
+        METH_FASTCALL | METH_KEYWORDS, NULL},
     /* from umath */
     {"frompyfunc",
         (PyCFunction) ufunc_frompyfunc,