Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ available in the repository.
2. [option](https://github.com/ijl/orjson?tab=readme-ov-file#option)
3. [Fragment](https://github.com/ijl/orjson?tab=readme-ov-file#fragment)
5. [Deserialize](https://github.com/ijl/orjson?tab=readme-ov-file#deserialize)
1. [loads_next](https://github.com/ijl/orjson?tab=readme-ov-file#loads_next)
2. [Types](https://github.com/ijl/orjson?tab=readme-ov-file#types)
1. [dataclass](https://github.com/ijl/orjson?tab=readme-ov-file#dataclass)
2. [datetime](https://github.com/ijl/orjson?tab=readme-ov-file#datetime)
Expand Down Expand Up @@ -627,6 +628,90 @@ to parse the document.
`JSONDecodeError` is a subclass of `json.JSONDecodeError` and `ValueError`.
This is for compatibility with the standard library.

### loads_next

```python
def loads_next(__obj: Union[bytes, bytearray, memoryview]) -> Tuple[Any, int]: ...
```

`loads_next()` deserializes the next JSON document from a buffer and returns
a tuple of `(parsed_object, bytes_consumed)`. This is useful for parsing
multiple JSON documents from a single buffer, such as concatenated JSON
objects or newline-delimited JSON (NDJSON).

Unlike `loads()`, which requires the input to contain exactly one JSON
document with only whitespace after it, `loads_next()` stops parsing after
the first complete JSON document and reports how many bytes were consumed.
This allows the caller to continue parsing additional documents from the
same buffer.

Only binary input (`bytes`, `bytearray`, `memoryview`) is accepted.
`str` input is not supported and will raise `TypeError`.

The input must be valid UTF-8.

```python
>>> import orjson
>>> data = b'{"a":1}{"b":2}{"c":3}'
>>> obj1, n1 = orjson.loads_next(data)
>>> obj1
{'a': 1}
>>> n1
7
>>> obj2, n2 = orjson.loads_next(data[n1:])
>>> obj2
{'b': 2}
>>> obj3, n3 = orjson.loads_next(data[n1 + n2:])
>>> obj3
{'c': 3}
```

This is particularly useful for processing NDJSON (newline-delimited JSON)
files or streams:

```python
>>> import orjson
>>> ndjson_data = b'{"id":1,"value":"a"}\n{"id":2,"value":"b"}\n{"id":3,"value":"c"}\n'
>>> offset = 0
>>> results = []
>>> while offset < len(ndjson_data):
...     obj, consumed = orjson.loads_next(ndjson_data[offset:])
...     results.append(obj)
...     offset += consumed
>>> results
[{'id': 1, 'value': 'a'}, {'id': 2, 'value': 'b'}, {'id': 3, 'value': 'c'}]
```

Whitespace (spaces, tabs, newlines, carriage returns) before and after the
JSON document is consumed and included in the byte count:

```python
>>> import orjson
>>> data = b' {"key": "value"} \n{"next": true}'
>>> obj, consumed = orjson.loads_next(data)
>>> obj
{'key': 'value'}
>>> consumed # Includes leading and trailing whitespace
21
```

All of the same error conditions from `loads()` apply to `loads_next()`,
with the addition of a `TypeError` if given a `str` instead of binary input.

Note that with a large input, the library may attempt to allocate far more
memory than is actually required to parse the next document, and the call may
fail as a result. User code can mitigate this by passing a smaller slice of the
input, and retrying with a larger slice if the smaller one did not contain a
complete document.

**When to use:** Use `loads()` when you have exactly one JSON document.
Use `loads_next()` when you need to parse multiple JSON documents from a
single buffer or stream, such as NDJSON, concatenated JSON objects, or
streaming JSON APIs.

## Types

### dataclass
Expand Down
3 changes: 0 additions & 3 deletions include/yyjson/yyjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -5689,7 +5689,6 @@ static_noinline yyjson_doc *read_root_single(u8 *hdr,
} else {
while (char_is_space(*cur)) cur++;
}
if (unlikely(cur < end)) goto fail_garbage;
}

doc = (yyjson_doc *)val_hdr;
Expand Down Expand Up @@ -6053,7 +6052,6 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr,
} else {
while (char_is_space(*cur)) cur++;
}
if (unlikely(cur < end)) goto fail_garbage;
}

doc = (yyjson_doc *)val_hdr;
Expand Down Expand Up @@ -6454,7 +6452,6 @@ static_inline yyjson_doc *read_root_pretty(u8 *hdr,
} else {
while (char_is_space(*cur)) cur++;
}
if (unlikely(cur < end)) goto fail_garbage;
}

doc = (yyjson_doc *)val_hdr;
Expand Down
1 change: 1 addition & 0 deletions pysrc/orjson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"JSONDecodeError",
"JSONEncodeError",
"loads",
"loads_next",
"OPT_APPEND_NEWLINE",
"OPT_INDENT_2",
"OPT_NAIVE_UTC",
Expand Down
1 change: 1 addition & 0 deletions pysrc/orjson/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def dumps(
option: int | None = ...,
) -> bytes: ...
def loads(__obj: bytes | bytearray | memoryview | str) -> Any: ...
def loads_next(__obj: bytes | bytearray | memoryview) -> tuple[Any, int]: ...

class JSONDecodeError(json.JSONDecodeError): ...
class JSONEncodeError(TypeError): ...
Expand Down
2 changes: 2 additions & 0 deletions script/vendor-yyjson
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ sed -i 's/(pre && !false)/(false)/g' include/yyjson/yyjson.c

git apply include/yyjson-recursion-limit.patch
git apply include/yyjson-reduce-unused.patch

sed -i '/ if (unlikely(cur < end)) goto fail_garbage;/d' include/yyjson/yyjson.c
28 changes: 24 additions & 4 deletions src/deserialize/backend/yyjson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ use super::ffi::{
YYJSON_READ_SUCCESS, yyjson_alc, yyjson_alc_pool_init, yyjson_doc, yyjson_read_err,
yyjson_read_opts, yyjson_val,
};
use crate::deserialize::DeserializeError;
use crate::deserialize::pyobject::{
get_unicode_key, parse_f64, parse_false, parse_i64, parse_none, parse_true, parse_u64,
};
use crate::deserialize::{DeserializeError, DeserializeResult};
use crate::str::PyStr;
use crate::util::usize_to_isize;
use core::ffi::c_char;
Expand Down Expand Up @@ -57,6 +57,10 @@ fn unsafe_yyjson_is_ctn(val: *mut yyjson_val) -> bool {
unsafe { (*val).tag as u8 & 0b00000110 == 0b00000110 }
}

fn unsafe_yyjson_doc_get_read_size(doc: *mut yyjson_doc) -> usize {
unsafe { (*doc).dat_read }
}

#[allow(clippy::cast_ptr_alignment)]
fn unsafe_yyjson_get_next_container(val: *mut yyjson_val) -> *mut yyjson_val {
unsafe { (val.cast::<u8>().add((*val).uni.ofs)).cast::<yyjson_val>() }
Expand All @@ -68,8 +72,9 @@ fn unsafe_yyjson_get_next_non_container(val: *mut yyjson_val) -> *mut yyjson_val
}

pub(crate) fn deserialize(
data: &'static str,
) -> Result<NonNull<crate::ffi::PyObject>, DeserializeError<'static>> {
data: &'static [u8],
must_read_all: bool,
) -> Result<DeserializeResult, DeserializeError<'static>> {
assume!(!data.is_empty());
let buffer_capacity = buffer_capacity_to_allocate(data.len());
let buffer_ptr = ffi!(PyMem_Malloc(buffer_capacity));
Expand Down Expand Up @@ -109,6 +114,18 @@ pub(crate) fn deserialize(
let msg: Cow<str> = unsafe { core::ffi::CStr::from_ptr(err.msg).to_string_lossy() };
return Err(DeserializeError::from_yyjson(msg, err.pos as i64, data));
}

let bytes_read = unsafe { unsafe_yyjson_doc_get_read_size(doc) };

if must_read_all && bytes_read != data.len() {
ffi!(PyMem_Free(buffer_ptr));
return Err(DeserializeError::from_yyjson(
Cow::Borrowed("Did not consume all input data"),
bytes_read as i64,
data,
));
}

let val = yyjson_doc_get_root(doc);
let pyval = {
if !unsafe_yyjson_is_ctn(val) {
Expand Down Expand Up @@ -140,7 +157,10 @@ pub(crate) fn deserialize(
}
};
ffi!(PyMem_Free(buffer_ptr));
Ok(pyval)
Ok(DeserializeResult {
obj: pyval,
bytes_read,
})
}

enum ElementType {
Expand Down
30 changes: 22 additions & 8 deletions src/deserialize/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,39 @@ use crate::deserialize::utf8::read_input_to_buf;
use crate::typeref::EMPTY_UNICODE;
use core::ptr::NonNull;

pub(crate) struct DeserializeResult {
pub(crate) obj: NonNull<crate::ffi::PyObject>,
pub(crate) bytes_read: usize,
}

pub(crate) fn deserialize(
ptr: *mut crate::ffi::PyObject,
) -> Result<NonNull<crate::ffi::PyObject>, DeserializeError<'static>> {
must_read_all: bool,
) -> Result<DeserializeResult, DeserializeError<'static>> {
debug_assert!(ffi!(Py_REFCNT(ptr)) >= 1);
let buffer = read_input_to_buf(ptr)?;
debug_assert!(!buffer.is_empty());

if buffer.len() == 2 {
cold_path!();
if buffer == b"[]" {
return Ok(nonnull!(ffi!(PyList_New(0))));
return Ok(DeserializeResult {
obj: nonnull!(ffi!(PyList_New(0))),
bytes_read: 2,
});
} else if buffer == b"{}" {
return Ok(nonnull!(ffi!(PyDict_New())));
return Ok(DeserializeResult {
obj: nonnull!(ffi!(PyDict_New())),
bytes_read: 2,
});
} else if buffer == b"\"\"" {
unsafe { return Ok(nonnull!(use_immortal!(EMPTY_UNICODE))) }
unsafe {
return Ok(DeserializeResult {
obj: nonnull!(use_immortal!(EMPTY_UNICODE)),
bytes_read: 2,
});
}
}
}

let buffer_str = unsafe { core::str::from_utf8_unchecked(buffer) };

crate::deserialize::backend::deserialize(buffer_str)
crate::deserialize::backend::deserialize(buffer, must_read_all)
}
3 changes: 2 additions & 1 deletion src/deserialize/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ impl<'a> DeserializeError<'a> {
}

#[cold]
pub fn from_yyjson(message: Cow<'a, str>, pos: i64, data: &'a str) -> Self {
pub fn from_yyjson(message: Cow<'a, str>, pos: i64, data: &'a [u8]) -> Self {
let data = unsafe { core::str::from_utf8_unchecked(data) };
DeserializeError {
message: message,
data: Some(data),
Expand Down
2 changes: 1 addition & 1 deletion src/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ mod utf8;

#[cfg(not(Py_GIL_DISABLED))]
pub(crate) use cache::{KEY_MAP, KeyMap};
pub(crate) use deserializer::deserialize;
pub(crate) use deserializer::{DeserializeResult, deserialize};
pub(crate) use error::DeserializeError;
61 changes: 55 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ mod typeref;

use crate::ffi::{
METH_KEYWORDS, METH_O, Py_DECREF, Py_SIZE, Py_ssize_t, PyCFunction_NewEx, PyErr_SetObject,
PyLong_AsLong, PyLong_FromLongLong, PyMethodDef, PyMethodDefPointer, PyModuleDef,
PyModuleDef_HEAD_INIT, PyModuleDef_Slot, PyObject, PyTuple_New, PyUnicode_FromStringAndSize,
PyUnicode_InternFromString, PyVectorcall_NARGS,
PyExc_TypeError, PyLong_AsLong, PyLong_FromLongLong, PyMethodDef, PyMethodDefPointer,
PyModuleDef, PyModuleDef_HEAD_INIT, PyModuleDef_Slot, PyObject, PyTuple_New,
PyUnicode_FromStringAndSize, PyUnicode_InternFromString, PyVectorcall_NARGS,
};
use core::ffi::{c_char, c_int, c_void};

use crate::typeref::STR_TYPE;
use crate::util::{isize_to_usize, usize_to_isize};

#[allow(unused_imports)]
Expand Down Expand Up @@ -185,6 +186,25 @@ pub(crate) unsafe extern "C" fn orjson_init_exec(mptr: *mut PyObject) -> c_int {
add!(mptr, c"loads", func);
}

{
let loads_next_doc = c"loads_next(obj, /)\n--\n\nDeserialize the next JSON document from an UTF-8 byte buffer, returning the document and number of bytes read.";

let wrapped_loads_next = PyMethodDef {
ml_name: c"loads_next".as_ptr(),
ml_meth: PyMethodDefPointer {
PyCFunction: loads_next,
},
ml_flags: METH_O,
ml_doc: loads_next_doc.as_ptr(),
};
let func = PyCFunction_NewEx(
Box::into_raw(Box::new(wrapped_loads_next)),
null_mut(),
PyUnicode_InternFromString(c"orjson".as_ptr()),
);
add!(mptr, c"loads_next", func);
}

add!(mptr, c"Fragment", typeref::FRAGMENT_TYPE.cast::<PyObject>());

opt!(mptr, c"OPT_APPEND_NEWLINE", opt::APPEND_NEWLINE);
Expand Down Expand Up @@ -268,7 +288,7 @@ pub(crate) unsafe extern "C" fn PyInit_orjson() -> *mut PyModuleDef {
#[cold]
#[inline(never)]
#[cfg_attr(feature = "optimize", optimize(size))]
fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject {
pub(crate) fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject {
unsafe {
let err_pos = err.pos();
let msg = err.message;
Expand Down Expand Up @@ -373,8 +393,37 @@ fn raise_dumps_exception_dynamic(err: &str) -> *mut PyObject {

#[unsafe(no_mangle)]
pub(crate) unsafe extern "C" fn loads(_self: *mut PyObject, obj: *mut PyObject) -> *mut PyObject {
match crate::deserialize::deserialize(obj) {
Ok(val) => val.as_ptr(),
match deserialize::deserialize(obj, true) {
Ok(deserialize::DeserializeResult { obj, .. }) => obj.as_ptr(),
Err(err) => raise_loads_exception(err),
}
}

#[unsafe(no_mangle)]
pub(crate) unsafe extern "C" fn loads_next(
_self: *mut PyObject,
obj: *mut PyObject,
) -> *mut PyObject {
if is_type!(ob_type!(obj), STR_TYPE) {
cold_path!();
unsafe {
let msg = "loads_next requires binary input, not str";
let err_msg =
PyUnicode_FromStringAndSize(msg.as_ptr().cast::<c_char>(), msg.len() as isize);
PyErr_SetObject(PyExc_TypeError, err_msg);
Py_DECREF(err_msg);
return null_mut();
};
}

match deserialize::deserialize(obj, false) {
Ok(deserialize::DeserializeResult { obj, bytes_read }) => unsafe {
let result_tuple = PyTuple_New(2);
ffi::PyTuple_SET_ITEM(result_tuple, 0, obj.as_ptr());
let bytes_read_obj = PyLong_FromLongLong(bytes_read as i64);
ffi::PyTuple_SET_ITEM(result_tuple, 1, bytes_read_obj);
result_tuple
},
Err(err) => raise_loads_exception(err),
}
}
Expand Down
Loading