From 40a61c150adee6beb9961302fece81c33639082e Mon Sep 17 00:00:00 2001 From: Chongchen Chen Date: Sun, 16 Feb 2025 02:31:00 +0800 Subject: [PATCH 01/22] add to_timestamp_nanos (#1020) --- python/datafusion/functions.py | 1 + python/tests/test_functions.py | 4 ++++ src/functions.rs | 2 ++ 3 files changed, 7 insertions(+) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 7c2fa9a8f..5c260aade 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -252,6 +252,7 @@ "to_hex", "to_timestamp", "to_timestamp_micros", + "to_timestamp_nanos", "to_timestamp_millis", "to_timestamp_seconds", "to_unixtime", diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 796b1f76e..b1a739b49 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,6 +871,7 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), ) result = df.collect() assert len(result) == 1 @@ -909,6 +910,9 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) + assert result.column(11) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): diff --git a/src/functions.rs b/src/functions.rs index 46c748cf8..6a8abb18d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -553,6 +553,7 @@ expr_fn!( expr_fn!(now); expr_fn_vec!(to_timestamp); expr_fn_vec!(to_timestamp_millis); +expr_fn_vec!(to_timestamp_nanos); expr_fn_vec!(to_timestamp_micros); expr_fn_vec!(to_timestamp_seconds); expr_fn_vec!(to_unixtime); @@ -977,6 +978,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(to_hex))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_micros))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_seconds))?; m.add_wrapped(wrap_pyfunction!(to_unixtime))?; From 3584bec8900bcfb33bcae4b85a3c47a46b82c72e Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Wed, 19 Feb 2025 20:50:31 -0500 Subject: [PATCH 02/22] [infra] Fail Clippy on rust build warnings (#1029) * pyo3 update required changes to deprecated interfaces * Substrait feature clippy updates * PyTuple was called twice * add -D warnings option --------- Co-authored-by: Tim Saucer --- .github/workflows/test.yaml | 2 +- .pre-commit-config.yaml | 2 +- src/config.rs | 10 +++--- src/context.rs | 12 +++---- src/dataframe.rs | 17 +++++---- src/dataset.rs | 2 +- src/dataset_exec.rs | 8 ++--- src/errors.rs | 4 +++ src/expr.rs | 61 ++++++++++++++++---------------- src/expr/aggregate.rs | 6 ++-- src/expr/analyze.rs | 6 ++-- src/expr/create_memory_table.rs | 6 ++-- src/expr/create_view.rs | 6 ++-- src/expr/distinct.rs | 6 ++-- src/expr/drop_table.rs | 6 ++-- src/expr/empty_relation.rs | 6 ++-- src/expr/explain.rs | 6 ++-- src/expr/extension.rs | 6 ++-- src/expr/filter.rs | 6 ++-- src/expr/join.rs | 6 ++-- src/expr/limit.rs | 6 ++-- src/expr/literal.rs | 6 ++-- src/expr/logical_node.rs | 4 +-- src/expr/projection.rs | 6 ++-- src/expr/repartition.rs | 6 ++-- src/expr/sort.rs | 6 ++-- src/expr/subquery.rs | 6 ++-- 
src/expr/subquery_alias.rs | 6 ++-- src/expr/table_scan.rs | 6 ++-- src/expr/union.rs | 6 ++-- src/expr/unnest.rs | 6 ++-- src/expr/window.rs | 6 ++-- src/lib.rs | 10 +++--- src/physical_plan.rs | 2 +- src/pyarrow_filter_expression.rs | 36 ++++++++++--------- src/pyarrow_util.rs | 4 +-- src/sql/logical.rs | 4 +-- src/substrait.rs | 4 +-- src/udaf.rs | 5 +-- src/udf.rs | 5 +-- src/udwf.rs | 44 +++++++++++------------ 41 files changed, 188 insertions(+), 180 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c93d4c06f..c1d9ac838 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -71,7 +71,7 @@ jobs: - name: Run Clippy if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} - run: cargo clippy --all-targets --all-features -- -D clippy::all -A clippy::redundant_closure + run: cargo clippy --all-targets --all-features -- -D clippy::all -D warnings -A clippy::redundant_closure - name: Install dependencies and build uses: astral-sh/setup-uv@v5 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e20fedf5c..b548ff18f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: - id: rust-clippy name: Rust clippy description: Run cargo clippy on files included in the commit. clippy should be installed before-hand. - entry: cargo clippy --all-targets --all-features -- -Dclippy::all -Aclippy::redundant_closure + entry: cargo clippy --all-targets --all-features -- -Dclippy::all -D warnings -Aclippy::redundant_closure pass_filenames: false types: [file, rust] language: system diff --git a/src/config.rs b/src/config.rs index cc725b9a3..667d5c590 100644 --- a/src/config.rs +++ b/src/config.rs @@ -47,14 +47,14 @@ impl PyConfig { } /// Get a configuration option - pub fn get(&mut self, key: &str, py: Python) -> PyResult { + pub fn get<'py>(&mut self, key: &str, py: Python<'py>) -> PyResult> { let options = self.config.to_owned(); for entry in options.entries() { if entry.key == key { - return Ok(entry.value.into_py(py)); + return Ok(entry.value.into_pyobject(py)?); } } - Ok(None::.into_py(py)) + Ok(None::.into_pyobject(py)?) 
} /// Set a configuration option @@ -66,10 +66,10 @@ impl PyConfig { /// Get all configuration options pub fn get_all(&mut self, py: Python) -> PyResult { - let dict = PyDict::new_bound(py); + let dict = PyDict::new(py); let options = self.config.to_owned(); for entry in options.entries() { - dict.set_item(entry.key, entry.value.clone().into_py(py))?; + dict.set_item(entry.key, entry.value.clone().into_pyobject(py)?)?; } Ok(dict.into()) } diff --git a/src/context.rs b/src/context.rs index ebe7db230..0f962638e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -458,8 +458,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pylist", args)?; // Convert Arrow Table to datafusion DataFrame @@ -478,8 +478,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pydict", args)?; // Convert Arrow Table to datafusion DataFrame @@ -533,8 +533,8 @@ impl PySessionContext { let py = data.py(); // Instantiate pyarrow Table object & convert to Arrow Table - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[data]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[data])?; let table = table_class.call_method1("from_pandas", args)?; // Convert Arrow Table to datafusion DataFrame diff --git a/src/dataframe.rs b/src/dataframe.rs index 13d7ae838..ed9578a71 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -545,12 +545,12 @@ impl PyDataFrame { /// Convert to Arrow Table /// Collect the batches and pass to Arrow Table fn to_arrow_table(&self, py: Python<'_>) -> PyResult { - let batches = self.collect(py)?.to_object(py); - let schema: PyObject = self.schema().into_pyobject(py)?.to_object(py); + let batches = self.collect(py)?.into_pyobject(py)?; + let schema = self.schema().into_pyobject(py)?; // Instantiate pyarrow Table object and use its from_batches method - let table_class = py.import_bound("pyarrow")?.getattr("Table")?; - let args = PyTuple::new_bound(py, &[batches, schema]); + let table_class = py.import("pyarrow")?.getattr("Table")?; + let args = PyTuple::new(py, &[batches, schema])?; let table: PyObject = table_class.call_method1("from_batches", args)?.into(); Ok(table) } @@ -585,8 +585,7 @@ impl PyDataFrame { let ffi_stream = FFI_ArrowArrayStream::new(reader); let stream_capsule_name = CString::new("arrow_array_stream").unwrap(); - PyCapsule::new_bound(py, ffi_stream, Some(stream_capsule_name)) - .map_err(PyDataFusionError::from) + PyCapsule::new(py, ffi_stream, Some(stream_capsule_name)).map_err(PyDataFusionError::from) } fn execute_stream(&self, py: Python) -> PyDataFusionResult { @@ -649,8 +648,8 @@ impl PyDataFrame { /// Collect the batches, pass to Arrow Table & then convert to polars DataFrame fn to_polars(&self, py: Python<'_>) -> PyResult { let table = self.to_arrow_table(py)?; - let dataframe = py.import_bound("polars")?.getattr("DataFrame")?; - let args = 
PyTuple::new_bound(py, &[table]); + let dataframe = py.import("polars")?.getattr("DataFrame")?; + let args = PyTuple::new(py, &[table])?; let result: PyObject = dataframe.call1(args)?.into(); Ok(result) } @@ -673,7 +672,7 @@ fn print_dataframe(py: Python, df: DataFrame) -> PyDataFusionResult<()> { // Import the Python 'builtins' module to access the print function // Note that println! does not print to the Python debug console and is not visible in notebooks for instance - let print = py.import_bound("builtins")?.getattr("print")?; + let print = py.import("builtins")?.getattr("print")?; print.call1((result,))?; Ok(()) } diff --git a/src/dataset.rs b/src/dataset.rs index a8fa21ec5..0baf4da2a 100644 --- a/src/dataset.rs +++ b/src/dataset.rs @@ -48,7 +48,7 @@ impl Dataset { // Creates a Python PyArrow.Dataset pub fn new(dataset: &Bound<'_, PyAny>, py: Python) -> PyResult { // Ensure that we were passed an instance of pyarrow.dataset.Dataset - let ds = PyModule::import_bound(py, "pyarrow.dataset")?; + let ds = PyModule::import(py, "pyarrow.dataset")?; let ds_attr = ds.getattr("Dataset")?; let ds_type = ds_attr.downcast::()?; if dataset.is_instance(ds_type)? { diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs index ace42115b..445e4fe74 100644 --- a/src/dataset_exec.rs +++ b/src/dataset_exec.rs @@ -104,7 +104,7 @@ impl DatasetExec { }) .transpose()?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs.set_item("columns", columns.clone())?; kwargs.set_item( @@ -121,7 +121,7 @@ impl DatasetExec { .0, ); - let builtins = Python::import_bound(py, "builtins")?; + let builtins = Python::import(py, "builtins")?; let pylist = builtins.getattr("list")?; // Get the fragments or partitions of the dataset @@ -198,7 +198,7 @@ impl ExecutionPlan for DatasetExec { let dataset_schema = dataset .getattr("schema") .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; - let kwargs = PyDict::new_bound(py); + let kwargs = PyDict::new(py); kwargs .set_item("columns", self.columns.clone()) .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; @@ -223,7 +223,7 @@ impl ExecutionPlan for DatasetExec { let record_batches: Bound<'_, PyIterator> = scanner .call_method0("to_batches") .map_err(|err| InnerDataFusionError::External(Box::new(err)))? 
- .iter() + .try_iter() .map_err(|err| InnerDataFusionError::External(Box::new(err)))?; let record_batches = PyArrowBatchesAdapter { diff --git a/src/errors.rs b/src/errors.rs index b02b754a2..f1d5aeb23 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -91,3 +91,7 @@ pub fn py_datafusion_err(e: impl Debug) -> PyErr { pub fn py_unsupported_variant_err(e: impl Debug) -> PyErr { PyErr::new::(format!("{e:?}")) } + +pub fn to_datafusion_err(e: impl Debug) -> InnerDataFusionError { + InnerDataFusionError::Execution(format!("{e:?}")) +} diff --git a/src/expr.rs b/src/expr.rs index 1e9983d42..e750be6a4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -19,6 +19,7 @@ use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, }; +use pyo3::IntoPyObjectExt; use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use std::sync::Arc; @@ -126,35 +127,35 @@ pub fn py_expr_list(expr: &[Expr]) -> PyResult> { #[pymethods] impl PyExpr { /// Return the specific expression - fn to_variant(&self, py: Python) -> PyResult { + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { Python::with_gil(|_| { match &self.expr { - Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_py(py)), - Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_py(py)), + Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?), + Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?), Expr::ScalarVariable(data_type, variables) => { - Ok(PyScalarVariable::new(data_type, variables).into_py(py)) + Ok(PyScalarVariable::new(data_type, variables).into_bound_py_any(py)?) } - Expr::Like(value) => Ok(PyLike::from(value.clone()).into_py(py)), - Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_py(py)), - Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_py(py)), - Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_py(py)), - Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_py(py)), - Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_py(py)), - Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_py(py)), - Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_py(py)), - Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_py(py)), - Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_py(py)), - Expr::IsNotFalse(expr) => Ok(PyIsNotFalse::new(*expr.clone()).into_py(py)), - Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_py(py)), - Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_py(py)), + Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), + Expr::Literal(value) => Ok(PyLiteral::from(value.clone()).into_bound_py_any(py)?), + Expr::BinaryExpr(expr) => Ok(PyBinaryExpr::from(expr.clone()).into_bound_py_any(py)?), + Expr::Not(expr) => Ok(PyNot::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotNull(expr) => Ok(PyIsNotNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNull(expr) => Ok(PyIsNull::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsTrue(expr) => Ok(PyIsTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsFalse(expr) => Ok(PyIsFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsUnknown(expr) => Ok(PyIsUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotTrue(expr) => Ok(PyIsNotTrue::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotFalse(expr) => 
Ok(PyIsNotFalse::new(*expr.clone()).into_bound_py_any(py)?), + Expr::IsNotUnknown(expr) => Ok(PyIsNotUnknown::new(*expr.clone()).into_bound_py_any(py)?), + Expr::Negative(expr) => Ok(PyNegative::new(*expr.clone()).into_bound_py_any(py)?), Expr::AggregateFunction(expr) => { - Ok(PyAggregateFunction::from(expr.clone()).into_py(py)) + Ok(PyAggregateFunction::from(expr.clone()).into_bound_py_any(py)?) } - Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_py(py)), - Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_py(py)), - Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_py(py)), - Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_py(py)), - Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_py(py)), + Expr::SimilarTo(value) => Ok(PySimilarTo::from(value.clone()).into_bound_py_any(py)?), + Expr::Between(value) => Ok(between::PyBetween::from(value.clone()).into_bound_py_any(py)?), + Expr::Case(value) => Ok(case::PyCase::from(value.clone()).into_bound_py_any(py)?), + Expr::Cast(value) => Ok(cast::PyCast::from(value.clone()).into_bound_py_any(py)?), + Expr::TryCast(value) => Ok(cast::PyTryCast::from(value.clone()).into_bound_py_any(py)?), Expr::ScalarFunction(value) => Err(py_unsupported_variant_err(format!( "Converting Expr::ScalarFunction to a Python object is not implemented: {:?}", value @@ -163,29 +164,29 @@ impl PyExpr { "Converting Expr::WindowFunction to a Python object is not implemented: {:?}", value ))), - Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_py(py)), - Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_py(py)), + Expr::InList(value) => Ok(in_list::PyInList::from(value.clone()).into_bound_py_any(py)?), + Expr::Exists(value) => Ok(exists::PyExists::from(value.clone()).into_bound_py_any(py)?), Expr::InSubquery(value) => { - Ok(in_subquery::PyInSubquery::from(value.clone()).into_py(py)) + Ok(in_subquery::PyInSubquery::from(value.clone()).into_bound_py_any(py)?) } Expr::ScalarSubquery(value) => { - Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_py(py)) + Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options ))), Expr::GroupingSet(value) => { - Ok(grouping_set::PyGroupingSet::from(value.clone()).into_py(py)) + Ok(grouping_set::PyGroupingSet::from(value.clone()).into_bound_py_any(py)?) } Expr::Placeholder(value) => { - Ok(placeholder::PyPlaceholder::from(value.clone()).into_py(py)) + Ok(placeholder::PyPlaceholder::from(value.clone()).into_bound_py_any(py)?) 
} Expr::OuterReferenceColumn(data_type, column) => Err(py_unsupported_variant_err(format!( "Converting Expr::OuterReferenceColumn to a Python object is not implemented: {:?} - {:?}", data_type, column ))), - Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_py(py)), + Expr::Unnest(value) => Ok(unnest_expr::PyUnnestExpr::from(value.clone()).into_bound_py_any(py)?), } }) } diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs index 389bfb332..8fc9da5b0 100644 --- a/src/expr/aggregate.rs +++ b/src/expr/aggregate.rs @@ -19,7 +19,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::expr::{AggregateFunction, Alias}; use datafusion::logical_expr::logical_plan::Aggregate; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -151,7 +151,7 @@ impl LogicalNode for PyAggregate { vec![PyLogicalPlan::from((*self.aggregate.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/analyze.rs b/src/expr/analyze.rs index 084513971..62f93cd26 100644 --- a/src/expr/analyze.rs +++ b/src/expr/analyze.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Analyze; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -78,7 +78,7 @@ impl LogicalNode for PyAnalyze { vec![PyLogicalPlan::from((*self.analyze.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_memory_table.rs b/src/expr/create_memory_table.rs index 01ebb66b0..8872b2d47 100644 --- a/src/expr/create_memory_table.rs +++ b/src/expr/create_memory_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::CreateMemoryTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -91,7 +91,7 @@ impl LogicalNode for PyCreateMemoryTable { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/create_view.rs b/src/expr/create_view.rs index d119f5c21..87bb76876 100644 --- a/src/expr/create_view.rs +++ b/src/expr/create_view.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{CreateView, DdlStatement, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -88,8 +88,8 @@ impl LogicalNode for PyCreateView { vec![PyLogicalPlan::from((*self.create.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/distinct.rs b/src/expr/distinct.rs index 061ab4824..b62b776f8 100644 --- a/src/expr/distinct.rs +++ b/src/expr/distinct.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Distinct; -use 
pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -89,7 +89,7 @@ impl LogicalNode for PyDistinct { } } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/drop_table.rs b/src/expr/drop_table.rs index 330156abe..96983c1cf 100644 --- a/src/expr/drop_table.rs +++ b/src/expr/drop_table.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::logical_plan::DropTable; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -83,7 +83,7 @@ impl LogicalNode for PyDropTable { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/empty_relation.rs b/src/expr/empty_relation.rs index ce7163466..a1534ac15 100644 --- a/src/expr/empty_relation.rs +++ b/src/expr/empty_relation.rs @@ -17,7 +17,7 @@ use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; use datafusion::logical_expr::EmptyRelation; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use super::logical_node::LogicalNode; @@ -79,7 +79,7 @@ impl LogicalNode for PyEmptyRelation { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/explain.rs b/src/expr/explain.rs index 8e7fb8843..fc02fe2b5 100644 --- a/src/expr/explain.rs +++ b/src/expr/explain.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Explain, LogicalPlan}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -104,7 +104,7 @@ impl LogicalNode for PyExplain { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/extension.rs b/src/expr/extension.rs index a29802b0b..1e3fbb199 100644 --- a/src/expr/extension.rs +++ b/src/expr/extension.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::Extension; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -46,7 +46,7 @@ impl LogicalNode for PyExtension { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/filter.rs b/src/expr/filter.rs index a6d8aa7ee..9bdb667cd 100644 --- a/src/expr/filter.rs +++ b/src/expr/filter.rs @@ -16,7 +16,7 @@ // under the License. 
use datafusion::logical_expr::logical_plan::Filter; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -81,7 +81,7 @@ impl LogicalNode for PyFilter { vec![PyLogicalPlan::from((*self.filter.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/join.rs b/src/expr/join.rs index 66e677f8a..76ec532e7 100644 --- a/src/expr/join.rs +++ b/src/expr/join.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::{Join, JoinConstraint, JoinType}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -193,7 +193,7 @@ impl LogicalNode for PyJoin { ] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/limit.rs b/src/expr/limit.rs index 84ad7d68b..c2a33ff89 100644 --- a/src/expr/limit.rs +++ b/src/expr/limit.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Limit; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -90,7 +90,7 @@ impl LogicalNode for PyLimit { vec![PyLogicalPlan::from((*self.limit.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/literal.rs b/src/expr/literal.rs index 2cb2079f1..a660ac914 100644 --- a/src/expr/literal.rs +++ b/src/expr/literal.rs @@ -17,7 +17,7 @@ use crate::errors::PyDataFusionError; use datafusion::common::ScalarValue; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; #[pyclass(name = "Literal", module = "datafusion.expr", subclass)] #[derive(Clone)] @@ -144,8 +144,8 @@ impl PyLiteral { } #[allow(clippy::wrong_self_convention)] - fn into_type(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn into_type<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } fn __repr__(&self) -> PyResult { diff --git a/src/expr/logical_node.rs b/src/expr/logical_node.rs index 757e4f94b..5aff70059 100644 --- a/src/expr/logical_node.rs +++ b/src/expr/logical_node.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use pyo3::{PyObject, PyResult, Python}; +use pyo3::{Bound, PyAny, PyResult, Python}; use crate::sql::logical::PyLogicalPlan; @@ -25,5 +25,5 @@ pub trait LogicalNode { /// The input plan to the current logical node instance. 
fn inputs(&self) -> Vec; - fn to_variant(&self, py: Python) -> PyResult; + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult>; } diff --git a/src/expr/projection.rs b/src/expr/projection.rs index 36534fdb2..dc7e5e3c1 100644 --- a/src/expr/projection.rs +++ b/src/expr/projection.rs @@ -17,7 +17,7 @@ use datafusion::logical_expr::logical_plan::Projection; use datafusion::logical_expr::Expr; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -113,7 +113,7 @@ impl LogicalNode for PyProjection { vec![PyLogicalPlan::from((*self.projection.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/repartition.rs b/src/expr/repartition.rs index 4e680e181..3e782d6af 100644 --- a/src/expr/repartition.rs +++ b/src/expr/repartition.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::{logical_plan::Repartition, Expr, Partitioning}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{errors::py_type_err, sql::logical::PyLogicalPlan}; @@ -121,7 +121,7 @@ impl LogicalNode for PyRepartition { vec![PyLogicalPlan::from((*self.repartition.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/sort.rs b/src/expr/sort.rs index a1803ccaf..ed4947591 100644 --- a/src/expr/sort.rs +++ b/src/expr/sort.rs @@ -17,7 +17,7 @@ use datafusion::common::DataFusionError; use datafusion::logical_expr::logical_plan::Sort; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -96,7 +96,7 @@ impl LogicalNode for PySort { vec![PyLogicalPlan::from((*self.sort.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery.rs b/src/expr/subquery.rs index dac8d0a2b..5ebfe6927 100644 --- a/src/expr/subquery.rs +++ b/src/expr/subquery.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::Subquery; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::sql::logical::PyLogicalPlan; @@ -75,7 +75,7 @@ impl LogicalNode for PySubquery { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/subquery_alias.rs b/src/expr/subquery_alias.rs index a83cff96d..267a4d485 100644 --- a/src/expr/subquery_alias.rs +++ b/src/expr/subquery_alias.rs @@ -18,7 +18,7 @@ use std::fmt::{self, Display, Formatter}; use datafusion::logical_expr::SubqueryAlias; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use crate::{common::df_schema::PyDFSchema, sql::logical::PyLogicalPlan}; @@ -85,7 +85,7 @@ impl LogicalNode for PySubqueryAlias { vec![PyLogicalPlan::from((*self.subquery_alias.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff 
--git a/src/expr/table_scan.rs b/src/expr/table_scan.rs index f61be7fe4..6a0d53f0f 100644 --- a/src/expr/table_scan.rs +++ b/src/expr/table_scan.rs @@ -17,7 +17,7 @@ use datafusion::common::TableReference; use datafusion::logical_expr::logical_plan::TableScan; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::expr::logical_node::LogicalNode; @@ -146,7 +146,7 @@ impl LogicalNode for PyTableScan { vec![] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/union.rs b/src/expr/union.rs index 62488d9a1..5a08ccc13 100644 --- a/src/expr/union.rs +++ b/src/expr/union.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Union; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -83,7 +83,7 @@ impl LogicalNode for PyUnion { .collect() } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/unnest.rs b/src/expr/unnest.rs index adc705035..8e70e0990 100644 --- a/src/expr/unnest.rs +++ b/src/expr/unnest.rs @@ -16,7 +16,7 @@ // under the License. use datafusion::logical_expr::logical_plan::Unnest; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::df_schema::PyDFSchema; @@ -79,7 +79,7 @@ impl LogicalNode for PyUnnest { vec![PyLogicalPlan::from((*self.unnest_.input).clone())] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/expr/window.rs b/src/expr/window.rs index 4dc6cb9c9..13deaec25 100644 --- a/src/expr/window.rs +++ b/src/expr/window.rs @@ -18,7 +18,7 @@ use datafusion::common::{DataFusionError, ScalarValue}; use datafusion::logical_expr::expr::WindowFunction; use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits}; -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::fmt::{self, Display, Formatter}; use crate::common::data_type::PyScalarValue; @@ -289,7 +289,7 @@ impl LogicalNode for PyWindowExpr { vec![self.window.input.as_ref().clone().into()] } - fn to_variant(&self, py: Python) -> PyResult { - Ok(self.clone().into_py(py)) + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) } } diff --git a/src/lib.rs b/src/lib.rs index 317c3a49a..ce93ff0c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,21 +94,21 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; // Register `common` as a submodule. Matching `datafusion-common` https://docs.rs/datafusion-common/latest/datafusion_common/ - let common = PyModule::new_bound(py, "common")?; + let common = PyModule::new(py, "common")?; common::init_module(&common)?; m.add_submodule(&common)?; // Register `expr` as a submodule. 
Matching `datafusion-expr` https://docs.rs/datafusion-expr/latest/datafusion_expr/ - let expr = PyModule::new_bound(py, "expr")?; + let expr = PyModule::new(py, "expr")?; expr::init_module(&expr)?; m.add_submodule(&expr)?; // Register the functions as a submodule - let funcs = PyModule::new_bound(py, "functions")?; + let funcs = PyModule::new(py, "functions")?; functions::init_module(&funcs)?; m.add_submodule(&funcs)?; - let store = PyModule::new_bound(py, "object_store")?; + let store = PyModule::new(py, "object_store")?; store::init_module(&store)?; m.add_submodule(&store)?; @@ -121,7 +121,7 @@ fn _internal(py: Python, m: Bound<'_, PyModule>) -> PyResult<()> { #[cfg(feature = "substrait")] fn setup_substrait_module(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { - let substrait = PyModule::new_bound(py, "substrait")?; + let substrait = PyModule::new(py, "substrait")?; substrait::init_module(&substrait)?; m.add_submodule(&substrait)?; Ok(()) diff --git a/src/physical_plan.rs b/src/physical_plan.rs index 295908dc7..f0be45c6a 100644 --- a/src/physical_plan.rs +++ b/src/physical_plan.rs @@ -66,7 +66,7 @@ impl PyExecutionPlan { )?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs index 314eebf4f..4b4c86597 100644 --- a/src/pyarrow_filter_expression.rs +++ b/src/pyarrow_filter_expression.rs @@ -16,7 +16,7 @@ // under the License. /// Converts a Datafusion logical plan expression (Expr) into a PyArrow compute expression -use pyo3::prelude::*; +use pyo3::{prelude::*, IntoPyObjectExt}; use std::convert::TryFrom; use std::result::Result; @@ -53,24 +53,28 @@ fn operator_to_py<'py>( Ok(py_op) } -fn extract_scalar_list(exprs: &[Expr], py: Python) -> PyDataFusionResult> { +fn extract_scalar_list<'py>( + exprs: &[Expr], + py: Python<'py>, +) -> PyDataFusionResult>> { let ret = exprs .iter() .map(|expr| match expr { // TODO: should we also leverage `ScalarValue::to_pyarrow` here? 
Expr::Literal(v) => match v { - ScalarValue::Boolean(Some(b)) => Ok(b.into_py(py)), - ScalarValue::Int8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Int64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt8(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt16(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt32(Some(i)) => Ok(i.into_py(py)), - ScalarValue::UInt64(Some(i)) => Ok(i.into_py(py)), - ScalarValue::Float32(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Float64(Some(f)) => Ok(f.into_py(py)), - ScalarValue::Utf8(Some(s)) => Ok(s.into_py(py)), + // The unwraps here are for infallible conversions + ScalarValue::Boolean(Some(b)) => Ok(b.into_bound_py_any(py)?), + ScalarValue::Int8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Int64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt8(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt16(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt32(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::UInt64(Some(i)) => Ok(i.into_bound_py_any(py)?), + ScalarValue::Float32(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Float64(Some(f)) => Ok(f.into_bound_py_any(py)?), + ScalarValue::Utf8(Some(s)) => Ok(s.into_bound_py_any(py)?), _ => Err(PyDataFusionError::Common(format!( "PyArrow can't handle ScalarValue: {v:?}" ))), @@ -98,8 +102,8 @@ impl TryFrom<&Expr> for PyArrowFilterExpression { // https://arrow.apache.org/docs/python/generated/pyarrow.dataset.Expression.html#pyarrow-dataset-expression fn try_from(expr: &Expr) -> Result { Python::with_gil(|py| { - let pc = Python::import_bound(py, "pyarrow.compute")?; - let op_module = Python::import_bound(py, "operator")?; + let pc = Python::import(py, "pyarrow.compute")?; + let op_module = Python::import(py, "operator")?; let pc_expr: PyDataFusionResult> = match expr { Expr::Column(Column { name, .. 
}) => Ok(pc.getattr("field")?.call1((name,))?), Expr::Literal(scalar) => Ok(scalar_to_pyarrow(scalar, py)?.into_bound(py)), diff --git a/src/pyarrow_util.rs b/src/pyarrow_util.rs index 2b31467f8..cab708458 100644 --- a/src/pyarrow_util.rs +++ b/src/pyarrow_util.rs @@ -33,8 +33,8 @@ impl FromPyArrow for PyScalarValue { let val = value.call_method0("as_py")?; // construct pyarrow array from the python value and pyarrow type - let factory = py.import_bound("pyarrow")?.getattr("array")?; - let args = PyList::new_bound(py, [val]); + let factory = py.import("pyarrow")?.getattr("array")?; + let args = PyList::new(py, [val])?; let array = factory.call1((args, typ))?; // convert the pyarrow array to rust array using C data interface diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 1be33b75f..96561c434 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -64,7 +64,7 @@ impl PyLogicalPlan { #[pymethods] impl PyLogicalPlan { /// Return the specific logical operator - pub fn to_variant(&self, py: Python) -> PyResult { + pub fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { match self.plan.as_ref() { LogicalPlan::Aggregate(plan) => PyAggregate::from(plan.clone()).to_variant(py), LogicalPlan::Analyze(plan) => PyAnalyze::from(plan.clone()).to_variant(py), @@ -132,7 +132,7 @@ impl PyLogicalPlan { datafusion_proto::protobuf::LogicalPlanNode::try_from_logical_plan(&self.plan, &codec)?; let bytes = proto.encode_to_vec(); - Ok(PyBytes::new_bound(py, &bytes)) + Ok(PyBytes::new(py, &bytes)) } #[staticmethod] diff --git a/src/substrait.rs b/src/substrait.rs index 8dcf3e8a7..1fefc0bbd 100644 --- a/src/substrait.rs +++ b/src/substrait.rs @@ -40,7 +40,7 @@ impl PyPlan { self.plan .encode(&mut proto_bytes) .map_err(PyDataFusionError::EncodeError)?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } } @@ -95,7 +95,7 @@ impl PySubstraitSerializer { py: Python, ) -> PyDataFusionResult { let proto_bytes: Vec = wait_for_future(py, serializer::serialize_bytes(sql, &ctx.ctx))?; - Ok(PyBytes::new_bound(py, &proto_bytes).unbind().into()) + Ok(PyBytes::new(py, &proto_bytes).into()) } #[staticmethod] diff --git a/src/udaf.rs b/src/udaf.rs index 5f21533e0..34a9cd51d 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -29,6 +29,7 @@ use datafusion::logical_expr::{ }; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -73,7 +74,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function self.accum @@ -119,7 +120,7 @@ impl Accumulator for RustAccumulator { .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()) .collect::>(); - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. 
call function self.accum diff --git a/src/udf.rs b/src/udf.rs index 4570e77a6..574c9d7b5 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -28,6 +28,7 @@ use datafusion::logical_expr::function::ScalarFunctionImplementation; use datafusion::logical_expr::ScalarUDF; use datafusion::logical_expr::{create_udf, ColumnarValue}; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; @@ -46,11 +47,11 @@ fn pyarrow_function_to_rust( .map_err(|e| DataFusionError::Execution(format!("{e:?}"))) }) .collect::, _>>()?; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function let value = func - .call_bound(py, py_args, None) + .call(py, py_args, None) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; // 3. cast to arrow::array::Array diff --git a/src/udwf.rs b/src/udwf.rs index 04a4a1640..defd9c522 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -27,6 +27,7 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use crate::common::data_type::PyScalarValue; +use crate::errors::to_datafusion_err; use crate::expr::PyExpr; use crate::utils::parse_volatility; use datafusion::arrow::datatypes::DataType; @@ -56,8 +57,8 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn get_range(&self, idx: usize, n_rows: usize) -> Result> { Python::with_gil(|py| { - let py_args = vec![idx.to_object(py), n_rows.to_object(py)]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = vec![idx.into_pyobject(py)?, n_rows.into_pyobject(py)?]; + let py_args = PyTuple::new(py, py_args)?; self.evaluator .bind(py) @@ -93,17 +94,14 @@ impl PartitionEvaluator for RustPartitionEvaluator { fn evaluate_all(&mut self, values: &[ArrayRef], num_rows: usize) -> Result { println!("evaluate all called with number of values {}", values.len()); Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - ); - let py_num_rows = num_rows.to_object(py).into_bound(py); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), &py_num_rows]), - ); + )?; + let py_num_rows = num_rows.into_pyobject(py)?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), &py_num_rows])?; self.evaluator .bind(py) @@ -112,32 +110,28 @@ impl PartitionEvaluator for RustPartitionEvaluator { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate(&mut self, values: &[ArrayRef], range: &Range) -> Result { Python::with_gil(|py| { - let py_values = PyList::new_bound( + let py_values = PyList::new( py, values .iter() .map(|arg| arg.into_data().to_pyarrow(py).unwrap()), - ); - let range_tuple = - PyTuple::new_bound(py, vec![range.start.to_object(py), range.end.to_object(py)]); - let py_args = PyTuple::new_bound( - py, - PyTuple::new_bound(py, vec![py_values.as_any(), range_tuple.as_any()]), - ); + )?; + let range_tuple = PyTuple::new(py, vec![range.start, range.end])?; + let py_args = PyTuple::new(py, vec![py_values.as_any(), range_tuple.as_any()])?; self.evaluator .bind(py) .call_method1("evaluate", py_args) .and_then(|v| v.extract::()) .map(|v| v.0) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) }) + .map_err(to_datafusion_err) } fn evaluate_all_with_rank( @@ -148,23 +142,27 @@ impl PartitionEvaluator for RustPartitionEvaluator { 
Python::with_gil(|py| { let ranks = ranks_in_partition .iter() - .map(|r| PyTuple::new_bound(py, vec![r.start, r.end])); + .map(|r| PyTuple::new(py, vec![r.start, r.end])) + .collect::>>()?; // 1. cast args to Pyarrow array - let py_args = vec![num_rows.to_object(py), PyList::new_bound(py, ranks).into()]; + let py_args = vec![ + num_rows.into_pyobject(py)?.into_any(), + PyList::new(py, ranks)?.into_any(), + ]; - let py_args = PyTuple::new_bound(py, py_args); + let py_args = PyTuple::new(py, py_args)?; // 2. call function self.evaluator .bind(py) .call_method1("evaluate_all_with_rank", py_args) - .map_err(|e| DataFusionError::Execution(format!("{e}"))) .map(|v| { let array_data = ArrayData::from_pyarrow_bound(&v).unwrap(); make_array(array_data) }) }) + .map_err(to_datafusion_err) } fn supports_bounded_execution(&self) -> bool { From e6f6e66c1d180246ad933f8bcc0d40faa8426dfa Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 21 Feb 2025 16:03:36 -0500 Subject: [PATCH 03/22] Add user documentation for the FFI approach (#1031) * Initial commit for FFI user documentation * Update readme to point to the online documentation. Fix a small typo. * Small text adjustments for clarity and formatting --- README.md | 11 +- docs/source/contributor-guide/ffi.rst | 212 ++++++++++++++++++++++++++ docs/source/index.rst | 1 + 3 files changed, 220 insertions(+), 4 deletions(-) create mode 100644 docs/source/contributor-guide/ffi.rst diff --git a/README.md b/README.md index 5aaf7f5f3..9c56b62dd 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,8 @@ DataFusion's Python bindings can be used as a foundation for building new data s planning, and logical plan optimizations, and then transpiles the logical plan to Dask operations for execution. - [DataFusion Ballista](https://github.com/apache/datafusion-ballista) is a distributed SQL query engine that extends DataFusion's Python bindings for distributed use cases. - -It is also possible to use these Python bindings directly for DataFrame and SQL operations, but you may find that -[Polars](http://pola.rs/) and [DuckDB](http://www.duckdb.org/) are more suitable for this use case, since they have -more of an end-user focus and are more actively maintained than these Python bindings. +- [DataFusion Ray](https://github.com/apache/datafusion-ray) is another distributed query engine that uses + DataFusion's Python bindings. ## Features @@ -114,6 +112,11 @@ Printing the context will show the current configuration settings. print(ctx) ``` +## Extensions + +For information about how to extend DataFusion Python, please see the extensions page of the +[online documentation](https://datafusion.apache.org/python/). + ## More Examples See [examples](examples/README.md) for more information. diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst new file mode 100644 index 000000000..c1f9806b3 --- /dev/null +++ b/docs/source/contributor-guide/ffi.rst @@ -0,0 +1,212 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Python Extensions
+=================
+
+The DataFusion in Python project is designed to allow users to extend its functionality in a few core
+areas. Ideally many users would like to package their extensions as a Python package and easily
+integrate that package with this project. This page serves to describe some of the challenges we face
+when doing these integrations and the approach our project uses.
+
+The Primary Issue
+-----------------
+
+Suppose you wish to use DataFusion and you have a custom data source that can produce tables that
+can then be queried against, similar to how you can register a :ref:`CSV ` or
+:ref:`Parquet ` file. In DataFusion terminology, you likely want to implement a
+:ref:`Custom Table Provider `. In an effort to make your data source
+as performant as possible and to utilize the features of DataFusion, you may decide to write
+your source in Rust and then expose it through `PyO3 `_ as a Python library.
+
+At first glance, it may appear the best way to do this is to add the ``datafusion-python``
+crate as a dependency, provide a ``PyTable``, and then to register it with the
+``SessionContext``. Unfortunately, this will not work.
+
+When you produce your code as a Python library and it needs to interact with the DataFusion
+library, at the lowest level they communicate through an Application Binary Interface (ABI).
+The acronym sounds similar to API (Application Programming Interface), but it is distinctly
+different.
+
+The ABI sets the standard for how these libraries can share data and functions between each
+other. One of the key differences between Rust and other programming languages is that Rust
+does not have a stable ABI. What this means in practice is that if you compile a Rust library
+with one version of the ``rustc`` compiler and I compile another library to interface with it
+but I use a different version of the compiler, there is no guarantee the interface will be
+the same.
+
+In practice, this means that a Python library built with ``datafusion-python`` as a Rust
+dependency will generally **not** be compatible with the DataFusion Python package, even
+if they reference the same version of ``datafusion-python``. If you attempt to do this, it may
+work on your local computer if you have built both packages with the same optimizations.
+This can sometimes lead to a false expectation that the code will work, but it frequently
+breaks the moment you try to use your package against the released packages.
+
+You can find more information about the Rust ABI in their
+`online documentation `_.
+
+The FFI Approach
+----------------
+
+Rust supports interacting with other programming languages through its Foreign Function
+Interface (FFI). The advantage of using the FFI is that it enables you to write data structures
+and functions that have a stable ABI. This allows you to use Rust code with C, Python, and
+other languages. In fact, the `PyO3 `_ library uses the FFI to share data
+and functions between Python and Rust.
+
+The approach we are taking in the DataFusion in Python project is to incrementally expose
+more portions of the DataFusion project via FFI interfaces. This allows users to write Rust
+code that does **not** require the ``datafusion-python`` crate as a dependency, expose their
+code in Python via PyO3, and have it interact with the DataFusion Python package.
+
+Early adopters of this approach include `delta-rs `_,
+which has adapted its Table Provider for use in ``datafusion-python`` with only a few lines
+of code. Also, the DataFusion Python project uses the existing definitions from the
+`Apache Arrow CStream Interface `_
+to support importing **and** exporting tables. Any Python package that supports reading
+the Arrow C Stream interface can work with DataFusion Python out of the box! You can read
+more about working with Arrow sources in the :ref:`Data Sources `
+page.
+
+To learn more about the Foreign Function Interface in Rust, the
+`Rustonomicon `_ is a good resource.
+
+Inspiration from Arrow
+----------------------
+
+DataFusion is built upon `Apache Arrow `_. The canonical Python
+Arrow implementation, `pyarrow `_, provides
+an excellent way to share Arrow data between Python projects without performing any copy
+operations on the data. They do this by using a well-defined set of interfaces. You can
+find the details about their stream interface
+`here `_. The
+`Rust Arrow Implementation `_ also supports these
+``C`` style definitions via the Foreign Function Interface.
+
+In addition to using these interfaces to transfer Arrow data between libraries, ``pyarrow``
+goes one step further to make sharing the interfaces easier in Python. They do this
+by exposing PyCapsules that contain the expected functionality.
+
+You can learn more about PyCapsules from the official
+`Python online documentation `_. PyCapsules
+have excellent support in PyO3 already. The
+`PyO3 online documentation `_ is a good source
+for more details on using PyCapsules in Rust.
+
+Two lessons we leverage from the Arrow project in DataFusion Python are:
+
+- We reuse the existing Arrow FFI functionality wherever possible.
+- We expose PyCapsules that contain an FFI-stable struct.
+
+Implementation Details
+----------------------
+
+The bulk of the code necessary to perform our FFI operations is in the upstream
+`DataFusion `_ core repository. You can review the code and
+documentation in the `datafusion-ffi`_ crate.
+
+Our FFI implementation is narrowly focused on sharing data and functions with Rust-backed
+libraries. This allows us to use the `abi_stable crate `_.
+This is an excellent crate that allows for easy conversion between Rust native types
+and FFI-safe alternatives. For example, if you needed to pass a ``Vec`` via FFI,
+you can simply convert it to an ``RVec`` in an intuitive manner. It also supports
+features like ``RResult`` and ``ROption`` that do not have an obvious translation to a
+C equivalent.
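+
+As a short illustration of the kind of conversions ``abi_stable`` enables, consider the
+following minimal sketch. The variable names and values here are purely hypothetical; the
+point is that moving between native and FFI-safe types is a plain ``From``/``Into`` call:
+
+.. code-block:: rust
+
+    use abi_stable::std_types::{ROption, RVec};
+
+    // FFI-safe equivalents of Vec and Option
+    let bytes: Vec<u8> = vec![1, 2, 3];
+    let ffi_bytes: RVec<u8> = bytes.into();
+
+    let maybe_rows: Option<usize> = Some(3);
+    let ffi_rows: ROption<usize> = maybe_rows.into();
+
+    // Converting back to the native types is just as direct.
+    let bytes_again: Vec<u8> = ffi_bytes.into_vec();
+    let rows_again: Option<usize> = ffi_rows.into();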
+
+The `datafusion-ffi`_ crate has been designed to make it easy to convert from DataFusion
+traits into their FFI counterparts. For example, if you have defined a custom
+`TableProvider `_
+and you want to create a sharable FFI counterpart, you could write:
+
+.. code-block:: rust
+
+    let my_provider = MyTableProvider::default();
+    let ffi_provider = FFI_TableProvider::new(Arc::new(my_provider), false, None);
+
+If you were interfacing with a library that provided the above ``FFI_TableProvider`` and
+you needed to turn it back into a ``TableProvider``, you can turn it into a
+``ForeignTableProvider``, which implements the ``TableProvider`` trait.
+
+.. code-block:: rust
+
+    let foreign_provider: ForeignTableProvider = ffi_provider.into();
+
+If you review the code in `datafusion-ffi`_ you will find that each of the traits we share
+across the boundary has two portions, one with a ``FFI_`` prefix and one with a ``Foreign``
+prefix. This is used to distinguish which side of the FFI boundary that struct is
+designed to be used on. The structures with the ``FFI_`` prefix are to be used by the
+**provider** of the structure. In the example we're showing, this means the code that has
+written the underlying ``TableProvider`` implementation to access your custom data source.
+The structures with the ``Foreign`` prefix are to be used by the receiver. In this case,
+it is the ``datafusion-python`` library.
+
+In order to share these FFI structures, we need to wrap them in some kind of Python object
+that can be used to interface from one package to another. As described in the above
+section on our inspiration from Arrow, we use ``PyCapsule``. We can create a ``PyCapsule``
+for our provider as follows:
+
+.. code-block:: rust
+
+    let name = CString::new("datafusion_table_provider")?;
+    let my_capsule = PyCapsule::new_bound(py, provider, Some(name))?;
+
+On the receiving side, to turn this ``PyCapsule`` object back into an ``FFI_TableProvider``,
+which can then be turned into a ``ForeignTableProvider``, the associated code is:
+
+.. code-block:: rust
+
+    let capsule = capsule.downcast::<PyCapsule>()?;
+    let provider = unsafe { capsule.reference::<FFI_TableProvider>() };
+
+By convention the ``datafusion-python`` library expects a Python object that has a
+``TableProvider`` PyCapsule to have this capsule accessible by calling a function named
+``__datafusion_table_provider__``. You can see a complete working example of how to
+share a ``TableProvider`` from one Python library to DataFusion Python in the
+`repository examples folder `_.
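+
+To sketch what satisfying that convention can look like on the provider side, suppose a
+hypothetical ``MyTableProviderClass`` pyclass wraps a concrete, cloneable ``TableProvider``
+in an ``inner`` field. A minimal, illustrative method (one possible shape, not the only
+one) could be:
+
+.. code-block:: rust
+
+    use std::ffi::CString;
+    use std::sync::Arc;
+
+    use datafusion_ffi::table_provider::FFI_TableProvider;
+    use pyo3::prelude::*;
+    use pyo3::types::PyCapsule;
+
+    #[pymethods]
+    impl MyTableProviderClass {
+        fn __datafusion_table_provider__<'py>(
+            &self,
+            py: Python<'py>,
+        ) -> PyResult<Bound<'py, PyCapsule>> {
+            let name = CString::new("datafusion_table_provider")?;
+            // Wrap the underlying TableProvider in its FFI-stable counterpart
+            // and hand it to Python inside a named PyCapsule.
+            let provider = FFI_TableProvider::new(Arc::new(self.inner.clone()), false, None);
+            PyCapsule::new_bound(py, provider, Some(name))
+        }
+    }
+
+Any Python object that exposes this method can then be handed to DataFusion Python, which
+reads the capsule back out by this name to reconstruct the provider on its side of the
+boundary.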
+
+This section has been written using ``TableProvider`` as an example. It is the first
+extension that has been written using this approach and the most thoroughly implemented.
+As we continue to expose more of the DataFusion features, we intend to follow this same
+design pattern.
+
+Alternative Approach
+--------------------
+
+Suppose you needed to expose some other features of DataFusion and you could not wait
+for the upstream repository to implement the FFI approach we describe. In this case
+you may decide to create a dependency on the ``datafusion-python`` crate instead.
+
+As we discussed, this is not guaranteed to work across different compiler versions and
+optimization levels. If you wish to go down this route, there are two approaches we
+have identified you can use.
+
+#. Re-export all of ``datafusion-python`` yourself with your extensions built in.
+#. Carefully synchronize your software releases with the ``datafusion-python`` CI build
+   system so that your libraries use the exact same compiler, features, and
+   optimization level.
+
+We currently do not recommend either of these approaches as they are difficult to
+maintain over a long period. Additionally, they require a tight version coupling
+between libraries.
+
+Status of Work
+--------------
+
+At the time of this writing, the FFI features are under active development. To see
+the latest status, we recommend reviewing the code in the `datafusion-ffi`_ crate.
+
+..
_datafusion-ffi: https://crates.io/crates/datafusion-ffi diff --git a/docs/source/index.rst b/docs/source/index.rst index 34eb23b28..558b2d572 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -85,6 +85,7 @@ Example :caption: CONTRIBUTOR GUIDE contributor-guide/introduction + contributor-guide/ffi .. _toc.api: .. toctree:: From 3f3983cc86ffe267cff97480241e8a588ac38fa3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 23 Feb 2025 08:00:52 -0500 Subject: [PATCH 04/22] build(deps): bump arrow from 54.1.0 to 54.2.0 (#1035) Bumps [arrow](https://github.com/apache/arrow-rs) from 54.1.0 to 54.2.0. - [Release notes](https://github.com/apache/arrow-rs/releases) - [Changelog](https://github.com/apache/arrow-rs/blob/main/CHANGELOG-old.md) - [Commits](https://github.com/apache/arrow-rs/compare/54.1.0...54.2.0) --- updated-dependencies: - dependency-name: arrow dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 56 +++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1b1ed50a..d23ed6169 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" +checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" +checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" +checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" +checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" +checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" 
+checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" +checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" +checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" dependencies = [ "arrow-array", "arrow-buffer", @@ -326,9 +326,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" +checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" +checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +352,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" +checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" +checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" dependencies = [ "ahash", "arrow-array", @@ -375,9 +375,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.1.0" +version = "54.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" dependencies = [ "arrow-array", "arrow-buffer", From 69ebf70bd821d0ae516d2f61d96058e2252a7a1f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 24 Feb 2025 21:30:52 +0100 Subject: [PATCH 05/22] Chore: Release datafusion-python 45 (#1024) * Bump version number to prepare for release * Add changelog 45.0.0 * Add deprecated marker from either typing or typing_extensions based on the python version * Limit pyarrow version per issue # 1023 * Bumping the version number to support new 
release candidate * There was no guarantee that the record batches would be returned in a single partition, so update the unit test to check all partitions. * Revert "Limit pyarrow version per issue # 1023" This reverts commit b48d5872661017ec21ea71f7dbb9569f2f0bf797. * Correct import for python 3.13 and above * Bump minor version due to pypi requirement * Update cargo lock --- Cargo.lock | 113 +++++++++++++-------------------- Cargo.toml | 2 +- dev/changelog/45.0.0.md | 42 ++++++++++++ python/datafusion/context.py | 5 +- python/datafusion/dataframe.py | 5 +- python/datafusion/expr.py | 6 +- python/datafusion/substrait.py | 5 +- python/tests/test_dataframe.py | 21 ++++-- 8 files changed, 118 insertions(+), 81 deletions(-) create mode 100644 dev/changelog/45.0.0.md diff --git a/Cargo.lock b/Cargo.lock index d23ed6169..5c7f2bf3c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -606,19 +606,18 @@ dependencies = [ [[package]] name = "bzip2" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c" dependencies = [ "bzip2-sys", - "libc", ] [[package]] name = "bzip2-sys" -version = "0.1.11+1.0.8" +version = "0.1.12+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" dependencies = [ "cc", "libc", @@ -627,9 +626,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.12" +version = "1.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2" +checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" dependencies = [ "jobserver", "libc", @@ -684,21 +683,20 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.53" +version = "0.1.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" dependencies = [ "cc", ] [[package]] name = "comfy-table" -version = "7.1.3" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -837,9 +835,9 @@ dependencies = [ [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" dependencies = [ "memchr", ] @@ -878,7 +876,7 @@ dependencies = [ "async-compression", "async-trait", "bytes", - "bzip2 0.5.0", + "bzip2 0.5.1", "chrono", "datafusion-catalog", "datafusion-common", @@ -1240,7 +1238,7 @@ dependencies = [ "itertools 0.14.0", "log", "paste", - "petgraph 0.7.1", + "petgraph", ] [[package]] @@ -1341,7 +1339,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" dependencies = [ "arrow", "async-trait", @@ -1436,9 +1434,9 @@ checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" @@ -1456,12 +1454,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -2269,9 +2261,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" dependencies = [ "adler2", ] @@ -2548,23 +2540,13 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap", -] - [[package]] name = "petgraph" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "indexmap", ] @@ -2660,9 +2642,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", "prost-derive", @@ -2670,16 +2652,16 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b" +checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck", - "itertools 0.13.0", + "itertools 0.14.0", "log", "multimap", "once_cell", - "petgraph 0.6.5", + "petgraph", "prettyplease", "prost", "prost-types", @@ -2690,12 +2672,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.98", @@ -2703,9 +2685,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" +checksum = 
"52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ "prost", ] @@ -2721,9 +2703,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88" dependencies = [ "cc", ] @@ -2860,9 +2842,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" +checksum = "e46f3055866785f6b92bc6164b76be02ca8f2eb4b002c0354b28cf4c119e5944" dependencies = [ "cfg_aliases", "libc", @@ -3042,15 +3024,14 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +checksum = "e75ec5e92c4d8aede845126adc388046234541629e76029599ed35a003c7ed24" dependencies = [ "cc", "cfg-if", "getrandom 0.2.15", "libc", - "spin", "untrusted", "windows-sys 0.52.0", ] @@ -3097,9 +3078,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.22" +version = "0.23.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", "ring", @@ -3377,9 +3358,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "snafu" @@ -3418,12 +3399,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" - [[package]] name = "sqlparser" version = "0.53.0" @@ -3453,9 +3428,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stacker" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +checksum = "1d08feb8f695b465baed819b03c128dc23f57a694510ab1f06c77f763975685e" dependencies = [ "cc", "cfg-if", diff --git a/Cargo.toml b/Cargo.toml index d18e0e8f0..5358b1836 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "44.0.0" +version = "45.2.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/45.0.0.md b/dev/changelog/45.0.0.md new file mode 100644 index 000000000..93659b171 --- /dev/null +++ b/dev/changelog/45.0.0.md @@ -0,0 +1,42 @@ + + +# Apache DataFusion Python 45.0.0 Changelog + +This release consists of 2 commits from 2 contributors. See credits at the end of this changelog for more information. 
+ +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) + +**Other:** + +- Chore/upgrade datafusion 45 [#1010](https://github.com/apache/datafusion-python/pull/1010) (kevinjqliu) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. + +``` + 1 Kevin Liu + 1 Tim Saucer +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 864ef1c8b..21955b6d1 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -21,7 +21,10 @@ from typing import TYPE_CHECKING, Any, Protocol -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.catalog import Catalog, Table from datafusion.dataframe import DataFrame diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 7413a5fa3..23b5d630b 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -33,7 +33,10 @@ overload, ) -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import ExecutionPlan, LogicalPlan from datafusion.record_batch import RecordBatchStream diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 68ddd7c9a..e3d7158eb 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -25,7 +25,11 @@ from typing import TYPE_CHECKING, Any, Optional, Type import pyarrow as pa -from typing_extensions import deprecated + +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.common import DataTypeMap, NullTreatment, RexType diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 402184d3f..06302fe38 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -26,7 +26,10 @@ import pathlib from typing import TYPE_CHECKING -from typing_extensions import deprecated +try: + from warnings import deprecated # Python 3.13+ +except ImportError: + from typing_extensions import deprecated # Python 3.12 from datafusion.plan import LogicalPlan diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 5bc3fb094..c636e896a 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -755,13 +755,20 @@ def test_execution_plan(aggregate_df): assert "CsvExec:" in indent ctx = SessionContext() - stream = ctx.execute(plan, 0) - # get the one and only batch - batch = stream.next() - assert batch is not None - # there should be no more batches - with pytest.raises(StopIteration): - stream.next() + rows_returned = 0 + for idx in range(0, plan.partition_count): + stream = ctx.execute(plan, idx) + try: + batch = stream.next() + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + except StopIteration: + # This is one of the partitions with no values + pass + with pytest.raises(StopIteration): + stream.next() + + assert rows_returned == 5 def test_repartition(df): From a80a788f69cf46ef002b3c537837548cc103748c Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 8 Mar 2025 21:22:36 +0800 
Subject: [PATCH 06/22] Enable Dataframe to be converted into views which can be used in register_table (#1016) * add test_view * feat: add into_view method to register DataFrame as a view * add pytableprovider * feat: add as_table method to PyTableProvider and update into_view to return PyTable * refactor: simplify as_table method and update documentation for into_view * test: improve test_register_filtered_dataframe by removing redundant comments and assertions * test: enhance test_register_filtered_dataframe with additional assertions for DataFrame results * ruff formatted * cleanup: remove unused imports from test_view.py * docs: add example for registering a DataFrame as a view in README.md * docs: update docstring for into_view method to clarify usage as ViewTable * chore: add license header to test_view.py * ruff correction * refactor: rename into_view method to _into_view * ruff lint * refactor: simplify into_view method and update Rust binding convention * docs: add views section to user guide with example on registering views * feat: add register_view method to SessionContext for DataFrame registration * docs: update README and user guide to reflect register_view method for DataFrame registration * docs: remove some documentation from PyDataFrame --- README.md | 40 +++++++++++++ .../user-guide/common-operations/index.rst | 1 + .../user-guide/common-operations/views.rst | 58 +++++++++++++++++++ python/datafusion/context.py | 12 ++++ python/datafusion/dataframe.py | 4 ++ python/tests/test_view.py | 49 ++++++++++++++++ src/dataframe.rs | 39 +++++++++++++ 7 files changed, 203 insertions(+) create mode 100644 docs/source/user-guide/common-operations/views.rst create mode 100644 python/tests/test_view.py diff --git a/README.md b/README.md index 9c56b62dd..4f80dbe18 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,46 @@ This produces the following chart: ![Chart](examples/chart.png) +## Registering a DataFrame as a View + +You can use SessionContext's `register_view` method to convert a DataFrame into a view and register it with the context. + +```python +from datafusion import SessionContext, col, literal + +# Create a DataFusion context +ctx = SessionContext() + +# Create sample data +data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + +# Create a DataFrame from the dictionary +df = ctx.from_pydict(data, "my_table") + +# Filter the DataFrame (for example, keep rows where a > 2) +df_filtered = df.filter(col("a") > literal(2)) + +# Register the dataframe as a view with the context +ctx.register_view("view1", df_filtered) + +# Now run a SQL query against the registered view +df_view = ctx.sql("SELECT * FROM view1") + +# Collect the results +results = df_view.collect() + +# Convert results to a list of dictionaries for display +result_dicts = [batch.to_pydict() for batch in results] + +print(result_dicts) +``` + +This will output: + +```python +[{'a': [3, 4, 5], 'b': [30, 40, 50]}] +``` + ## Configuration It is possible to configure runtime (memory and disk settings) and configuration settings when creating a context. diff --git a/docs/source/user-guide/common-operations/index.rst b/docs/source/user-guide/common-operations/index.rst index d7c708c21..7abd1f138 100644 --- a/docs/source/user-guide/common-operations/index.rst +++ b/docs/source/user-guide/common-operations/index.rst @@ -23,6 +23,7 @@ The contents of this section are designed to guide a new user through how to use .. 
toctree::
    :maxdepth: 2
 
+   views
    basic-info
    select-and-filter
    expressions
diff --git a/docs/source/user-guide/common-operations/views.rst b/docs/source/user-guide/common-operations/views.rst
new file mode 100644
index 000000000..df11e3abe
--- /dev/null
+++ b/docs/source/user-guide/common-operations/views.rst
@@ -0,0 +1,58 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+======================
+Registering Views
+======================
+
+You can use the context's ``register_view`` method to register a DataFrame as a view:
+
+.. code-block:: python
+
+    from datafusion import SessionContext, col, literal
+
+    # Create a DataFusion context
+    ctx = SessionContext()
+
+    # Create sample data
+    data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}
+
+    # Create a DataFrame from the dictionary
+    df = ctx.from_pydict(data, "my_table")
+
+    # Filter the DataFrame (for example, keep rows where a > 2)
+    df_filtered = df.filter(col("a") > literal(2))
+
+    # Register the dataframe as a view with the context
+    ctx.register_view("view1", df_filtered)
+
+    # Now run a SQL query against the registered view
+    df_view = ctx.sql("SELECT * FROM view1")
+
+    # Collect the results
+    results = df_view.collect()
+
+    # Convert results to a list of dictionaries for display
+    result_dicts = [batch.to_pydict() for batch in results]
+
+    print(result_dicts)
+
+This will output:
+
+.. code-block:: python
+
+    [{'a': [3, 4, 5], 'b': [30, 40, 50]}]
diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index 21955b6d1..befc4dce6 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -707,6 +707,18 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr
         """
         return DataFrame(self.ctx.from_polars(data, name))
 
+    # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
+    # is the discussion on how we arrived at adding register_view
+    def register_view(self, name: str, df: DataFrame) -> None:
+        """Register a :py:class:`~datafusion.dataframe.DataFrame` as a view.
+
+        Args:
+            name (str): The name to register the view under.
+            df (DataFrame): The DataFrame to be converted into a view and registered.
+        """
+        view = df.into_view()
+        self.ctx.register_table(name, view)
+
     def register_table(self, name: str, table: Table) -> None:
         """Register a :py:class: `~datafusion.catalog.Table` as a table.
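Taken together, ``register_view`` and the ``into_view`` method below give the following end-to-end flow. This is a minimal sketch, not part of the patch itself, using only the public API introduced above:

```python
from datafusion import SessionContext, col, literal

ctx = SessionContext()
df = ctx.from_pydict({"a": [1, 2, 3, 4, 5]}, "my_table")

# register_view() converts the DataFrame into a view table via into_view()
# and registers it, so the filter below runs whenever the view is queried.
ctx.register_view("big_a", df.filter(col("a") > literal(3)))

batches = ctx.sql("SELECT a FROM big_a").collect()
assert batches[0].column(0).to_pylist() == [4, 5]
```
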
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 23b5d630b..85a179ec9 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -124,6 +124,10 @@ def __init__(self, df: DataFrameInternal) -> None: """ self.df = df + def into_view(self) -> pa.Table: + """Convert DataFrame as a ViewTable which can be used in register_table.""" + return self.df.into_view() + def __getitem__(self, key: str | List[str]) -> DataFrame: """Return a new :py:class`DataFrame` with the specified column or columns. diff --git a/python/tests/test_view.py b/python/tests/test_view.py new file mode 100644 index 000000000..1d92cc0d4 --- /dev/null +++ b/python/tests/test_view.py @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from datafusion import SessionContext, col, literal + + +def test_register_filtered_dataframe(): + ctx = SessionContext() + + data = {"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]} + + df = ctx.from_pydict(data, "my_table") + + df_filtered = df.filter(col("a") > literal(2)) + + ctx.register_view("view1", df_filtered) + + df_view = ctx.sql("SELECT * FROM view1") + + filtered_results = df_view.collect() + + result_dicts = [batch.to_pydict() for batch in filtered_results] + + expected_results = [{"a": [3, 4, 5], "b": [30, 40, 50]}] + + assert result_dicts == expected_results + + df_results = df.collect() + + df_result_dicts = [batch.to_pydict() for batch in df_results] + + expected_df_results = [{"a": [1, 2, 3, 4, 5], "b": [10, 20, 30, 40, 50]}] + + assert df_result_dicts == expected_df_results diff --git a/src/dataframe.rs b/src/dataframe.rs index ed9578a71..243e2e14f 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -30,6 +30,7 @@ use datafusion::arrow::util::pretty; use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; +use datafusion::datasource::TableProvider; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; @@ -39,6 +40,7 @@ use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyCapsule, PyTuple, PyTupleMethods}; use tokio::task::JoinHandle; +use crate::catalog::PyTable; use crate::errors::{py_datafusion_err, PyDataFusionError}; use crate::expr::sort_expr::to_sort_expressions; use crate::physical_plan::PyExecutionPlan; @@ -50,6 +52,25 @@ use crate::{ expr::{sort_expr::PySortExpr, PyExpr}, }; +// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 +// - we have not decided on the table_provider approach yet +// this is an interim implementation +#[pyclass(name = "TableProvider", module = "datafusion")] +pub struct PyTableProvider { + 
provider: Arc<dyn TableProvider>,
+}
+
+impl PyTableProvider {
+    pub fn new(provider: Arc<dyn TableProvider>) -> Self {
+        Self { provider }
+    }
+
+    pub fn as_table(&self) -> PyTable {
+        let table_provider: Arc<dyn TableProvider> = self.provider.clone();
+        PyTable::new(table_provider)
+    }
+}
+
 /// A PyDataFrame is a representation of a logical plan and an API to compose statements.
 /// Use it to build a plan and `.collect()` to execute the plan and collect the result.
 /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment.
@@ -156,6 +177,24 @@ impl PyDataFrame {
         PyArrowType(self.df.schema().into())
     }
 
+    /// Convert this DataFrame into a Table that can be used in register_table.
+    /// By convention, into_... methods consume self and return the new object.
+    /// Disabling the clippy lint, so we can use &self, because we are working
+    /// with Python bindings where objects are shared.
+    /// https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116
+    /// - we have not decided on the table_provider approach yet
+    #[allow(clippy::wrong_self_convention)]
+    fn into_view(&self) -> PyDataFusionResult<PyTable> {
+        // Call the underlying Rust DataFrame::into_view method.
+        // Note that the Rust method consumes self; here we clone the inner Arc
+        // so that we don’t invalidate this PyDataFrame.
+        let table_provider = self.df.as_ref().clone().into_view();
+        let table_provider = PyTableProvider::new(table_provider);
+
+        Ok(table_provider.as_table())
+    }
+
     #[pyo3(signature = (*args))]
     fn select_columns(&self, args: Vec<PyBackedStr>) -> PyDataFusionResult<Self> {
         let args = args.iter().map(|s| s.as_ref()).collect::<Vec<&str>>();

From 9027b4d79fdd7a41dd9c1f25c2ecebc1fabf50f2 Mon Sep 17 00:00:00 2001
From: Chen Chongchen
Date: Sat, 8 Mar 2025 21:24:02 +0800
Subject: [PATCH 07/22] fix: type checking (#993)

* fix: type checking

* update license

* format

* format

* update catalog

* revert type annotation

* format

* format

* update

---
 python/datafusion/catalog.py        |  5 +++--
 python/datafusion/context.py        | 19 ++++++++++++------
 python/datafusion/dataframe.py      |  3 ++-
 python/datafusion/expr.py           |  8 ++++----
 python/datafusion/functions.py      | 10 +++++++---
 python/datafusion/input/location.py | 10 +++++-----
 python/datafusion/udf.py            |  7 ++++---
 python/tests/test_functions.py      | 30 +++++++++++++++++++++++++++++
 8 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py
index 703037665..0560f4704 100644
--- a/python/datafusion/catalog.py
+++ b/python/datafusion/catalog.py
@@ -66,11 +66,12 @@ def __init__(self, table: df_internal.Table) -> None:
         """This constructor is not typically called by the end user."""
         self.table = table
 
+    @property
     def schema(self) -> pyarrow.Schema:
         """Returns the schema associated with this table."""
-        return self.table.schema()
+        return self.table.schema
 
     @property
     def kind(self) -> str:
         """Returns the kind of table."""
-        return self.table.kind()
+        return self.table.kind
diff --git a/python/datafusion/context.py b/python/datafusion/context.py
index befc4dce6..282b2a477 100644
--- a/python/datafusion/context.py
+++ b/python/datafusion/context.py
@@ -728,7 +728,7 @@ def register_table(self, name: str, table: Table) -> None:
             name: Name of the resultant table.
             table: DataFusion table to add to the session context.
""" - self.ctx.register_table(name, table) + self.ctx.register_table(name, table.table) def deregister_table(self, name: str) -> None: """Remove a table from the session.""" @@ -767,7 +767,7 @@ def register_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -798,7 +798,9 @@ def register_parquet( file_extension, skip_metadata, schema, - file_sort_order, + [sort_list_to_raw_sort_list(exprs) for exprs in file_sort_order] + if file_sort_order is not None + else None, ) def register_csv( @@ -934,7 +936,7 @@ def register_udwf(self, udwf: WindowUDF) -> None: def catalog(self, name: str = "datafusion") -> Catalog: """Retrieve a catalog by name.""" - return self.ctx.catalog(name) + return Catalog(self.ctx.catalog(name)) @deprecated( "Use the catalog provider interface ``SessionContext.Catalog`` to " @@ -1054,7 +1056,7 @@ def read_parquet( file_extension: str = ".parquet", skip_metadata: bool = True, schema: pyarrow.Schema | None = None, - file_sort_order: list[list[Expr]] | None = None, + file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -1078,6 +1080,11 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] + file_sort_order = ( + [sort_list_to_raw_sort_list(f) for f in file_sort_order] + if file_sort_order is not None + else None + ) return DataFrame( self.ctx.read_parquet( str(path), @@ -1121,7 +1128,7 @@ def read_table(self, table: Table) -> DataFrame: :py:class:`~datafusion.catalog.ListingTable`, create a :py:class:`~datafusion.dataframe.DataFrame`. 
""" - return DataFrame(self.ctx.read_table(table)) + return DataFrame(self.ctx.read_table(table.table)) def execute(self, plan: ExecutionPlan, partitions: int) -> RecordBatchStream: """Execute the ``plan`` and return the results.""" diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 85a179ec9..de5d8376e 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -52,6 +52,7 @@ from enum import Enum from datafusion._internal import DataFrame as DataFrameInternal +from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -277,7 +278,7 @@ def with_columns( def _simplify_expression( *exprs: Expr | Iterable[Expr], **named_exprs: Expr - ) -> list[Expr]: + ) -> list[expr_internal.Expr]: expr_list = [] for expr in exprs: if isinstance(expr, Expr): diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index e3d7158eb..3639abec6 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr: """Helper function to return a default Sort if an Expr is provided.""" if isinstance(e, SortExpr): return e.raw_sort - return SortExpr(e.expr, True, True).raw_sort + return SortExpr(e, True, True).raw_sort def sort_list_to_raw_sort_list( @@ -231,7 +231,7 @@ def variant_name(self) -> str: def __richcmp__(self, other: Expr, op: int) -> Expr: """Comparison operator.""" - return Expr(self.expr.__richcmp__(other, op)) + return Expr(self.expr.__richcmp__(other.expr, op)) def __repr__(self) -> str: """Generate a string representation of this expression.""" @@ -417,7 +417,7 @@ def sort(self, ascending: bool = True, nulls_first: bool = True) -> SortExpr: ascending: If true, sort in ascending order. nulls_first: Return null values first. """ - return SortExpr(self.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(self, ascending=ascending, nulls_first=nulls_first) def is_null(self) -> Expr: """Returns ``True`` if this expression is null.""" @@ -789,7 +789,7 @@ class SortExpr: def __init__(self, expr: Expr, ascending: bool, nulls_first: bool) -> None: """This constructor should not be called by the end user.""" - self.raw_sort = expr_internal.SortExpr(expr, ascending, nulls_first) + self.raw_sort = expr_internal.SortExpr(expr.expr, ascending, nulls_first) def expr(self) -> Expr: """Return the raw expr backing the SortExpr.""" diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 5c260aade..b449c4868 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -366,7 +366,7 @@ def concat_ws(separator: str, *args: Expr) -> Expr: def order_by(expr: Expr, ascending: bool = True, nulls_first: bool = True) -> SortExpr: """Creates a new sort expression.""" - return SortExpr(expr.expr, ascending=ascending, nulls_first=nulls_first) + return SortExpr(expr, ascending=ascending, nulls_first=nulls_first) def alias(expr: Expr, name: str) -> Expr: @@ -942,6 +942,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_millis(arg.expr, *formatters)) @@ -950,6 +951,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. 
""" + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_micros(arg.expr, *formatters)) @@ -958,6 +960,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) @@ -966,6 +969,7 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ + formatters = [f.expr for f in formatters] return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) @@ -1078,9 +1082,9 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr: return Expr(f.range(start.expr, stop.expr, step.expr)) -def uuid(arg: Expr) -> Expr: +def uuid() -> Expr: """Returns uuid v4 as a string value.""" - return Expr(f.uuid(arg.expr)) + return Expr(f.uuid()) def struct(*args: Expr) -> Expr: diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index a8252b53c..517cd1578 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -37,12 +37,12 @@ def is_correct_input(self, input_item: Any, table_name: str, **kwargs): def build_table( self, - input_file: str, + input_item: str, table_name: str, **kwargs, ) -> SqlTable: """Create a table from the input source.""" - _, extension = os.path.splitext(input_file) + _, extension = os.path.splitext(input_item) format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] @@ -50,7 +50,7 @@ def build_table( import pyarrow.parquet as pq # Read the Parquet metadata - metadata = pq.read_metadata(input_file) + metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable for col in metadata.schema: @@ -69,7 +69,7 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. - with open(input_file, "r") as file: + with open(input_item, "r") as file: reader = csv.reader(file) header_row = next(reader) print(header_row) @@ -84,6 +84,6 @@ def build_table( ) # Input could possibly be multiple files. 
Create a list if so - input_files = glob.glob(input_file) + input_files = glob.glob(input_item) return SqlTable(table_name, columns, num_rows, input_files) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index c97f453d0..0bba3d723 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -85,7 +85,7 @@ class ScalarUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[..., _R], input_types: pyarrow.DataType | list[pyarrow.DataType], return_type: _R, @@ -182,7 +182,7 @@ class AggregateUDF: def __init__( self, - name: Optional[str], + name: str, accumulator: Callable[[], Accumulator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, @@ -277,6 +277,7 @@ def sum_bias_10() -> Summarize: ) if name is None: name = accum.__call__().__class__.__qualname__.lower() + assert name is not None if isinstance(input_types, pyarrow.DataType): input_types = [input_types] return AggregateUDF( @@ -462,7 +463,7 @@ class WindowUDF: def __init__( self, - name: Optional[str], + name: str, func: Callable[[], WindowEvaluator], input_types: list[pyarrow.DataType], return_type: pyarrow.DataType, diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index b1a739b49..fca05bb8f 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -871,7 +871,22 @@ def test_temporal_functions(df): f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")), f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")), f.extract(literal("day"), column("d")), + f.to_timestamp( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_seconds( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_millis( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), + f.to_timestamp_micros( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), f.to_timestamp_nanos(literal("2023-09-07 05:06:14.523952")), + f.to_timestamp_nanos( + literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") + ), ) result = df.collect() assert len(result) == 1 @@ -913,6 +928,21 @@ def test_temporal_functions(df): assert result.column(11) == pa.array( [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") ) + assert result.column(12) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + ) + assert result.column(13) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + ) + assert result.column(14) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + ) + assert result.column(15) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) + assert result.column(16) == pa.array( + [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + ) def test_arrow_cast(df): From acd70409f73f299a144e7ff4115c6e6035c3ffb5 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:37:10 +0100 Subject: [PATCH 08/22] feat: reads using global ctx (#982) * feat: reads using global ctx * Add text to io methods to describe the context they are using --------- Co-authored-by: Tim Saucer --- python/datafusion/__init__.py | 5 + python/datafusion/io.py | 199 ++++++++++++++++++++++++++ python/tests/test_io.py | 95 ++++++++++++ python/tests/test_wrapper_coverage.py | 2 + src/context.rs | 12 +- src/utils.rs | 8 ++ 6 files 
changed, 319 insertions(+), 2 deletions(-) create mode 100644 python/datafusion/io.py create mode 100644 python/tests/test_io.py diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 85aefcce7..f11ce54a6 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -45,6 +45,7 @@ Expr, WindowFrame, ) +from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF @@ -81,6 +82,10 @@ "functions", "object_store", "substrait", + "read_parquet", + "read_avro", + "read_csv", + "read_json", ] diff --git a/python/datafusion/io.py b/python/datafusion/io.py new file mode 100644 index 000000000..7f3b77efa --- /dev/null +++ b/python/datafusion/io.py @@ -0,0 +1,199 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""IO read functions using global context.""" + +import pathlib + +import pyarrow + +from datafusion.dataframe import DataFrame +from datafusion.expr import Expr + +from ._internal import SessionContext as SessionContextInternal + + +def read_parquet( + path: str | pathlib.Path, + table_partition_cols: list[tuple[str, str]] | None = None, + parquet_pruning: bool = True, + file_extension: str = ".parquet", + skip_metadata: bool = True, + schema: pyarrow.Schema | None = None, + file_sort_order: list[list[Expr]] | None = None, +) -> DataFrame: + """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Parquet file. + table_partition_cols: Partition columns. + parquet_pruning: Whether the parquet reader should use the predicate + to prune row groups. + file_extension: File extension; only files with this extension are + selected for data input. + skip_metadata: Whether the parquet reader should skip any metadata + that may be in the file schema. This can help avoid schema + conflicts due to metadata. + schema: An optional schema representing the parquet files. If None, + the parquet reader will try to infer it based on data in the + file. + file_sort_order: Sort order for the file. 
+ + Returns: + DataFrame representation of the read Parquet files + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, + ) + ) + + +def read_json( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + schema_infer_max_records: int = 1000, + file_extension: str = ".json", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a line-delimited JSON data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the JSON file. + schema: The data source schema. + schema_infer_max_records: Maximum number of rows to read from JSON + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read JSON files. + """ + if table_partition_cols is None: + table_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_csv( + path: str | pathlib.Path | list[str] | list[pathlib.Path], + schema: pyarrow.Schema | None = None, + has_header: bool = True, + delimiter: str = ",", + schema_infer_max_records: int = 1000, + file_extension: str = ".csv", + table_partition_cols: list[tuple[str, str]] | None = None, + file_compression_type: str | None = None, +) -> DataFrame: + """Read a CSV data source. + + This function will use the global context. Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the CSV file + schema: An optional schema representing the CSV files. If None, the + CSV reader will try to infer it based on data in file. + has_header: Whether the CSV file have a header. If schema inference + is run on a file with no headers, default column names are + created. + delimiter: An optional column delimiter. + schema_infer_max_records: Maximum number of rows to read from CSV + files for schema inference if needed. + file_extension: File extension; only files with this extension are + selected for data input. + table_partition_cols: Partition columns. + file_compression_type: File compression type. + + Returns: + DataFrame representation of the read CSV files + """ + if table_partition_cols is None: + table_partition_cols = [] + + path = [str(p) for p in path] if isinstance(path, list) else str(path) + + return DataFrame( + SessionContextInternal._global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, + ) + ) + + +def read_avro( + path: str | pathlib.Path, + schema: pyarrow.Schema | None = None, + file_partition_cols: list[tuple[str, str]] | None = None, + file_extension: str = ".avro", +) -> DataFrame: + """Create a :py:class:`DataFrame` for reading Avro data source. + + This function will use the global context. 
Any functions or tables registered + with another context may not be accessible when used with a DataFrame created + using this function. + + Args: + path: Path to the Avro file. + schema: The data source schema. + file_partition_cols: Partition columns. + file_extension: File extension to select. + + Returns: + DataFrame representation of the read Avro file + """ + if file_partition_cols is None: + file_partition_cols = [] + return DataFrame( + SessionContextInternal._global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension + ) + ) diff --git a/python/tests/test_io.py b/python/tests/test_io.py new file mode 100644 index 000000000..21ad188ee --- /dev/null +++ b/python/tests/test_io.py @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import os +import pathlib + +import pyarrow as pa +from datafusion import column +from datafusion.io import read_avro, read_csv, read_json, read_parquet + + +def test_read_json_global_ctx(ctx): + path = os.path.dirname(os.path.abspath(__file__)) + + # Default + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + # Schema + schema = pa.schema( + [ + pa.field("A", pa.string(), nullable=True), + ] + ) + df = read_json(test_data_path, schema=schema) + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].schema == schema + + # File extension + test_data_path = os.path.join(path, "data_test_context", "data.json") + df = read_json(test_data_path, file_extension=".json") + result = df.collect() + + assert result[0].column(0) == pa.array(["a", "b", "c"]) + assert result[0].column(1) == pa.array([1, 2, 3]) + + +def test_read_parquet_global(): + parquet_df = read_parquet(path="parquet/data/alltypes_plain.parquet") + parquet_df.show() + assert parquet_df is not None + + path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + parquet_df = read_parquet(path=path) + assert parquet_df is not None + + +def test_read_csv(): + csv_df = read_csv(path="testing/data/csv/aggregate_test_100.csv") + csv_df.select(column("c1")).show() + + +def test_read_csv_list(): + csv_df = read_csv(path=["testing/data/csv/aggregate_test_100.csv"]) + expected = csv_df.count() * 2 + + double_csv_df = read_csv( + path=[ + "testing/data/csv/aggregate_test_100.csv", + "testing/data/csv/aggregate_test_100.csv", + ] + ) + actual = double_csv_df.count() + + double_csv_df.select(column("c1")).show() + assert actual == expected + + +def test_read_avro(): + avro_df = read_avro(path="testing/data/avro/alltypes_plain.avro") + avro_df.show() + assert avro_df is not None + + path = 
pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro"
+    avro_df = read_avro(path=path)
+    assert avro_df is not None
diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py
index 86f2d57f2..ac064ba95 100644
--- a/python/tests/test_wrapper_coverage.py
+++ b/python/tests/test_wrapper_coverage.py
@@ -34,6 +34,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None:
         return
 
     for attr in dir(internal_obj):
+        if attr in ["_global_ctx"]:
+            continue
         assert attr in dir(wrapped_obj)
 
         internal_attr = getattr(internal_obj, attr)
diff --git a/src/context.rs b/src/context.rs
index 0f962638e..9ba87eb8a 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -44,7 +44,7 @@ use crate::store::StorageContexts;
 use crate::udaf::PyAggregateUDF;
 use crate::udf::PyScalarUDF;
 use crate::udwf::PyWindowUDF;
-use crate::utils::{get_tokio_runtime, validate_pycapsule, wait_for_future};
+use crate::utils::{get_global_ctx, get_tokio_runtime, validate_pycapsule, wait_for_future};
 use datafusion::arrow::datatypes::{DataType, Schema, SchemaRef};
 use datafusion::arrow::pyarrow::PyArrowType;
 use datafusion::arrow::record_batch::RecordBatch;
@@ -69,7 +69,7 @@ use datafusion::prelude::{
     AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions,
 };
 use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider};
-use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple};
+use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType};
 use tokio::task::JoinHandle;
 
 /// Configuration options for a SessionContext
@@ -306,6 +306,14 @@ impl PySessionContext {
         })
     }
 
+    #[classmethod]
+    #[pyo3(signature = ())]
+    fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult<Self> {
+        Ok(Self {
+            ctx: get_global_ctx().clone(),
+        })
+    }
+
     /// Register an object store with the given name
     #[pyo3(signature = (scheme, store, host=None))]
    pub fn register_object_store(
diff --git a/src/utils.rs b/src/utils.rs
index ed224b364..999aad755 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -17,6 +17,7 @@
 use crate::errors::{PyDataFusionError, PyDataFusionResult};
 use crate::TokioRuntime;
+use datafusion::execution::context::SessionContext;
 use datafusion::logical_expr::Volatility;
 use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
@@ -37,6 +38,13 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime {
     RUNTIME.get_or_init(|| TokioRuntime(tokio::runtime::Runtime::new().unwrap()))
 }
 
+/// Utility to get the global DataFusion context
+#[inline]
+pub(crate) fn get_global_ctx() -> &'static SessionContext {
+    static CTX: OnceLock<SessionContext> = OnceLock::new();
+    CTX.get_or_init(|| SessionContext::new())
+}
+
 /// Utility to collect rust futures with GIL released
 pub fn wait_for_future<F>(py: Python, f: F) -> F::Output
 where

From 973d7ec4a8196a78bc4fb32db4f24e523997ba4c Mon Sep 17 00:00:00 2001
From: Crystal Zhou <45134936+CrystalZhou0529@users.noreply.github.com>
Date: Sat, 8 Mar 2025 16:23:54 -0500
Subject: [PATCH 09/22] feat: Implementation of udf and udaf decorator (#1040)

* Implementation of udf and udaf decorator

* Rename decorators back to udf and udaf, update documentation

* Minor typo fixes

* Fixing linting errors

* ruff formatting

---------

Co-authored-by: Tim Saucer

---
 python/datafusion/udf.py  | 257 +++++++++++++++++++++++++++-----------
 python/tests/test_udaf.py |  42 +++++++
 python/tests/test_udf.py  |  42 ++++++-
 3 files changed, 265 insertions(+), 76 deletions(-)

diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py
index 0bba3d723..af7bcf2ed 100644
--- a/python/datafusion/udf.py
+++ 
From 973d7ec4a8196a78bc4fb32db4f24e523997ba4c Mon Sep 17 00:00:00 2001 From: Crystal Zhou <45134936+CrystalZhou0529@users.noreply.github.com> Date: Sat, 8 Mar 2025 16:23:54 -0500 Subject: [PATCH 09/22] feat: Implementation of udf and udaf decorator (#1040) * Implementation of udf and udaf decorator * Rename decorators back to udf and udaf, update documentations * Minor typo fixes * Fixing linting errors * ruff formatting --------- Co-authored-by: Tim Saucer --- python/datafusion/udf.py | 257 +++++++++++++++++++++++++++----------- python/tests/test_udaf.py | 42 +++++++ python/tests/test_udf.py | 42 ++++++- 3 files changed, 265 insertions(+), 76 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 0bba3d723..af7bcf2ed 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -19,6 +19,7 @@ from __future__ import annotations +import functools from abc import ABCMeta, abstractmethod from enum import Enum from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar @@ -110,43 +111,102 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - @staticmethod - def udf( - func: Callable[..., _R], - input_types: list[pyarrow.DataType], - return_type: _R, - volatility: Volatility | str, - name: Optional[str] = None, - ) -> ScalarUDF: - """Create a new User-Defined Function. + class udf: + """Create a new User-Defined Function (UDF). + + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udf(input_types, return_type, volatility, + name)`. In this case, do **not** pass `func` explicitly. Args: - func: A callable python function. - input_types: The data types of the arguments to ``func``. This list - must be of the same length as the number of arguments. - return_type: The data type of the return value from the python - function. - volatility: See ``Volatility`` for allowed values. - name: A descriptive name for the function. + func (Callable, optional): **Only needed when calling as a function.** + Skip this argument when using `udf` as a decorator. + input_types (list[pyarrow.DataType]): The data types of the arguments + to `func`. This list must be of the same length as the number of + arguments. + return_type (_R): The data type of the return value from the function. + volatility (Volatility | str): See `Volatility` for allowed values. + name (Optional[str]): A descriptive name for the function. Returns: - A user-defined aggregate function, which can be used in either data - aggregation or window function calls. + A user-defined function that can be used in SQL expressions, + data aggregation, or window function calls. + + Example: + **Using `udf` as a function:** + ``` + def double_func(x): + return x * 2 + double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + "volatile", "double_it") + ``` + + **Using `udf` as a decorator:** + ``` + @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + def double_udf(x): + return x * 2 + ``` """ - if not callable(func): - raise TypeError("`func` argument must be callable") - if name is None: - if hasattr(func, "__qualname__"): - name = func.__qualname__.lower() + + def __new__(cls, *args, **kwargs): + """Create a new UDF. 
+ + Dispatch to the UDF function or decorator depending on whether the first argument is callable. + """ + if args and callable(args[0]): + # Case 1: Used as a function; the first argument must be callable + return cls._function(*args, **kwargs) else: - name = func.__class__.__name__.lower() - return ScalarUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, - ) + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + func: Callable[..., _R], + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: + if not callable(func): + raise TypeError("`func` argument must be callable") + if name is None: + if hasattr(func, "__qualname__"): + name = func.__qualname__.lower() + else: + name = func.__class__.__name__.lower() + return ScalarUDF( + name=name, + func=func, + input_types=input_types, + return_type=return_type, + volatility=volatility, + ) + + @staticmethod + def _decorator( + input_types: list[pyarrow.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(func): + udf_caller = ScalarUDF.udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args, **kwargs): + return udf_caller(*args, **kwargs) + + return wrapper + + return decorator class Accumulator(metaclass=ABCMeta): @@ -212,25 +272,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - @staticmethod - def udaf( - accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], - volatility: Volatility | str, - name: Optional[str] = None, - ) -> AggregateUDF: - """Create a new User-Defined Aggregate Function. + class udaf: + """Create a new User-Defined Aggregate Function (UDAF). - If your :py:class:`Accumulator` can be instantiated with no arguments, you - can simply pass it's type as ``accum``. If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`Accumulator` will be constructed for every instance in - which this UDAF is used. The following examples are all valid. + This class allows you to define an **aggregate function** that can be used in + data aggregation or window function calls. - .. code-block:: python + Usage: + - **As a function**: Call `udaf(accum, input_types, return_type, state_type, + volatility, name)`. + - **As a decorator**: Use `@udaf(input_types, return_type, state_type, + volatility, name)`. + When using `udaf` as a decorator, **do not pass `accum` explicitly**. + **Function example:** + + If your :py:class:`Accumulator` can be instantiated with no arguments, you + can simply pass its type as `accum`. If you need to pass additional + arguments to its constructor, you can define a lambda or a factory method. + During runtime the :py:class:`Accumulator` will be constructed for every + instance in which this UDAF is used. The following examples are all valid. 
+ ``` + import pyarrow as pa + import pyarrow.compute as pc @@ -253,12 +315,24 @@ def evaluate(self) -> pa.Scalar: def sum_bias_10() -> Summarize: return Summarize(10.0) - udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], "immutable") - udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), [pa.float64()], "immutable") + udaf1 = udaf(Summarize, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf2 = udaf(sum_bias_10, pa.float64(), pa.float64(), [pa.float64()], + "immutable") + udaf3 = udaf(lambda: Summarize(20.0), pa.float64(), pa.float64(), + [pa.float64()], "immutable") + ``` + + **Decorator example:** + ``` + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def udaf4() -> Summarize: + return Summarize(10.0) + ``` Args: - accum: The accumulator python function. + accum: The accumulator python function. **Only needed when calling as a + function. Skip this argument when using `udaf` as a decorator.** input_types: The data types of the arguments to ``accum``. return_type: The data type of the return value. state_type: The data types of the intermediate accumulation. @@ -268,26 +342,69 @@ def sum_bias_10() -> Summarize: Returns: A user-defined aggregate function, which can be used in either data aggregation or window function calls. - """ # noqa W505 - if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" + """ + + def __new__(cls, *args, **kwargs): + """Create a new UDAF. + + Dispatch to the UDAF function or decorator depending on whether the + first argument is callable. + """ + if args and callable(args[0]): + # Case 1: Used as a function; the first argument must be callable + return cls._function(*args, **kwargs) + else: + # Case 2: Used as a decorator with parameters + return cls._decorator(*args, **kwargs) + + @staticmethod + def _function( + accum: Callable[[], Accumulator], + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: + if not callable(accum): + raise TypeError("`func` must be callable.") + if not isinstance(accum.__call__(), Accumulator): + raise TypeError( + "Accumulator must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__call__().__class__.__qualname__.lower() + if isinstance(input_types, pyarrow.DataType): + input_types = [input_types] + return AggregateUDF( + name=name, + accumulator=accum, + input_types=input_types, + return_type=return_type, + state_type=state_type, + volatility=volatility, ) - if name is None: - name = accum.__call__().__class__.__qualname__.lower() - assert name is not None - if isinstance(input_types, pyarrow.DataType): - input_types = [input_types] - return AggregateUDF( - name=name, - accumulator=accum, - input_types=input_types, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) + + @staticmethod + def _decorator( + input_types: pyarrow.DataType | list[pyarrow.DataType], + return_type: pyarrow.DataType, + state_type: list[pyarrow.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ): + def decorator(accum: Callable[[], Accumulator]): + udaf_caller = AggregateUDF.udaf( + accum, input_types, return_type, state_type, volatility, name + ) + + @functools.wraps(accum) + def wrapper(*args, **kwargs): + return udaf_caller(*args, **kwargs) + + return wrapper + + return decorator class WindowEvaluator(metaclass=ABCMeta):
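The `__new__` dispatch above is what lets a single name support both call styles: `udaf(accum, ...)` builds the aggregate immediately, while `udaf(...)` without a leading callable returns a decorator that finishes the construction once it receives the function. A stripped-down sketch of the pattern with no DataFusion dependencies; `make_udf`, `_function`, `_decorator`, and `label` are illustrative stand-ins, not part of the API:

from typing import Callable

class make_udf:
    """Dual-use constructor: make_udf(func, label) or @make_udf(label)."""

    def __new__(cls, *args, **kwargs):
        if args and callable(args[0]):
            # Function style: the callable arrives up front.
            return cls._function(*args, **kwargs)
        # Decorator style: defer until the callable is applied.
        return cls._decorator(*args, **kwargs)

    @staticmethod
    def _function(func: Callable, label: str) -> Callable:
        func.label = label  # stand-in for wrapping func into a real UDF object
        return func

    @staticmethod
    def _decorator(label: str):
        def decorator(func: Callable) -> Callable:
            return make_udf._function(func, label)
        return decorator

doubled = make_udf(lambda x: x * 2, "double_it")  # function style

@make_udf("triple_it")  # decorator style
def triple(x):
    return x * 3

assert doubled.label == "double_it" and triple.label == "triple_it"

Because `__new__` returns whatever the chosen branch produces, no instance of the class is ever created; the class acts as a callable namespace. That is also why the real decorator path wraps the user function with `functools.wraps`, so the returned wrapper keeps the original function's name and docstring.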
diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 0005a3da8..e69c77d3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -117,6 +117,26 @@ def test_udaf_aggregate(df): assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate(df): + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize() + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + def test_udaf_aggregate_with_arguments(df): bias = 10.0 @@ -143,6 +163,28 @@ def test_udaf_aggregate_with_arguments(df): assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) +def test_udaf_decorator_aggregate_with_arguments(df): + bias = 10.0 + + @udaf(pa.float64(), pa.float64(), [pa.float64()], "immutable") + def summarize(): + return Summarize(bias) + + df1 = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df1.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + df2 = df.aggregate([], [summarize(column("a"))]) + + # Run a second time to ensure the state is properly reset + result = df2.collect()[0] + + assert result.column(0) == pa.array([bias + 1.0 + 2.0 + 3.0]) + + def test_group_by(df): summarize = udaf( Summarize, diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index 3a5dce6d6..a6c047552 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -24,7 +24,7 @@ def df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + [pa.array([1, 2, 3]), pa.array([4, 4, None])], names=["a", "b"], ) return ctx.create_dataframe([[batch]], name="test_table") @@ -39,10 +39,20 @@ def test_udf(df): volatility="immutable", ) - df = df.select(is_null(column("a"))) + df = df.select(is_null(column("b"))) result = df.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) + + +def test_udf_decorator(df): + @udf([pa.int64()], pa.bool_(), "immutable") + def is_null(x: pa.Array) -> pa.Array: + return x.is_null() + + df = df.select(is_null(column("b"))) + result = df.collect()[0].column(0) + assert result == pa.array([False, False, True]) def test_register_udf(ctx, df) -> None: @@ -56,10 +66,10 @@ def test_register_udf(ctx, df) -> None: ctx.register_udf(is_null) - df_result = ctx.sql("select is_null(a) from test_table") + df_result = ctx.sql("select is_null(b) from test_table") result = df_result.collect()[0].column(0) - assert result == pa.array([False, False, False]) + assert result == pa.array([False, False, True]) class OverThresholdUDF: @@ -70,7 +80,7 @@ def __call__(self, values: pa.Array) -> pa.Array: return pa.array(v.as_py() >= self.threshold for v in values) -def test_udf_with_parameters(df) -> None: +def 
test_udf_with_parameters(df) -> None: result = df2.collect()[0].column(0) assert result == pa.array([False, True, True]) + + +def test_udf_with_parameters_decorator(df) -> None: + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_no_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF()(values) + + df1 = df.select(udf_no_param(column("a"))) + result = df1.collect()[0].column(0) + + assert result == pa.array([True, True, True]) + + @udf([pa.int64()], pa.bool_(), "immutable") + def udf_with_param(values: pa.Array) -> pa.Array: + return OverThresholdUDF(2)(values) + + df2 = df.select(udf_with_param(column("a"))) + result = df2.collect()[0].column(0) + + assert result == pa.array([False, True, True]) From d72f5605b3d523585d04857505793920f96242ba Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 06:56:12 -0400 Subject: [PATCH 10/22] Enable FA ruff lint (#1052) --- examples/python-udwf.py | 2 ++ pyproject.toml | 2 +- python/datafusion/io.py | 2 ++ python/tests/test_udaf.py | 2 ++ python/tests/test_udwf.py | 2 ++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 32f8fadaa..7d39dc1b8 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import datafusion import pyarrow as pa from datafusion import col, lit, udwf diff --git a/pyproject.toml b/pyproject.toml index f416e02a5..d16a18aa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "D", "W", "I"] +select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] [tool.ruff.lint.pydocstyle] convention = "google" diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 7f3b77efa..3b6264948 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -17,6 +17,8 @@ """IO read functions using global context.""" +from __future__ import annotations + import pathlib import pyarrow diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index e69c77d3c..97cf81f3c 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + from typing import List import pyarrow as pa diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 0ffa04179..2fea34aa3 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +from __future__ import annotations + import pyarrow as pa import pytest from datafusion import SessionContext, column, lit, udwf From 0002372ccdb780e011631c797ec9613174cf0a94 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 10 Mar 2025 14:22:42 -0400 Subject: [PATCH 11/22] Enable take comments to assign issues to users (#1058) --- .github/workflows/take.yml | 41 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/take.yml diff --git a/.github/workflows/take.yml b/.github/workflows/take.yml new file mode 100644 index 000000000..86dc190ad --- /dev/null +++ b/.github/workflows/take.yml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Assign the issue via a `take` comment +on: + issue_comment: + types: created + +permissions: + issues: write + +jobs: + issue_assign: + runs-on: ubuntu-latest + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + concurrency: + group: ${{ github.actor }}-issue-assign + steps: + - run: | + CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s) + if [ "$CODE" -eq "204" ] + then + echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + else + echo "Cannot assign issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + fi \ No newline at end of file From 9d634de6df2f8b76bd303ab1f5972f01deb2210d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 10 Mar 2025 14:24:40 -0400 Subject: [PATCH 12/22] Update python min version to 3.9 (#1043) * 3.8 -> 3.9 * upgrade pyo3 abi3-py38 -> abi3-py39 --- Cargo.toml | 2 +- .../source/contributor-guide/introduction.rst | 2 +- examples/ffi-table-provider/Cargo.lock | 75 +- examples/ffi-table-provider/Cargo.toml | 2 +- examples/ffi-table-provider/pyproject.toml | 2 +- pyproject.toml | 3 +- uv.lock | 707 ++---------------- 7 files changed, 121 insertions(+), 672 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5358b1836..50967a219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ substrait = ["dep:datafusion-substrait"] [dependencies] tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } -pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} arrow = { version = "54", features = ["pyarrow"] } datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } diff --git a/docs/source/contributor-guide/introduction.rst b/docs/source/contributor-guide/introduction.rst index 25f2c21a4..2fba64111 100644 --- a/docs/source/contributor-guide/introduction.rst +++ b/docs/source/contributor-guide/introduction.rst @@ -118,7 +118,7 @@ be ignored by ``git``. .. 
code-block:: implementation=CPython - version=3.8 + version=3.9 shared=true abi3=true lib_name=python3.12 diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-table-provider/Cargo.lock index 32af85180..8d0edd515 100644 --- a/examples/ffi-table-provider/Cargo.lock +++ b/examples/ffi-table-provider/Cargo.lock @@ -766,7 +766,8 @@ dependencies = [ [[package]] name = "datafusion" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ "arrow", "arrow-array", @@ -816,7 +817,8 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" dependencies = [ "arrow", "async-trait", @@ -836,7 +838,8 @@ dependencies = [ [[package]] name = "datafusion-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash", "arrow", @@ -862,7 +865,8 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" dependencies = [ "log", "tokio", @@ -871,12 +875,14 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" [[package]] name = "datafusion-execution" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" dependencies = [ "arrow", "dashmap", @@ -894,7 +900,8 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" dependencies = [ "arrow", "chrono", @@ -914,7 +921,8 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" dependencies = [ "arrow", "datafusion-common", @@ -925,7 +933,8 @@ dependencies = [ [[package]] name = "datafusion-ffi" version = "45.0.0" -source = 
"git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" dependencies = [ "abi_stable", "arrow", @@ -945,7 +954,8 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" dependencies = [ "arrow", "arrow-buffer", @@ -974,7 +984,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" dependencies = [ "ahash", "arrow", @@ -996,7 +1007,8 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" dependencies = [ "ahash", "arrow", @@ -1008,7 +1020,8 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" dependencies = [ "arrow", "arrow-array", @@ -1031,7 +1044,8 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" dependencies = [ "arrow", "async-trait", @@ -1046,7 +1060,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1062,7 +1077,8 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1071,7 +1087,8 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" dependencies = [ "datafusion-expr", "quote", @@ 
-1081,7 +1098,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" dependencies = [ "arrow", "chrono", @@ -1099,7 +1117,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" dependencies = [ "ahash", "arrow", @@ -1123,7 +1142,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" dependencies = [ "ahash", "arrow", @@ -1137,7 +1157,8 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" dependencies = [ "arrow", "arrow-schema", @@ -1158,7 +1179,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" dependencies = [ "ahash", "arrow", @@ -1189,7 +1211,8 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" dependencies = [ "arrow", "chrono", @@ -1204,7 +1227,8 @@ dependencies = [ [[package]] name = "datafusion-proto-common" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" dependencies = [ "arrow", "datafusion-common", @@ -1214,7 +1238,8 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "45.0.0" -source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" dependencies = [ "arrow", "arrow-array", diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-table-provider/Cargo.toml index 0e558fdd0..f4e4fda79 100644 --- a/examples/ffi-table-provider/Cargo.toml +++ b/examples/ffi-table-provider/Cargo.toml @@ -23,7 +23,7 @@ edition = "2021" [dependencies] datafusion = { version = "45.0.0" } datafusion-ffi = { version = "45.0.0" } -pyo3 = { version = "0.23", features = 
["extension-module", "abi3", "abi3-py38"] } +pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "54" } arrow-array = { version = "54" } arrow-schema = { version = "54" } diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-table-provider/pyproject.toml index 116efae9c..9cd25b423 100644 --- a/examples/ffi-table-provider/pyproject.toml +++ b/examples/ffi-table-provider/pyproject.toml @@ -21,7 +21,7 @@ build-backend = "maturin" [project] name = "ffi_table_provider" -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Programming Language :: Rust", "Programming Language :: Python :: Implementation :: CPython", diff --git a/pyproject.toml b/pyproject.toml index d16a18aa6..1c2733677 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ name = "datafusion" description = "Build and run queries against data" readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["datafusion", "dataframe", "rust", "query-engine"] classifiers = [ "Development Status :: 2 - Pre-Alpha", @@ -35,7 +35,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/uv.lock b/uv.lock index 587ddc8b7..619b92856 100644 --- a/uv.lock +++ b/uv.lock @@ -1,23 +1,10 @@ version = 1 -requires-python = ">=3.8" +requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.12'", "python_full_version == '3.11.*'", "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", - "python_full_version < '3.9'", -] - -[[package]] -name = "alabaster" -version = "0.7.13" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/94/71/a8ee96d1fd95ca04a0d2e2d9c4081dac4c2d2b12f7ddb899c8cb9bfd1532/alabaster-0.7.13.tar.gz", hash = "sha256:a27a4a084d5e690e16e01e03ad2b2e552c61a65469419b907243193de1a84ae2", size = 11454 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl", hash = "sha256:1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3", size = 13857 }, + "python_full_version < '3.10'", ] [[package]] @@ -25,7 +12,7 @@ name = "alabaster" version = "0.7.16" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = "https://files.pythonhosted.org/packages/c9/3e/13dd8e5ed9094e734ac430b5d0eb4f2bb001708a8b7856cbf8e084e001ba/alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65", size = 23776 } wheels = [ @@ -46,42 +33,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929 }, ] -[[package]] -name = "appnope" -version = "0.1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", 
hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, -] - -[[package]] -name = "astroid" -version = "3.2.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/53/1067e1113ecaf58312357f2cd93063674924119d80d173adc3f6f2387aa2/astroid-3.2.4.tar.gz", hash = "sha256:0e14202810b30da1b735827f78f5157be2bbd4a7a59b7707ca0bfc2fb4c0063a", size = 397576 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/96/b32bbbb46170a1c8b8b1f28c794202e25cfe743565e9d3469b8eb1e0cc05/astroid-3.2.4-py3-none-any.whl", hash = "sha256:413658a61eeca6202a59231abb473f932038fbcbf1666587f66d482083413a25", size = 276348 }, -] - [[package]] name = "astroid" version = "3.3.8" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "typing-extensions", marker = "python_full_version >= '3.9' and python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/80/c5/5c83c48bbf547f3dd8b587529db7cf5a265a3368b33e85e76af8ff6061d3/astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b", size = 398196 } wheels = [ @@ -101,23 +58,11 @@ wheels = [ name = "babel" version = "2.16.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytz", marker = "python_full_version < '3.9'" }, -] sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] -[[package]] -name = "backcall" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, -] - [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -194,14 +139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469 }, { url = 
"https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475 }, { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, - { url = "https://files.pythonhosted.org/packages/48/08/15bf6b43ae9bd06f6b00ad8a91f5a8fe1069d4c9fab550a866755402724e/cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", size = 182457 }, - { url = "https://files.pythonhosted.org/packages/c2/5b/f1523dd545f92f7df468e5f653ffa4df30ac222f3c884e51e139878f1cb5/cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", size = 425932 }, - { url = "https://files.pythonhosted.org/packages/53/93/7e547ab4105969cc8c93b38a667b82a835dd2cc78f3a7dad6130cfd41e1d/cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", size = 448585 }, - { url = "https://files.pythonhosted.org/packages/56/c4/a308f2c332006206bb511de219efeff090e9d63529ba0a77aae72e82248b/cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", size = 456268 }, - { url = "https://files.pythonhosted.org/packages/ca/5b/b63681518265f2f4060d2b60755c1c77ec89e5e045fc3773b72735ddaad5/cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", size = 436592 }, - { url = "https://files.pythonhosted.org/packages/bb/19/b51af9f4a4faa4a8ac5a0e5d5c2522dcd9703d07fac69da34a36c4d960d3/cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", size = 446512 }, - { url = "https://files.pythonhosted.org/packages/e2/63/2bed8323890cb613bbecda807688a31ed11a7fe7afe31f8faaae0206a9a3/cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", size = 171576 }, - { url = "https://files.pythonhosted.org/packages/2f/70/80c33b044ebc79527447fd4fbc5455d514c3bb840dede4455de97da39b4d/cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", size = 181229 }, { url = "https://files.pythonhosted.org/packages/b9/ea/8bb50596b8ffbc49ddd7a1ad305035daa770202a6b782fc164647c2673ad/cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", size = 182220 }, { url = "https://files.pythonhosted.org/packages/ae/11/e77c8cd24f58285a82c23af484cf5b124a376b32644e445960d1a4654c3a/cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", size = 178605 }, { url = "https://files.pythonhosted.org/packages/ed/65/25a8dc32c53bf5b7b6c2686b42ae2ad58743f7ff644844af7cdb29b49361/cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", size = 424910 }, @@ -274,19 +211,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 }, { url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 }, { url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 }, - { url = "https://files.pythonhosted.org/packages/10/bd/6517ea94f2672e801011d50b5d06be2a0deaf566aea27bcdcd47e5195357/charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c", size = 195653 }, - { url = "https://files.pythonhosted.org/packages/e5/0d/815a2ba3f283b4eeaa5ece57acade365c5b4135f65a807a083c818716582/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9", size = 140701 }, - { url = "https://files.pythonhosted.org/packages/aa/17/c94be7ee0d142687e047fe1de72060f6d6837f40eedc26e87e6e124a3fc6/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8", size = 150495 }, - { url = "https://files.pythonhosted.org/packages/f7/33/557ac796c47165fc141e4fb71d7b0310f67e05cb420756f3a82e0a0068e0/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6", size = 142946 }, - { url = "https://files.pythonhosted.org/packages/1e/0d/38ef4ae41e9248d63fc4998d933cae22473b1b2ac4122cf908d0f5eb32aa/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c", size = 144737 }, - { url = "https://files.pythonhosted.org/packages/43/01/754cdb29dd0560f58290aaaa284d43eea343ad0512e6ad3b8b5c11f08592/charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a", size = 147471 }, - { url = "https://files.pythonhosted.org/packages/ba/cd/861883ba5160c7a9bd242c30b2c71074cda2aefcc0addc91118e0d4e0765/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd", size = 140801 }, - { url = "https://files.pythonhosted.org/packages/6f/7f/0c0dad447819e90b93f8ed238cc8f11b91353c23c19e70fa80483a155bed/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd", size = 149312 }, - { url = "https://files.pythonhosted.org/packages/8e/09/9f8abcc6fff60fb727268b63c376c8c79cc37b833c2dfe1f535dfb59523b/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = 
"sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824", size = 152347 }, - { url = "https://files.pythonhosted.org/packages/be/e5/3f363dad2e24378f88ccf63ecc39e817c29f32e308ef21a7a6d9c1201165/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca", size = 149888 }, - { url = "https://files.pythonhosted.org/packages/e4/10/a78c0e91f487b4ad0ef7480ac765e15b774f83de2597f1b6ef0eaf7a2f99/charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b", size = 145169 }, - { url = "https://files.pythonhosted.org/packages/d3/81/396e7d7f5d7420da8273c91175d2e9a3f569288e3611d521685e4b9ac9cc/charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e", size = 95094 }, - { url = "https://files.pythonhosted.org/packages/40/bb/20affbbd9ea29c71ea123769dc568a6d42052ff5089c5fe23e21e21084a6/charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4", size = 102139 }, { url = "https://files.pythonhosted.org/packages/7f/c0/b913f8f02836ed9ab32ea643c6fe4d3325c3d8627cf6e78098671cafff86/charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41", size = 197867 }, { url = "https://files.pythonhosted.org/packages/0f/6c/2bee440303d705b6fb1e2ec789543edec83d32d258299b16eed28aad48e0/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f", size = 141385 }, { url = "https://files.pythonhosted.org/packages/3d/04/cb42585f07f6f9fd3219ffb6f37d5a39b4fd2db2355b23683060029c35f7/charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2", size = 151367 }, @@ -351,11 +275,9 @@ wheels = [ [[package]] name = "datafusion" -version = "44.0.0" source = { editable = "." 
} dependencies = [ - { name = "pyarrow", version = "17.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pyarrow" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -369,20 +291,16 @@ dev = [ { name = "toml" }, ] docs = [ - { name = "ipython", version = "8.12.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "ipython", version = "8.18.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "ipython", version = "8.31.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "jinja2" }, { name = "myst-parser", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "myst-parser", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pandas", version = "2.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "pandas", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "pandas" }, { name = "pickleshare" }, { name = "pydata-sphinx-theme" }, - { name = "setuptools", version = "75.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "setuptools", version = "75.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "setuptools" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "sphinx-autoapi" }, ] @@ -435,28 +353,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 }, ] -[[package]] -name = "docutils" -version = "0.20.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/53/a5da4f2c5739cf66290fac1431ee52aff6851c7c8ffd8264f13affd7bcdd/docutils-0.20.1.tar.gz", hash = "sha256:f08a4e276c3a1583a86dce3e34aba3fe04d02bba2dd51ed16106244e8a923e3b", size = 2058365 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl", hash = "sha256:96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6", size = 572666 }, -] - [[package]] name = "docutils" version = "0.21.2" source = { registry = 
"https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444 } wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, @@ -503,8 +403,7 @@ name = "importlib-metadata" version = "8.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "zipp", version = "3.20.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "zipp", version = "3.21.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "zipp", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 } wheels = [ @@ -520,52 +419,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] -[[package]] -name = "ipython" -version = "8.12.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "appnope", marker = "python_full_version < '3.9' and sys_platform == 'darwin'" }, - { name = "backcall", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version < '3.9'" }, - { name = "jedi", marker = "python_full_version < '3.9'" }, - { name = "matplotlib-inline", marker = "python_full_version < '3.9'" }, - { name = "pexpect", marker = "python_full_version < '3.9' and sys_platform != 'win32'" }, - { name = "pickleshare", marker = "python_full_version < '3.9'" }, - { name = "prompt-toolkit", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "stack-data", marker = "python_full_version < '3.9'" }, - { name = "traitlets", marker = "python_full_version < '3.9'" }, - { name = "typing-extensions", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, -] - [[package]] name = "ipython" version = "8.18.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + 
"python_full_version < '3.10'", ] dependencies = [ - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version == '3.9.*'" }, - { name = "exceptiongroup", marker = "python_full_version == '3.9.*'" }, - { name = "jedi", marker = "python_full_version == '3.9.*'" }, - { name = "matplotlib-inline", marker = "python_full_version == '3.9.*'" }, - { name = "pexpect", marker = "python_full_version == '3.9.*' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "stack-data", marker = "python_full_version == '3.9.*'" }, - { name = "traitlets", marker = "python_full_version == '3.9.*'" }, - { name = "typing-extensions", marker = "python_full_version == '3.9.*'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "decorator", marker = "python_full_version < '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "jedi", marker = "python_full_version < '3.10'" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.10'" }, + { name = "pexpect", marker = "python_full_version < '3.10' and sys_platform != 'win32'" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "stack-data", marker = "python_full_version < '3.10'" }, + { name = "traitlets", marker = "python_full_version < '3.10'" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b1/b9/3ba6c45a6df813c09a48bac313c22ff83efa26cbb55011218d925a46e2ad/ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27", size = 5486330 } wheels = [ @@ -616,8 +488,7 @@ name = "jinja2" version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markupsafe", version = "2.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "markupsafe", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "markupsafe" }, ] sdist = { url = "https://files.pythonhosted.org/packages/af/92/b3130cbbf5591acf9ade8708c365f3238046ac7cb8ccba6e81abccb0ccff/jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb", size = 244674 } wheels = [ @@ -636,77 +507,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] -[[package]] -name = "markupsafe" -version = "2.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/87/5b/aae44c6655f3801e81aa3eef09dbbf012431987ba564d7231722f68df02d/MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b", size = 19384 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/54/ad5eb37bf9d51800010a74e4665425831a9db4e7c4e0fde4352e391e808e/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc", size = 18206 }, - { url = "https://files.pythonhosted.org/packages/6a/4a/a4d49415e600bacae038c67f9fecc1d5433b9d3c71a4de6f33537b89654c/MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5", size = 14079 }, - { url = "https://files.pythonhosted.org/packages/0a/7b/85681ae3c33c385b10ac0f8dd025c30af83c78cec1c37a6aa3b55e67f5ec/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46", size = 26620 }, - { url = "https://files.pythonhosted.org/packages/7c/52/2b1b570f6b8b803cef5ac28fdf78c0da318916c7d2fe9402a84d591b394c/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f", size = 25818 }, - { url = "https://files.pythonhosted.org/packages/29/fe/a36ba8c7ca55621620b2d7c585313efd10729e63ef81e4e61f52330da781/MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900", size = 25493 }, - { url = "https://files.pythonhosted.org/packages/60/ae/9c60231cdfda003434e8bd27282b1f4e197ad5a710c14bee8bea8a9ca4f0/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff", size = 30630 }, - { url = "https://files.pythonhosted.org/packages/65/dc/1510be4d179869f5dafe071aecb3f1f41b45d37c02329dfba01ff59e5ac5/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad", size = 29745 }, - { url = "https://files.pythonhosted.org/packages/30/39/8d845dd7d0b0613d86e0ef89549bfb5f61ed781f59af45fc96496e897f3a/MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd", size = 30021 }, - { url = "https://files.pythonhosted.org/packages/c7/5c/356a6f62e4f3c5fbf2602b4771376af22a3b16efa74eb8716fb4e328e01e/MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4", size = 16659 }, - { url = "https://files.pythonhosted.org/packages/69/48/acbf292615c65f0604a0c6fc402ce6d8c991276e16c80c46a8f758fbd30c/MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5", size = 17213 }, - { url = "https://files.pythonhosted.org/packages/11/e7/291e55127bb2ae67c64d66cef01432b5933859dfb7d6949daa721b89d0b3/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f", size = 18219 }, - { url = "https://files.pythonhosted.org/packages/6b/cb/aed7a284c00dfa7c0682d14df85ad4955a350a21d2e3b06d8240497359bf/MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2", size = 14098 }, - { url = "https://files.pythonhosted.org/packages/1c/cf/35fe557e53709e93feb65575c93927942087e9b97213eabc3fe9d5b25a55/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced", size = 29014 }, - { url = 
"https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5", size = 28220 }, - { url = "https://files.pythonhosted.org/packages/0c/40/2e73e7d532d030b1e41180807a80d564eda53babaf04d65e15c1cf897e40/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c", size = 27756 }, - { url = "https://files.pythonhosted.org/packages/18/46/5dca760547e8c59c5311b332f70605d24c99d1303dd9a6e1fc3ed0d73561/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f", size = 33988 }, - { url = "https://files.pythonhosted.org/packages/6d/c5/27febe918ac36397919cd4a67d5579cbbfa8da027fa1238af6285bb368ea/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a", size = 32718 }, - { url = "https://files.pythonhosted.org/packages/f8/81/56e567126a2c2bc2684d6391332e357589a96a76cb9f8e5052d85cb0ead8/MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f", size = 33317 }, - { url = "https://files.pythonhosted.org/packages/00/0b/23f4b2470accb53285c613a3ab9ec19dc944eaf53592cb6d9e2af8aa24cc/MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906", size = 16670 }, - { url = "https://files.pythonhosted.org/packages/b7/a2/c78a06a9ec6d04b3445a949615c4c7ed86a0b2eb68e44e7541b9d57067cc/MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617", size = 17224 }, - { url = "https://files.pythonhosted.org/packages/53/bd/583bf3e4c8d6a321938c13f49d44024dbe5ed63e0a7ba127e454a66da974/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1", size = 18215 }, - { url = "https://files.pythonhosted.org/packages/48/d6/e7cd795fc710292c3af3a06d80868ce4b02bfbbf370b7cee11d282815a2a/MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4", size = 14069 }, - { url = "https://files.pythonhosted.org/packages/51/b5/5d8ec796e2a08fc814a2c7d2584b55f889a55cf17dd1a90f2beb70744e5c/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee", size = 29452 }, - { url = "https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5", size = 28462 }, - { url = "https://files.pythonhosted.org/packages/2d/75/fd6cb2e68780f72d47e6671840ca517bda5ef663d30ada7616b0462ad1e3/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b", size = 27869 }, - { url = 
"https://files.pythonhosted.org/packages/b0/81/147c477391c2750e8fc7705829f7351cf1cd3be64406edcf900dc633feb2/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a", size = 33906 }, - { url = "https://files.pythonhosted.org/packages/8b/ff/9a52b71839d7a256b563e85d11050e307121000dcebc97df120176b3ad93/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f", size = 32296 }, - { url = "https://files.pythonhosted.org/packages/88/07/2dc76aa51b481eb96a4c3198894f38b480490e834479611a4053fbf08623/MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169", size = 33038 }, - { url = "https://files.pythonhosted.org/packages/96/0c/620c1fb3661858c0e37eb3cbffd8c6f732a67cd97296f725789679801b31/MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad", size = 16572 }, - { url = "https://files.pythonhosted.org/packages/3f/14/c3554d512d5f9100a95e737502f4a2323a1959f6d0d01e0d0997b35f7b10/MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb", size = 17127 }, - { url = "https://files.pythonhosted.org/packages/f8/ff/2c942a82c35a49df5de3a630ce0a8456ac2969691b230e530ac12314364c/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a", size = 18192 }, - { url = "https://files.pythonhosted.org/packages/4f/14/6f294b9c4f969d0c801a4615e221c1e084722ea6114ab2114189c5b8cbe0/MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46", size = 14072 }, - { url = "https://files.pythonhosted.org/packages/81/d4/fd74714ed30a1dedd0b82427c02fa4deec64f173831ec716da11c51a50aa/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532", size = 26928 }, - { url = "https://files.pythonhosted.org/packages/c7/bd/50319665ce81bb10e90d1cf76f9e1aa269ea6f7fa30ab4521f14d122a3df/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab", size = 26106 }, - { url = "https://files.pythonhosted.org/packages/4c/6f/f2b0f675635b05f6afd5ea03c094557bdb8622fa8e673387444fe8d8e787/MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68", size = 25781 }, - { url = "https://files.pythonhosted.org/packages/51/e0/393467cf899b34a9d3678e78961c2c8cdf49fb902a959ba54ece01273fb1/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0", size = 30518 }, - { url = "https://files.pythonhosted.org/packages/f6/02/5437e2ad33047290dafced9df741d9efc3e716b75583bbd73a9984f1b6f7/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4", size = 29669 }, - { url = "https://files.pythonhosted.org/packages/0e/7d/968284145ffd9d726183ed6237c77938c021abacde4e073020f920e060b2/MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3", size = 29933 }, - { url = "https://files.pythonhosted.org/packages/bf/f3/ecb00fc8ab02b7beae8699f34db9357ae49d9f21d4d3de6f305f34fa949e/MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff", size = 16656 }, - { url = "https://files.pythonhosted.org/packages/92/21/357205f03514a49b293e214ac39de01fadd0970a6e05e4bf1ddd0ffd0881/MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029", size = 17206 }, - { url = "https://files.pythonhosted.org/packages/0f/31/780bb297db036ba7b7bbede5e1d7f1e14d704ad4beb3ce53fb495d22bc62/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf", size = 18193 }, - { url = "https://files.pythonhosted.org/packages/6c/77/d77701bbef72892affe060cdacb7a2ed7fd68dae3b477a8642f15ad3b132/MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2", size = 14073 }, - { url = "https://files.pythonhosted.org/packages/d9/a7/1e558b4f78454c8a3a0199292d96159eb4d091f983bc35ef258314fe7269/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8", size = 26486 }, - { url = "https://files.pythonhosted.org/packages/5f/5a/360da85076688755ea0cceb92472923086993e86b5613bbae9fbc14136b0/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3", size = 25685 }, - { url = "https://files.pythonhosted.org/packages/6a/18/ae5a258e3401f9b8312f92b028c54d7026a97ec3ab20bfaddbdfa7d8cce8/MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465", size = 25338 }, - { url = "https://files.pythonhosted.org/packages/0b/cc/48206bd61c5b9d0129f4d75243b156929b04c94c09041321456fd06a876d/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e", size = 30439 }, - { url = "https://files.pythonhosted.org/packages/d1/06/a41c112ab9ffdeeb5f77bc3e331fdadf97fa65e52e44ba31880f4e7f983c/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea", size = 29531 }, - { url = "https://files.pythonhosted.org/packages/02/8c/ab9a463301a50dab04d5472e998acbd4080597abc048166ded5c7aa768c8/MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6", size = 29823 }, - { url = "https://files.pythonhosted.org/packages/bc/29/9bc18da763496b055d8e98ce476c8e718dcfd78157e17f555ce6dd7d0895/MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf", size = 16658 }, - { url = "https://files.pythonhosted.org/packages/f6/f8/4da07de16f10551ca1f640c92b5f316f9394088b183c6a57183df6de5ae4/MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5", size = 17211 }, -] - [[package]] name = "markupsafe" version = "3.0.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - 
"python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } wheels = [ { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, @@ -832,18 +636,15 @@ name = "myst-parser" version = "3.0.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", - "python_full_version < '3.9'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, { name = "jinja2", marker = "python_full_version < '3.10'" }, { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version < '3.10'" }, { name = "pyyaml", marker = "python_full_version < '3.10'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/49/64/e2f13dac02f599980798c01156393b781aec983b52a6e4057ee58f07c43a/myst_parser-3.0.1.tar.gz", hash = "sha256:88f0cb406cb363b077d176b51c476f62d60604d68a8dcdf4832e080441301a87", size = 92392 } wheels = [ @@ -860,7 +661,7 @@ resolution-markers = [ "python_full_version == '3.10.*'", ] dependencies = [ - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, { name = "mdit-py-plugins", marker = "python_full_version >= '3.10'" }, @@ -872,50 +673,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ca/b4/b036f8fdb667587bb37df29dc6644681dd78b7a2a6321a34684b79412b28/myst_parser-4.0.0-py3-none-any.whl", hash = "sha256:b9317997552424448c6096c2558872fdb6f81d3ecb3a40ce84a7518798f3f28d", size = 84563 }, ] -[[package]] -name = "numpy" -version = "1.24.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/a4/9b/027bec52c633f6556dba6b722d9a0befb40498b9ceddd29cbe67a45a127c/numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463", size = 10911229 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/80/6cdfb3e275d95155a34659163b83c09e3a3ff9f1456880bec6cc63d71083/numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64", size = 19789140 }, - { url = "https://files.pythonhosted.org/packages/64/5f/3f01d753e2175cfade1013eea08db99ba1ee4bdb147ebcf3623b75d12aa7/numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1", size = 13854297 }, - { url = "https://files.pythonhosted.org/packages/5a/b3/2f9c21d799fa07053ffa151faccdceeb69beec5a010576b8991f614021f7/numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4", size = 13995611 }, - { url = "https://files.pythonhosted.org/packages/10/be/ae5bf4737cb79ba437879915791f6f26d92583c738d7d960ad94e5c36adf/numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6", size = 17282357 }, - { url = "https://files.pythonhosted.org/packages/c0/64/908c1087be6285f40e4b3e79454552a701664a079321cff519d8c7051d06/numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc", size = 12429222 }, - { url = "https://files.pythonhosted.org/packages/22/55/3d5a7c1142e0d9329ad27cece17933b0e2ab4e54ddc5c1861fbfeb3f7693/numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e", size = 14841514 }, - { url = "https://files.pythonhosted.org/packages/a9/cc/5ed2280a27e5dab12994c884f1f4d8c3bd4d885d02ae9e52a9d213a6a5e2/numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810", size = 19775508 }, - { url = "https://files.pythonhosted.org/packages/c0/bc/77635c657a3668cf652806210b8662e1aff84b818a55ba88257abf6637a8/numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254", size = 13840033 }, - { url = "https://files.pythonhosted.org/packages/a7/4c/96cdaa34f54c05e97c1c50f39f98d608f96f0677a6589e64e53104e22904/numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7", size = 13991951 }, - { url = "https://files.pythonhosted.org/packages/22/97/dfb1a31bb46686f09e68ea6ac5c63fdee0d22d7b23b8f3f7ea07712869ef/numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5", size = 17278923 }, - { url = "https://files.pythonhosted.org/packages/35/e2/76a11e54139654a324d107da1d98f99e7aa2a7ef97cfd7c631fba7dbde71/numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d", size = 12422446 }, - { url = "https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694", size = 14834466 }, - { url = "https://files.pythonhosted.org/packages/11/10/943cfb579f1a02909ff96464c69893b1d25be3731b5d3652c2e0cf1281ea/numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61", size = 19780722 }, - { url = "https://files.pythonhosted.org/packages/a7/ae/f53b7b265fdc701e663fbb322a8e9d4b14d9cb7b2385f45ddfabfc4327e4/numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f", size = 13843102 }, - { url = "https://files.pythonhosted.org/packages/25/6f/2586a50ad72e8dbb1d8381f837008a0321a3516dfd7cb57fc8cf7e4bb06b/numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e", size = 14039616 }, - { url = "https://files.pythonhosted.org/packages/98/5d/5738903efe0ecb73e51eb44feafba32bdba2081263d40c5043568ff60faf/numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc", size = 17316263 }, - { url = "https://files.pythonhosted.org/packages/d1/57/8d328f0b91c733aa9aa7ee540dbc49b58796c862b4fbcb1146c701e888da/numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2", size = 12455660 }, - { url = "https://files.pythonhosted.org/packages/69/65/0d47953afa0ad569d12de5f65d964321c208492064c38fe3b0b9744f8d44/numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706", size = 14868112 }, - { url = "https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400", size = 19816549 }, - { url = "https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f", size = 13879950 }, - { url = "https://files.pythonhosted.org/packages/8f/27/91894916e50627476cff1a4e4363ab6179d01077d71b9afed41d9e1f18bf/numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9", size = 14030228 }, - { url = "https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d", size = 17311170 }, - { url = "https://files.pythonhosted.org/packages/18/9d/e02ace5d7dfccee796c37b995c63322674daf88ae2f4a4724c5dd0afcc91/numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835", size = 12454918 }, - { url = "https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8", size = 14867441 }, - { url = "https://files.pythonhosted.org/packages/a4/fd/8dff40e25e937c94257455c237b9b6bf5a30d42dd1cc11555533be099492/numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef", size = 19156590 }, - { url = "https://files.pythonhosted.org/packages/42/e7/4bf953c6e05df90c6d351af69966384fed8e988d0e8c54dad7103b59f3ba/numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a", size = 16705744 }, - { url = "https://files.pythonhosted.org/packages/fc/dd/9106005eb477d022b60b3817ed5937a43dad8fd1f20b0610ea8a32fcb407/numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", 
hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2", size = 14734290 }, -] - [[package]] name = "numpy" version = "2.0.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 } wheels = [ @@ -1041,63 +804,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] -[[package]] -name = "pandas" -version = "2.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "python-dateutil", marker = "python_full_version < '3.9'" }, - { name = "pytz", marker = "python_full_version < '3.9'" }, - { name = "tzdata", marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/a7/824332581e258b5aa4f3763ecb2a797e5f9a54269044ba2e50ac19936b32/pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c", size = 5284455 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/b2/0d4a5729ce1ce11630c4fc5d5522a33b967b3ca146c210f58efde7c40e99/pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8", size = 11760908 }, - { url = "https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f", size = 10823486 }, - { url = "https://files.pythonhosted.org/packages/c2/59/cb4234bc9b968c57e81861b306b10cd8170272c57b098b724d3de5eda124/pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183", size = 11571897 }, - { url = "https://files.pythonhosted.org/packages/e3/59/35a2892bf09ded9c1bf3804461efe772836a5261ef5dfb4e264ce813ff99/pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0", size = 12306421 }, - { url = "https://files.pythonhosted.org/packages/94/71/3a0c25433c54bb29b48e3155b959ac78f4c4f2f06f94d8318aac612cb80f/pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210", size = 9540792 }, - { url = "https://files.pythonhosted.org/packages/ed/30/b97456e7063edac0e5a405128065f0cd2033adfe3716fb2256c186bd41d0/pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e", size = 10664333 }, - { url = "https://files.pythonhosted.org/packages/b3/92/a5e5133421b49e901a12e02a6a7ef3a0130e10d13db8cb657fdd0cba3b90/pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8", size = 11645672 }, - { url = 
"https://files.pythonhosted.org/packages/8f/bb/aea1fbeed5b474cb8634364718abe9030d7cc7a30bf51f40bd494bbc89a2/pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26", size = 10693229 }, - { url = "https://files.pythonhosted.org/packages/d6/90/e7d387f1a416b14e59290baa7a454a90d719baebbf77433ff1bdcc727800/pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d", size = 11581591 }, - { url = "https://files.pythonhosted.org/packages/d0/28/88b81881c056376254618fad622a5e94b5126db8c61157ea1910cd1c040a/pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df", size = 12219370 }, - { url = "https://files.pythonhosted.org/packages/e4/a5/212b9039e25bf8ebb97e417a96660e3dc925dacd3f8653d531b8f7fd9be4/pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd", size = 9482935 }, - { url = "https://files.pythonhosted.org/packages/9e/71/756a1be6bee0209d8c0d8c5e3b9fc72c00373f384a4017095ec404aec3ad/pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b", size = 10607692 }, - { url = "https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061", size = 11653303 }, - { url = "https://files.pythonhosted.org/packages/53/c3/f8e87361f7fdf42012def602bfa2a593423c729f5cb7c97aed7f51be66ac/pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5", size = 10710932 }, - { url = "https://files.pythonhosted.org/packages/a7/87/828d50c81ce0f434163bf70b925a0eec6076808e0bca312a79322b141f66/pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089", size = 11684018 }, - { url = "https://files.pythonhosted.org/packages/f8/7f/5b047effafbdd34e52c9e2d7e44f729a0655efafb22198c45cf692cdc157/pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0", size = 12353723 }, - { url = "https://files.pythonhosted.org/packages/ea/ae/26a2eda7fa581347d69e51f93892493b2074ef3352ac71033c9f32c52389/pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02", size = 9646403 }, - { url = "https://files.pythonhosted.org/packages/c3/6c/ea362eef61f05553aaf1a24b3e96b2d0603f5dc71a3bd35688a24ed88843/pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78", size = 10777638 }, - { url = "https://files.pythonhosted.org/packages/f8/c7/cfef920b7b457dff6928e824896cb82367650ea127d048ee0b820026db4f/pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b", size = 11834160 }, - { url = "https://files.pythonhosted.org/packages/6c/1c/689c9d99bc4e5d366a5fd871f0bcdee98a6581e240f96b78d2d08f103774/pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e", size = 10862752 }, - { url = 
"https://files.pythonhosted.org/packages/cc/b8/4d082f41c27c95bf90485d1447b647cc7e5680fea75e315669dc6e4cb398/pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b", size = 11715852 }, - { url = "https://files.pythonhosted.org/packages/9e/0d/91a9fd2c202f2b1d97a38ab591890f86480ecbb596cbc56d035f6f23fdcc/pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641", size = 12398496 }, - { url = "https://files.pythonhosted.org/packages/26/7d/d8aa0a2c4f3f5f8ea59fb946c8eafe8f508090ca73e2b08a9af853c1103e/pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682", size = 9630766 }, - { url = "https://files.pythonhosted.org/packages/9a/f2/0ad053856debbe90c83de1b4f05915f85fd2146f20faf9daa3b320d36df3/pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc", size = 10755902 }, -] - [[package]] name = "pandas" version = "2.2.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] dependencies = [ - { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.9'" }, - { name = "pytz", marker = "python_full_version >= '3.9'" }, - { name = "tzdata", marker = "python_full_version >= '3.9'" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, ] sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } wheels = [ @@ -1213,65 +929,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, ] -[[package]] -name = "pyarrow" -version = "17.0.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "numpy", version = "1.24.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/4e/ea6d43f324169f8aec0e57569443a38bab4b398d09769ca64f7b4d467de3/pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", size = 1112479 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/39/5d/78d4b040bc5ff2fc6c3d03e80fca396b742f6c125b8af06bcf7427f931bc/pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", size = 28994846 }, - { url = 
"https://files.pythonhosted.org/packages/3b/73/8ed168db7642e91180330e4ea9f3ff8bab404678f00d32d7df0871a4933b/pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", size = 27165908 }, - { url = "https://files.pythonhosted.org/packages/81/36/e78c24be99242063f6d0590ef68c857ea07bdea470242c361e9a15bd57a4/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", size = 39264209 }, - { url = "https://files.pythonhosted.org/packages/18/4c/3db637d7578f683b0a8fb8999b436bdbedd6e3517bd4f90c70853cf3ad20/pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", size = 39862883 }, - { url = "https://files.pythonhosted.org/packages/81/3c/0580626896c842614a523e66b351181ed5bb14e5dfc263cd68cea2c46d90/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8", size = 38723009 }, - { url = "https://files.pythonhosted.org/packages/ee/fb/c1b47f0ada36d856a352da261a44d7344d8f22e2f7db3945f8c3b81be5dd/pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", size = 39855626 }, - { url = "https://files.pythonhosted.org/packages/19/09/b0a02908180a25d57312ab5919069c39fddf30602568980419f4b02393f6/pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", size = 25147242 }, - { url = "https://files.pythonhosted.org/packages/f9/46/ce89f87c2936f5bb9d879473b9663ce7a4b1f4359acc2f0eb39865eaa1af/pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", size = 29028748 }, - { url = "https://files.pythonhosted.org/packages/8d/8e/ce2e9b2146de422f6638333c01903140e9ada244a2a477918a368306c64c/pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", size = 27190965 }, - { url = "https://files.pythonhosted.org/packages/3b/c8/5675719570eb1acd809481c6d64e2136ffb340bc387f4ca62dce79516cea/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", size = 39269081 }, - { url = "https://files.pythonhosted.org/packages/5e/78/3931194f16ab681ebb87ad252e7b8d2c8b23dad49706cadc865dff4a1dd3/pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", size = 39864921 }, - { url = "https://files.pythonhosted.org/packages/d8/81/69b6606093363f55a2a574c018901c40952d4e902e670656d18213c71ad7/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", size = 38740798 }, - { url = "https://files.pythonhosted.org/packages/4c/21/9ca93b84b92ef927814cb7ba37f0774a484c849d58f0b692b16af8eebcfb/pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", size = 39871877 }, - { url = "https://files.pythonhosted.org/packages/30/d1/63a7c248432c71c7d3ee803e706590a0b81ce1a8d2b2ae49677774b813bb/pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = 
"sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", size = 25151089 }, - { url = "https://files.pythonhosted.org/packages/d4/62/ce6ac1275a432b4a27c55fe96c58147f111d8ba1ad800a112d31859fae2f/pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", size = 29019418 }, - { url = "https://files.pythonhosted.org/packages/8e/0a/dbd0c134e7a0c30bea439675cc120012337202e5fac7163ba839aa3691d2/pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", size = 27152197 }, - { url = "https://files.pythonhosted.org/packages/cb/05/3f4a16498349db79090767620d6dc23c1ec0c658a668d61d76b87706c65d/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", size = 39263026 }, - { url = "https://files.pythonhosted.org/packages/c2/0c/ea2107236740be8fa0e0d4a293a095c9f43546a2465bb7df34eee9126b09/pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", size = 39880798 }, - { url = "https://files.pythonhosted.org/packages/f6/b0/b9164a8bc495083c10c281cc65064553ec87b7537d6f742a89d5953a2a3e/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", size = 38715172 }, - { url = "https://files.pythonhosted.org/packages/f1/c4/9625418a1413005e486c006e56675334929fad864347c5ae7c1b2e7fe639/pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", size = 39874508 }, - { url = "https://files.pythonhosted.org/packages/ae/49/baafe2a964f663413be3bd1cf5c45ed98c5e42e804e2328e18f4570027c1/pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", size = 25099235 }, - { url = "https://files.pythonhosted.org/packages/8d/bd/8f52c1d7b430260f80a349cffa2df351750a737b5336313d56dcadeb9ae1/pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", size = 28999345 }, - { url = "https://files.pythonhosted.org/packages/64/d9/51e35550f2f18b8815a2ab25948f735434db32000c0e91eba3a32634782a/pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", size = 27168441 }, - { url = "https://files.pythonhosted.org/packages/18/d8/7161d87d07ea51be70c49f615004c1446d5723622a18b2681f7e4b71bf6e/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", size = 39363163 }, - { url = "https://files.pythonhosted.org/packages/3f/08/bc497130789833de09e345e3ce4647e3ce86517c4f70f2144f0367ca378b/pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", size = 39965253 }, - { url = "https://files.pythonhosted.org/packages/d3/2e/493dd7db889402b4c7871ca7dfdd20f2c5deedbff802d3eb8576359930f9/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", size = 38805378 }, - { url = 
"https://files.pythonhosted.org/packages/e6/c1/4c6bcdf7a820034aa91a8b4d25fef38809be79b42ca7aaa16d4680b0bbac/pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", size = 39958364 }, - { url = "https://files.pythonhosted.org/packages/d1/db/42ac644453cfdfc60fe002b46d647fe7a6dfad753ef7b28e99b4c936ad5d/pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", size = 25229211 }, - { url = "https://files.pythonhosted.org/packages/43/e0/a898096d35be240aa61fb2d54db58b86d664b10e1e51256f9300f47565e8/pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", size = 29007881 }, - { url = "https://files.pythonhosted.org/packages/59/22/f7d14907ed0697b5dd488d393129f2738629fa5bcba863e00931b7975946/pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", size = 27178117 }, - { url = "https://files.pythonhosted.org/packages/bf/ee/661211feac0ed48467b1d5c57298c91403809ec3ab78b1d175e1d6ad03cf/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", size = 39273896 }, - { url = "https://files.pythonhosted.org/packages/af/61/bcd9b58e38ead6ad42b9ed00da33a3f862bc1d445e3d3164799c25550ac2/pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", size = 39875438 }, - { url = "https://files.pythonhosted.org/packages/75/63/29d1bfcc57af73cde3fc3baccab2f37548de512dbe0ab294b033cd203516/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", size = 38735092 }, - { url = "https://files.pythonhosted.org/packages/39/f4/90258b4de753df7cc61cefb0312f8abcf226672e96cc64996e66afce817a/pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", size = 39867610 }, - { url = "https://files.pythonhosted.org/packages/e7/f6/b75d4816c32f1618ed31a005ee635dd1d91d8164495d94f2ea092f594661/pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", size = 25148611 }, -] - [[package]] name = "pyarrow" version = "18.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } wheels = [ { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, @@ -1332,10 +993,8 @@ version = "0.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "beautifulsoup4" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "docutils", 
version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "docutils" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/d6/3921de802cf1ee771f0e76c9068b52498aeb8eeec6b830ff931c81c7ecf3/pydata_sphinx_theme-0.8.0.tar.gz", hash = "sha256:9f72015d9c572ea92e3007ab221a8325767c426783b6b9941813e65fa988dc90", size = 1123746 } @@ -1349,13 +1008,11 @@ version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecated" }, - { name = "pyjwt", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version < '3.9'" }, - { name = "pyjwt", version = "2.10.1", source = { registry = "https://pypi.org/simple" }, extra = ["crypto"], marker = "python_full_version >= '3.9'" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "pynacl" }, { name = "requests" }, { name = "typing-extensions" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/16/ce/aa91d30040d9552c274e7ea8bd10a977600d508d579a4bb262b95eccf961/pygithub-2.5.0.tar.gz", hash = "sha256:e1613ac508a9be710920d26eb18b1905ebd9926aa49398e88151c1b526aad3cf", size = 3552804 } wheels = [ @@ -1371,33 +1028,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, ] -[[package]] -name = "pyjwt" -version = "2.9.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/fb/68/ce067f09fca4abeca8771fe667d89cc347d1e99da3e093112ac329c6020e/pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c", size = 78825 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/84/0fdf9b18ba31d69877bd39c9cd6052b47f3761e9910c15de788e519f079f/PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850", size = 22344 }, -] - -[package.optional-dependencies] -crypto = [ - { name = "cryptography", marker = "python_full_version < '3.9'" }, -] - [[package]] name = "pyjwt" version = "2.10.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = 
"sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 } wheels = [ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 }, @@ -1405,7 +1039,7 @@ wheels = [ [package.optional-dependencies] crypto = [ - { name = "cryptography", marker = "python_full_version >= '3.9'" }, + { name = "cryptography" }, ] [[package]] @@ -1508,13 +1142,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597 }, { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527 }, { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446 }, - { url = "https://files.pythonhosted.org/packages/74/d9/323a59d506f12f498c2097488d80d16f4cf965cee1791eab58b56b19f47a/PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", size = 183218 }, - { url = "https://files.pythonhosted.org/packages/74/cc/20c34d00f04d785f2028737e2e2a8254e1425102e730fee1d6396f832577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", size = 728067 }, - { url = "https://files.pythonhosted.org/packages/20/52/551c69ca1501d21c0de51ddafa8c23a0191ef296ff098e98358f69080577/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", size = 757812 }, - { url = "https://files.pythonhosted.org/packages/fd/7f/2c3697bba5d4aa5cc2afe81826d73dfae5f049458e44732c7a0938baa673/PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", size = 746531 }, - { url = "https://files.pythonhosted.org/packages/8c/ab/6226d3df99900e580091bb44258fde77a8433511a86883bd4681ea19a858/PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", size = 800820 }, - { url = "https://files.pythonhosted.org/packages/a0/99/a9eb0f3e710c06c5d922026f6736e920d431812ace24aae38228d0d64b04/PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", size = 145514 }, - { url = "https://files.pythonhosted.org/packages/75/8a/ee831ad5fafa4431099aa4e078d4c8efd43cd5e48fbc774641d233b683a9/PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", size = 162702 }, { url = "https://files.pythonhosted.org/packages/65/d8/b7a1db13636d7fb7d4ff431593c510c8b8fca920ade06ca8ef20015493c5/PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", size = 184777 }, { url = 
"https://files.pythonhosted.org/packages/0a/02/6ec546cd45143fdf9840b2c6be8d875116a64076218b61d68e12548e5839/PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", size = 172318 }, { url = "https://files.pythonhosted.org/packages/0e/9a/8cc68be846c972bda34f6c2a93abb644fb2476f4dcc924d52175786932c9/PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", size = 720891 }, @@ -1534,8 +1161,7 @@ dependencies = [ { name = "certifi" }, { name = "charset-normalizer" }, { name = "idna" }, - { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "urllib3", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "urllib3" }, ] sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } wheels = [ @@ -1567,28 +1193,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/94/0498cdb7316ed67a1928300dd87d659c933479f44dec51b4f62bfd1f8028/ruff-0.9.1-py3-none-win_arm64.whl", hash = "sha256:1cd76c7f9c679e6e8f2af8f778367dca82b95009bc7b1a85a47f1521ae524fa7", size = 9145708 }, ] -[[package]] -name = "setuptools" -version = "75.3.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 }, -] - [[package]] name = "setuptools" version = "75.8.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/92/ec/089608b791d210aec4e7f97488e67ab0d33add3efccb83a056cbafe3a2a6/setuptools-75.8.0.tar.gz", hash = "sha256:c5afc8f407c626b8313a86e10311dd3f661c6cd9c09d4bf8c15c0e11f9f2b0e6", size = 1343222 } wheels = [ { url = "https://files.pythonhosted.org/packages/69/8a/b9dc7678803429e4a3bc9ba462fa3dd9066824d3c607490235c6a796be5a/setuptools-75.8.0-py3-none-any.whl", hash = "sha256:e3982f444617239225d675215d51f6ba05f845d4eec313da4418fdbb56fb27e3", size = 1228782 }, @@ -1621,63 +1229,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, ] -[[package]] -name = "sphinx" -version = "7.1.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -dependencies = [ - { name = "alabaster", version = "0.7.13", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.9'" }, - { name = "babel", marker = "python_full_version < '3.9'" }, - { name = "colorama", marker = "python_full_version < '3.9' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.20.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "imagesize", marker = "python_full_version < '3.9'" }, - { name = "importlib-metadata", marker = "python_full_version < '3.9'" }, - { name = "jinja2", marker = "python_full_version < '3.9'" }, - { name = "packaging", marker = "python_full_version < '3.9'" }, - { name = "pygments", marker = "python_full_version < '3.9'" }, - { name = "requests", marker = "python_full_version < '3.9'" }, - { name = "snowballstemmer", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-applehelp", version = "1.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-devhelp", version = "1.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-qthelp", version = "1.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinxcontrib-serializinghtml", version = "1.1.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dc/01/688bdf9282241dca09fe6e3a1110eda399fa9b10d0672db609e37c2e7a39/sphinx-7.1.2.tar.gz", hash = "sha256:780f4d32f1d7d1126576e0e5ecc19dc32ab76cd24e950228dcf7b1f6d3d9e22f", size = 6828258 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/17/325cf6a257d84751a48ae90752b3d8fe0be8f9535b6253add61c49d0d9bc/sphinx-7.1.2-py3-none-any.whl", hash = "sha256:d170a81825b2fcacb6dfd5a0d7f578a053e45d3f2b153fecc948c37344eb4cbe", size = 3169543 }, -] - [[package]] name = "sphinx" version = "7.4.7" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version == '3.9.*'", + "python_full_version < '3.10'", ] dependencies = [ - { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "babel", marker = "python_full_version == '3.9.*'" }, - { name = "colorama", marker = "python_full_version == '3.9.*' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "imagesize", marker = "python_full_version == '3.9.*'" }, - { name = "importlib-metadata", marker = "python_full_version == '3.9.*'" }, - { name = "jinja2", marker = "python_full_version == '3.9.*'" }, - { name = "packaging", marker = "python_full_version == '3.9.*'" }, - { name = "pygments", marker = "python_full_version == '3.9.*'" }, - { name = "requests", marker = "python_full_version == '3.9.*'" }, - { name = "snowballstemmer", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-jsmath", marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, - { name = "tomli", marker = "python_full_version == '3.9.*'" }, + { name = "alabaster", version = "0.7.16", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "babel", marker = "python_full_version < '3.10'" }, + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version < '3.10'" }, + { name = "imagesize", marker = "python_full_version < '3.10'" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "jinja2", marker = "python_full_version < '3.10'" }, + { name = "packaging", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "requests", marker = "python_full_version < '3.10'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.10'" }, + { name = "tomli", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/5b/be/50e50cb4f2eff47df05673d361095cafd95521d2a22521b920c67a372dcb/sphinx-7.4.7.tar.gz", hash = "sha256:242f92a7ea7e6c5b406fdc2615413890ba9f699114a9c09192d7dfead2ee9cfe", size = 8067911 } wheels = [ @@ -1697,19 +1274,19 @@ dependencies = [ { name = "alabaster", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "babel", marker = "python_full_version >= '3.10'" }, { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, - { name = "docutils", version = "0.21.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "docutils", marker = "python_full_version >= '3.10'" }, { name = "imagesize", marker = "python_full_version >= '3.10'" }, { name = "jinja2", marker = "python_full_version >= '3.10'" }, { name = "packaging", marker = "python_full_version >= '3.10'" }, { name = "pygments", marker = "python_full_version >= '3.10'" }, { name = "requests", marker = "python_full_version >= '3.10'" }, { name = "snowballstemmer", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-applehelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-devhelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = 
"sphinxcontrib-htmlhelp", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.10'" }, { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-qthelp", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "sphinxcontrib-serializinghtml", version = "2.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.10'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.10'" }, { name = "tomli", marker = "python_full_version == '3.10.*'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/be0b61178fe2cdcb67e2a92fc9ebb488e3c51c4f74a36a7824c0adf23425/sphinx-8.1.3.tar.gz", hash = "sha256:43c1911eecb0d3e161ad78611bc905d1ad0e523e4ddc202a58a821773dc4c927", size = 8184611 } @@ -1722,97 +1299,40 @@ name = "sphinx-autoapi" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "astroid", version = "3.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "astroid", version = "3.3.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9'" }, + { name = "astroid" }, { name = "jinja2" }, { name = "pyyaml" }, - { name = "sphinx", version = "7.1.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "sphinx", version = "7.4.7", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "stdlib-list", version = "0.10.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9'" }, - { name = "stdlib-list", version = "0.11.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.9.*'" }, + { name = "stdlib-list", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4a/eb/cc243583bb1d518ca3b10998c203d919a8ed90affd4831f2b61ad09043d2/sphinx_autoapi-3.4.0.tar.gz", hash = "sha256:e6d5371f9411bbb9fca358c00a9e57aef3ac94cbfc5df4bab285946462f69e0c", size = 29292 } wheels = [ { url = "https://files.pythonhosted.org/packages/de/d6/f2acdc2567337fd5f5dc091a4e58d8a0fb14927b9779fc1e5ecee96d9824/sphinx_autoapi-3.4.0-py3-none-any.whl", hash = "sha256:4027fef2875a22c5f2a57107c71641d82f6166bf55beb407a47aaf3ef14e7b92", size = 34095 }, ] -[[package]] -name = "sphinxcontrib-applehelp" -version = "1.0.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/32/df/45e827f4d7e7fcc84e853bcef1d836effd762d63ccb86f43ede4e98b478c/sphinxcontrib-applehelp-1.0.4.tar.gz", hash = "sha256:828f867945bbe39817c210a1abfd1bc4895c8b73fcaade56d45357a348a07d7e", size = 24766 
} -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl", hash = "sha256:29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228", size = 120601 }, -] - [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053 } wheels = [ { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300 }, ] -[[package]] -name = "sphinxcontrib-devhelp" -version = "1.0.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/98/33/dc28393f16385f722c893cb55539c641c9aaec8d1bc1c15b69ce0ac2dbb3/sphinxcontrib-devhelp-1.0.2.tar.gz", hash = "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4", size = 17398 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl", hash = "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", size = 84690 }, -] - [[package]] name = "sphinxcontrib-devhelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967 } wheels = [ { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530 }, ] -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/47/64cff68ea3aa450c373301e5bebfbb9fce0a3e70aca245fcadd4af06cd75/sphinxcontrib-htmlhelp-2.0.1.tar.gz", hash = "sha256:0cbdd302815330058422b98a113195c9249825d681e18f11e8b1f78a2f11efff", size = 27967 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl", hash = "sha256:c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903", size = 99833 }, -] - [[package]] name = "sphinxcontrib-htmlhelp" version = "2.1.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == 
'3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617 } wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, @@ -1827,55 +1347,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071 }, ] -[[package]] -name = "sphinxcontrib-qthelp" -version = "1.0.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/8e/c4846e59f38a5f2b4a0e3b27af38f2fcf904d4bfd82095bf92de0b114ebd/sphinxcontrib-qthelp-1.0.3.tar.gz", hash = "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", size = 21658 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl", hash = "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6", size = 90609 }, -] - [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165 } wheels = [ { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743 }, ] -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "1.1.5" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/b5/72/835d6fadb9e5d02304cf39b18f93d227cd93abd3c41ebf58e6853eeb1455/sphinxcontrib-serializinghtml-1.1.5.tar.gz", hash = "sha256:aa5f6de5dfdf809ef505c4895e51ef5c9eac17d0f287933eb49ec495280b6952", size = 21019 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl", hash = "sha256:352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd", size = 94021 }, -] - [[package]] name = "sphinxcontrib-serializinghtml" version = "2.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = 
"https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080 } wheels = [ { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072 }, @@ -1895,25 +1379,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, ] -[[package]] -name = "stdlib-list" -version = "0.10.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/39/bb/1cdbc326a5ab0026602e0489cbf02357e78140253c4b57cd866d380eb355/stdlib_list-0.10.0.tar.gz", hash = "sha256:6519c50d645513ed287657bfe856d527f277331540691ddeaf77b25459964a14", size = 59447 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/d9/9085375f0d23a4896b307bf14dcc61b49ec8cc67cb33e06cf95bf3af3966/stdlib_list-0.10.0-py3-none-any.whl", hash = "sha256:b3a911bc441d03e0332dd1a9e7d0870ba3bb0a542a74d7524f54fb431256e214", size = 79814 }, -] - [[package]] name = "stdlib-list" version = "0.11.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/5d/04/6b37a71e92ddca16b190b7df62494ac4779d58ced4787f73584eb32c8f03/stdlib_list-0.11.0.tar.gz", hash = "sha256:b74a7b643a77a12637e907f3f62f0ab9f67300bce4014f6b2d3c8b4c8fd63c66", size = 60335 } wheels = [ { url = "https://files.pythonhosted.org/packages/16/fe/e07300c027a868d32d8ed7a425503401e91a03ff90e7ca525c115c634ffb/stdlib_list-0.11.0-py3-none-any.whl", hash = "sha256:8bf8decfffaaf273d4cfeb5bd852b910a00dec1037dcf163576803622bccf597", size = 83617 }, @@ -1994,28 +1463,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, ] -[[package]] -name = "urllib3" -version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, -] - [[package]] name = "urllib3" version = "2.3.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", - "python_full_version == '3.9.*'", -] sdist = { url = 
"https://files.pythonhosted.org/packages/aa/63/e53da845320b757bf29ef6a9062f5c669fe997973f966045cb019c3f4b66/urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d", size = 307268 } wheels = [ { url = "https://files.pythonhosted.org/packages/c8/19/4ec628951a74043532ca2cf5d97b7b14863931476d117c471e8e2b1eb39f/urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df", size = 128369 }, @@ -2091,17 +1542,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377 }, { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986 }, { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750 }, - { url = "https://files.pythonhosted.org/packages/0c/66/95b9e90e6e1274999b183c9c3f984996d870e933ca9560115bd1cd1d6f77/wrapt-1.17.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5c803c401ea1c1c18de70a06a6f79fcc9c5acfc79133e9869e730ad7f8ad8ef9", size = 53234 }, - { url = "https://files.pythonhosted.org/packages/a4/b6/6eced5e2db5924bf6d9223d2bb96b62e00395aae77058e6a9e11bf16b3bd/wrapt-1.17.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f917c1180fdb8623c2b75a99192f4025e412597c50b2ac870f156de8fb101119", size = 38462 }, - { url = "https://files.pythonhosted.org/packages/5d/a4/c8472fe2568978b5532df84273c53ddf713f689d408a4335717ab89547e0/wrapt-1.17.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ecc840861360ba9d176d413a5489b9a0aff6d6303d7e733e2c4623cfa26904a6", size = 38730 }, - { url = "https://files.pythonhosted.org/packages/3c/70/1d259c6b1ad164eb23ff70e3e452dd1950f96e6473f72b7207891d0fd1f0/wrapt-1.17.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb87745b2e6dc56361bfde481d5a378dc314b252a98d7dd19a651a3fa58f24a9", size = 86225 }, - { url = "https://files.pythonhosted.org/packages/a9/68/6b83367e1afb8de91cbea4ef8e85b58acdf62f034f05d78c7b82afaa23d8/wrapt-1.17.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58455b79ec2661c3600e65c0a716955adc2410f7383755d537584b0de41b1d8a", size = 78055 }, - { url = "https://files.pythonhosted.org/packages/0d/21/09573d2443916705c57fdab85d508f592c0a58d57becc53e15755d67fba2/wrapt-1.17.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e42a40a5e164cbfdb7b386c966a588b1047558a990981ace551ed7e12ca9c2", size = 85592 }, - { url = "https://files.pythonhosted.org/packages/45/ce/700e17a852dd5dec894e241c72973ea82363486bcc1fb05d47b4fbd1d683/wrapt-1.17.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:91bd7d1773e64019f9288b7a5101f3ae50d3d8e6b1de7edee9c2ccc1d32f0c0a", size = 83906 }, - { url = "https://files.pythonhosted.org/packages/37/14/bd210faf0a66faeb8529d42b6b45a25d6aa6ce25ddfc19168e4161aed227/wrapt-1.17.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:bb90fb8bda722a1b9d48ac1e6c38f923ea757b3baf8ebd0c82e09c5c1a0e7a04", size = 
76763 }, - { url = "https://files.pythonhosted.org/packages/34/0c/85af70d291f44659c422416f0272046109e785bf6db8c081cfeeae5715c5/wrapt-1.17.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:08e7ce672e35efa54c5024936e559469436f8b8096253404faeb54d2a878416f", size = 83573 }, - { url = "https://files.pythonhosted.org/packages/f8/1e/b215068e824878f69ea945804fa26c176f7c2735a3ad5367d78930bd076a/wrapt-1.17.2-cp38-cp38-win32.whl", hash = "sha256:410a92fefd2e0e10d26210e1dfb4a876ddaf8439ef60d6434f21ef8d87efc5b7", size = 36408 }, - { url = "https://files.pythonhosted.org/packages/52/27/3dd9ad5f1097b33c95d05929e409cc86d7c765cb5437b86694dc8f8e9af0/wrapt-1.17.2-cp38-cp38-win_amd64.whl", hash = "sha256:95c658736ec15602da0ed73f312d410117723914a5c91a14ee4cdd72f1d790b3", size = 38737 }, { url = "https://files.pythonhosted.org/packages/8a/f4/6ed2b8f6f1c832933283974839b88ec7c983fd12905e01e97889dadf7559/wrapt-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:99039fa9e6306880572915728d7f6c24a86ec57b0a83f6b2491e1d8ab0235b9a", size = 53308 }, { url = "https://files.pythonhosted.org/packages/a2/a9/712a53f8f4f4545768ac532619f6e56d5d0364a87b2212531685e89aeef8/wrapt-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2696993ee1eebd20b8e4ee4356483c4cb696066ddc24bd70bcbb80fa56ff9061", size = 38489 }, { url = "https://files.pythonhosted.org/packages/fa/9b/e172c8f28a489a2888df18f953e2f6cb8d33b1a2e78c9dfc52d8bf6a5ead/wrapt-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:612dff5db80beef9e649c6d803a8d50c409082f1fedc9dbcdfde2983b2025b82", size = 38776 }, @@ -2116,25 +1556,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 }, ] -[[package]] -name = "zipp" -version = "3.20.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.9'", -] -sdist = { url = "https://files.pythonhosted.org/packages/54/bf/5c0000c44ebc80123ecbdddba1f5dcd94a5ada602a9c225d84b5aaa55e86/zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29", size = 24199 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, -] - [[package]] name = "zipp" version = "3.21.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.9.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/3f/50/bad581df71744867e9468ebd0bcd6505de3b275e06f202c2cb016e3ff56f/zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4", size = 24545 } wheels = [ { url = "https://files.pythonhosted.org/packages/b7/1a/7e4798e9339adc931158c9d69ecc34f5e6791489d469f5e50ec15e35f458/zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931", size = 9630 }, From b194a8772e58ccefc697e11671113127a8038716 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 12 Mar 2025 14:25:32 -0400 Subject: [PATCH 13/22] feat/improve ruff test coverage (#1055) * Run python tests on all currently supported python versions * Update ruff checks to select all * Ruff auto fix * Applying ruff suggestions * noqa rules updates per ruff checks * Working 
through more ruff suggestions * Working through more ruff suggestions * update timestamps on tests * More ruff updates * More ruff updates * Instead of importing udf static functions as variables, import them directly * More ruff formatting suggestions * more ruff formatting suggestions * More ruff formatting * More ruff formatting * Cut off lint errors for this PR * Working through more ruff checks and disabling a bunch for now * Address CI difference from local ruff * UDWF isn't a proper abstract base class right now since users can opt in to all methods * Update pre-commit to match the version of ruff used in CI * To enable testing in python 3.9 we need numpy. Also going to the current minimal supported version * Update min required version of python to 3.9 in pyproject.toml. The other changes will come in #1043 that is soon to be merged. * Suppress UP035 * ruff format --- .github/workflows/test.yaml | 2 + .pre-commit-config.yaml | 2 +- benchmarks/tpch/tpch.py | 14 +- dev/release/check-rat-report.py | 2 +- dev/release/generate-changelog.py | 10 +- docs/source/conf.py | 2 +- examples/python-udwf.py | 2 +- examples/tpch/_tests.py | 15 +- pyproject.toml | 76 +++++- python/datafusion/__init__.py | 50 ++-- python/datafusion/common.py | 14 +- python/datafusion/context.py | 4 +- python/datafusion/dataframe.py | 15 +- python/datafusion/expr.py | 94 +++---- python/datafusion/functions.py | 46 ++-- python/datafusion/input/__init__.py | 2 +- python/datafusion/input/base.py | 6 +- python/datafusion/input/location.py | 40 +-- python/datafusion/io.py | 20 +- python/datafusion/object_store.py | 2 +- python/datafusion/plan.py | 8 +- python/datafusion/record_batch.py | 8 +- python/datafusion/substrait.py | 21 +- python/datafusion/udf.py | 236 +++++++++-------- python/tests/generic.py | 19 +- python/tests/test_aggregation.py | 16 +- python/tests/test_catalog.py | 9 +- python/tests/test_context.py | 53 ++-- python/tests/test_dataframe.py | 38 ++- python/tests/test_expr.py | 11 +- python/tests/test_functions.py | 358 ++++++++++++++------------ python/tests/test_imports.py | 7 +- python/tests/test_input.py | 12 +- python/tests/test_io.py | 13 +- python/tests/test_sql.py | 35 +-- python/tests/test_store.py | 13 +- python/tests/test_substrait.py | 2 +- python/tests/test_udaf.py | 10 +- python/tests/test_udwf.py | 2 +- python/tests/test_wrapper_coverage.py | 7 +- 40 files changed, 697 insertions(+), 599 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c1d9ac838..da3582766 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -33,9 +33,11 @@ jobs: fail-fast: false matrix: python-version: + - "3.9" - "3.10" - "3.11" - "3.12" + - "3.13" toolchain: - "stable" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b548ff18f..abcfcf321 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: - id: actionlint-docker - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.3.0 + rev: v0.9.10 hooks: # Run the linter.
- id: ruff diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index fb86b12b6..bfb9ac398 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -59,13 +59,13 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("setup,{}".format(round(time_millis, 1))) - results.write("setup,{}\n".format(round(time_millis, 1))) + print(f"setup,{round(time_millis, 1)}") + results.write(f"setup,{round(time_millis, 1)}\n") results.flush() # run queries for query in range(1, 23): - with open("{}/q{}.sql".format(query_path, query)) as f: + with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") queries = [] @@ -83,14 +83,14 @@ def bench(data_path, query_path): end = time.time() time_millis = (end - start) * 1000 total_time_millis += time_millis - print("q{},{}".format(query, round(time_millis, 1))) - results.write("q{},{}\n".format(query, round(time_millis, 1))) + print(f"q{query},{round(time_millis, 1)}") + results.write(f"q{query},{round(time_millis, 1)}\n") results.flush() except Exception as e: print("query", query, "failed", e) - print("total,{}".format(round(total_time_millis, 1))) - results.write("total,{}\n".format(round(total_time_millis, 1))) + print(f"total,{round(total_time_millis, 1)}") + results.write(f"total,{round(total_time_millis, 1)}\n") if __name__ == "__main__": diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py index d3dd7c5dd..0c9f4c326 100644 --- a/dev/release/check-rat-report.py +++ b/dev/release/check-rat-report.py @@ -29,7 +29,7 @@ exclude_globs_filename = sys.argv[1] xml_filename = sys.argv[2] -globs = [line.strip() for line in open(exclude_globs_filename, "r")] +globs = [line.strip() for line in open(exclude_globs_filename)] tree = ET.parse(xml_filename) root = tree.getroot() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index 2564eea86..e30e2def2 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -26,15 +26,11 @@ def print_pulls(repo_name, title, pulls): if len(pulls) > 0: - print("**{}:**".format(title)) + print(f"**{title}:**") print() for pull, commit in pulls: - url = "https://github.com/{}/pull/{}".format(repo_name, pull.number) - print( - "- {} [#{}]({}) ({})".format( - pull.title, pull.number, url, commit.author.login - ) - ) + url = f"https://github.com/{repo_name}/pull/{pull.number}" + print(f"- {pull.title} [#{pull.number}]({url}) ({commit.author.login})") print() diff --git a/docs/source/conf.py b/docs/source/conf.py index 2e5a41339..c82a189e0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): +def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), diff --git a/examples/python-udwf.py b/examples/python-udwf.py index 7d39dc1b8..98d118bf2 100644 --- a/examples/python-udwf.py +++ b/examples/python-udwf.py @@ -59,7 +59,7 @@ def __init__(self, alpha: float) -> None: def supports_bounded_execution(self) -> bool: return True - def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: + def get_range(self, idx: int, num_rows: int) -> tuple[int, int]: # noqa: ARG002 # Override the default range of current row since uses_window_frame is False # So for the purpose of this test we just smooth from the previous row to # 
current. diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index c4d872085..2be4dfabd 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -27,28 +27,25 @@ def df_selection(col_name, col_type): if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): return F.round(col(col_name), lit(2)).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def load_schema(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return col_name, pa.string() - elif isinstance(col_type, pa.Decimal128Type): + if isinstance(col_type, pa.Decimal128Type): return col_name, pa.float64() - else: - return col_name, col_type + return col_name, col_type def expected_selection(col_name, col_type): if col_type == pa.int64() or col_type == pa.int32(): return F.trim(col(col_name)).cast(col_type).alias(col_name) - elif col_type == pa.string() or col_type == pa.string_view(): + if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) - else: - return col(col_name) + return col(col_name) def selections_and_schema(original_schema): diff --git a/pyproject.toml b/pyproject.toml index 1c2733677..060e3b80a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,57 @@ features = ["substrait"] # Enable docstring linting using the google style guide [tool.ruff.lint] -select = ["E4", "E7", "E9", "F", "FA", "D", "W", "I"] +select = ["ALL" ] +ignore = [ + "A001", # Allow using words like min as variable names + "A002", # Allow using words like filter as variable names + "ANN401", # Allow Any for wrapper classes + "COM812", # Recommended to ignore these rules when using with ruff-format + "FIX002", # Allow TODO lines - consider removing at some point + "FBT001", # Allow boolean positional args + "FBT002", # Allow boolean positional args + "ISC001", # Recommended to ignore these rules when using with ruff-format + "SLF001", # Allow accessing private members + "TD002", + "TD003", # Allow TODO lines + "UP007", # Disallowing Union is pedantic + # TODO: Enable all of the following, but this PR is getting too large already + "PT001", + "ANN204", + "B008", + "EM101", + "PLR0913", + "PLR1714", + "ANN201", + "C400", + "TRY003", + "B904", + "UP006", + "RUF012", + "FBT003", + "C416", + "SIM102", + "PGH003", + "PLR2004", + "PERF401", + "PD901", + "EM102", + "ERA001", + "SIM108", + "ICN001", + "ANN001", + "ANN202", + "PTH", + "N812", + "INP001", + "DTZ007", + "PLW2901", + "RET503", + "RUF015", + "A005", + "TC001", + "UP035", +] [tool.ruff.lint.pydocstyle] convention = "google" @@ -75,16 +125,30 @@ max-doc-length = 88 # Disable docstring checking for these directories [tool.ruff.lint.per-file-ignores] -"python/tests/*" = ["D"] -"examples/*" = ["D", "W505"] -"dev/*" = ["D"] -"benchmarks/*" = ["D", "F"] +"python/tests/*" = [ + "ANN", + "ARG", + "BLE001", + "D", + "S101", + "SLF", + "PD", + "PLR2004", + "PT011", + "RUF015", + "S608", + "PLR0913", + "PT004", +] +"examples/*" = ["D", "W505", "E501", "T201", "S101"] +"dev/*" = ["D", "E", "T", "S", "PLR", "C", "SIM", "UP", "EXE", "N817"] +"benchmarks/*" = ["D", "F", "T", "BLE", "FURB", "PLR", "E", "TD", "TRY", "S", "SIM", "EXE", "UP"] "docs/*" = ["D"] [dependency-groups] dev = [ "maturin>=1.8.1", - "numpy>1.24.4 ; python_full_version >= '3.10'", + "numpy>1.25.0", "pytest>=7.4.4", "ruff>=0.9.1", "toml>=0.10.2", 
diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index f11ce54a6..286e5dc31 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -48,44 +48,47 @@ from .io import read_avro, read_csv, read_json, read_parquet from .plan import ExecutionPlan, LogicalPlan from .record_batch import RecordBatch, RecordBatchStream -from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF +from .udf import Accumulator, AggregateUDF, ScalarUDF, WindowUDF, udaf, udf, udwf __version__ = importlib_metadata.version(__name__) __all__ = [ "Accumulator", + "AggregateUDF", + "Catalog", "Config", - "DataFrame", - "SessionContext", - "SessionConfig", - "SQLOptions", - "RuntimeEnvBuilder", - "Expr", - "ScalarUDF", - "WindowFrame", - "column", - "col", - "literal", - "lit", "DFSchema", - "Catalog", + "DataFrame", "Database", - "Table", - "AggregateUDF", - "WindowUDF", - "LogicalPlan", "ExecutionPlan", + "Expr", + "LogicalPlan", "RecordBatch", "RecordBatchStream", + "RuntimeEnvBuilder", + "SQLOptions", + "ScalarUDF", + "SessionConfig", + "SessionContext", + "Table", + "WindowFrame", + "WindowUDF", + "col", + "column", "common", "expr", "functions", + "lit", + "literal", "object_store", - "substrait", - "read_parquet", "read_avro", "read_csv", "read_json", + "read_parquet", + "substrait", + "udaf", + "udf", + "udwf", ] @@ -120,10 +123,3 @@ def str_lit(value): def lit(value): """Create a literal expression.""" return Expr.literal(value) - - -udf = ScalarUDF.udf - -udaf = AggregateUDF.udaf - -udwf = WindowUDF.udwf diff --git a/python/datafusion/common.py b/python/datafusion/common.py index a2298c634..e762a993b 100644 --- a/python/datafusion/common.py +++ b/python/datafusion/common.py @@ -20,7 +20,7 @@ from ._internal import common as common_internal -# TODO these should all have proper wrapper classes +# TODO: these should all have proper wrapper classes DFSchema = common_internal.DFSchema DataType = common_internal.DataType @@ -38,15 +38,15 @@ "DFSchema", "DataType", "DataTypeMap", - "RexType", - "PythonType", - "SqlType", "NullTreatment", - "SqlTable", + "PythonType", + "RexType", + "SqlFunction", "SqlSchema", - "SqlView", "SqlStatistics", - "SqlFunction", + "SqlTable", + "SqlType", + "SqlView", ] diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 282b2a477..0ab1a908a 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -393,8 +393,6 @@ def with_temp_file_path(self, path: str | pathlib.Path) -> RuntimeEnvBuilder: class RuntimeConfig(RuntimeEnvBuilder): """See `RuntimeEnvBuilder`.""" - pass - class SQLOptions: """Options to be used when performing SQL queries.""" @@ -498,7 +496,7 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) - def enable_url_table(self) -> "SessionContext": + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. 
Returns: diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index de5d8376e..d1c71c2bb 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -29,6 +29,7 @@ List, Literal, Optional, + Type, Union, overload, ) @@ -49,10 +50,11 @@ import polars as pl import pyarrow as pa + from datafusion._internal import DataFrame as DataFrameInternal + from datafusion._internal import expr as expr_internal + from enum import Enum -from datafusion._internal import DataFrame as DataFrameInternal -from datafusion._internal import expr as expr_internal from datafusion.expr import Expr, SortExpr, sort_or_default @@ -73,7 +75,7 @@ class Compression(Enum): LZ4_RAW = "lz4_raw" @classmethod - def from_str(cls, value: str) -> "Compression": + def from_str(cls: Type[Compression], value: str) -> Compression: """Convert a string to a Compression enum value. Args: @@ -88,8 +90,9 @@ def from_str(cls, value: str) -> "Compression": try: return cls(value.lower()) except ValueError: + valid_values = str([item.value for item in Compression]) raise ValueError( - f"{value} is not a valid Compression. Valid values are: {[item.value for item in Compression]}" + f"{value} is not a valid Compression. Valid values are: {valid_values}" ) def get_default_level(self) -> Optional[int]: @@ -104,9 +107,9 @@ def get_default_level(self) -> Optional[int]: # https://github.com/apache/datafusion-python/pull/981#discussion_r1904789223 if self == Compression.GZIP: return 6 - elif self == Compression.BROTLI: + if self == Compression.BROTLI: return 1 - elif self == Compression.ZSTD: + if self == Compression.ZSTD: return 4 return None diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py index 3639abec6..702f75aed 100644 --- a/python/datafusion/expr.py +++ b/python/datafusion/expr.py @@ -101,63 +101,63 @@ WindowExpr = expr_internal.WindowExpr __all__ = [ - "Expr", - "Column", - "Literal", - "BinaryExpr", - "Literal", + "Aggregate", "AggregateFunction", - "Not", - "IsNotNull", - "IsNull", - "IsTrue", - "IsFalse", - "IsUnknown", - "IsNotTrue", - "IsNotFalse", - "IsNotUnknown", - "Negative", - "Like", - "ILike", - "SimilarTo", - "ScalarVariable", "Alias", - "InList", - "Exists", - "Subquery", - "InSubquery", - "ScalarSubquery", - "Placeholder", - "GroupingSet", + "Analyze", + "Between", + "BinaryExpr", "Case", "CaseBuilder", "Cast", - "TryCast", - "Between", + "Column", + "CreateMemoryTable", + "CreateView", + "Distinct", + "DropTable", + "EmptyRelation", + "Exists", "Explain", + "Expr", + "Extension", + "Filter", + "GroupingSet", + "ILike", + "InList", + "InSubquery", + "IsFalse", + "IsNotFalse", + "IsNotNull", + "IsNotTrue", + "IsNotUnknown", + "IsNull", + "IsTrue", + "IsUnknown", + "Join", + "JoinConstraint", + "JoinType", + "Like", "Limit", - "Aggregate", + "Literal", + "Literal", + "Negative", + "Not", + "Partitioning", + "Placeholder", + "Projection", + "Repartition", + "ScalarSubquery", + "ScalarVariable", + "SimilarTo", "Sort", "SortExpr", - "Analyze", - "EmptyRelation", - "Join", - "JoinType", - "JoinConstraint", + "Subquery", + "SubqueryAlias", + "TableScan", + "TryCast", "Union", "Unnest", "UnnestExpr", - "Extension", - "Filter", - "Projection", - "TableScan", - "CreateMemoryTable", - "CreateView", - "Distinct", - "SubqueryAlias", - "DropTable", - "Partitioning", - "Repartition", "Window", "WindowExpr", "WindowFrame", @@ -311,7 +311,7 @@ def __getitem__(self, key: str | int) -> Expr: ) return Expr(self.expr.__getitem__(key)) - def __eq__(self, rhs: Any) -> Expr: + def 
__eq__(self, rhs: object) -> Expr: """Equal to. Accepts either an expression or any valid PyArrow scalar literal value. @@ -320,7 +320,7 @@ def __eq__(self, rhs: Any) -> Expr: rhs = Expr.literal(rhs) return Expr(self.expr.__eq__(rhs.expr)) - def __ne__(self, rhs: Any) -> Expr: + def __ne__(self, rhs: object) -> Expr: """Not equal to. Accepts either an expression or any valid PyArrow scalar literal value. diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index b449c4868..0cc7434cf 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -18,13 +18,12 @@ from __future__ import annotations -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional import pyarrow as pa from datafusion._internal import functions as f from datafusion.common import NullTreatment -from datafusion.context import SessionContext from datafusion.expr import ( CaseBuilder, Expr, @@ -34,6 +33,9 @@ sort_list_to_raw_sort_list, ) +if TYPE_CHECKING: + from datafusion.context import SessionContext + __all__ = [ "abs", "acos", @@ -81,8 +83,8 @@ "array_sort", "array_to_string", "array_union", - "arrow_typeof", "arrow_cast", + "arrow_typeof", "ascii", "asin", "asinh", @@ -97,6 +99,7 @@ "bool_and", "bool_or", "btrim", + "cardinality", "case", "cbrt", "ceil", @@ -116,6 +119,7 @@ "covar", "covar_pop", "covar_samp", + "cume_dist", "current_date", "current_time", "date_bin", @@ -125,17 +129,17 @@ "datetrunc", "decode", "degrees", + "dense_rank", "digest", "empty", "encode", "ends_with", - "extract", "exp", + "extract", "factorial", "find_in_set", "first_value", "flatten", - "cardinality", "floor", "from_unixtime", "gcd", @@ -143,8 +147,10 @@ "initcap", "isnan", "iszero", + "lag", "last_value", "lcm", + "lead", "left", "length", "levenshtein", @@ -166,10 +172,10 @@ "list_prepend", "list_push_back", "list_push_front", - "list_repeat", "list_remove", "list_remove_all", "list_remove_n", + "list_repeat", "list_replace", "list_replace_all", "list_replace_n", @@ -180,14 +186,14 @@ "list_union", "ln", "log", - "log10", "log2", + "log10", "lower", "lpad", "ltrim", "make_array", - "make_list", "make_date", + "make_list", "max", "md5", "mean", @@ -195,19 +201,22 @@ "min", "named_struct", "nanvl", - "nvl", "now", "nth_value", + "ntile", "nullif", + "nvl", "octet_length", "order_by", "overlay", + "percent_rank", "pi", "pow", "power", "radians", "random", "range", + "rank", "regexp_like", "regexp_match", "regexp_replace", @@ -225,6 +234,7 @@ "reverse", "right", "round", + "row_number", "rpad", "rtrim", "sha224", @@ -252,8 +262,8 @@ "to_hex", "to_timestamp", "to_timestamp_micros", - "to_timestamp_nanos", "to_timestamp_millis", + "to_timestamp_nanos", "to_timestamp_seconds", "to_unixtime", "translate", @@ -268,14 +278,6 @@ "when", # Window Functions "window", - "lead", - "lag", - "row_number", - "rank", - "dense_rank", - "percent_rank", - "cume_dist", - "ntile", ] @@ -292,14 +294,14 @@ def nullif(expr1: Expr, expr2: Expr) -> Expr: return Expr(f.nullif(expr1.expr, expr2.expr)) -def encode(input: Expr, encoding: Expr) -> Expr: +def encode(expr: Expr, encoding: Expr) -> Expr: """Encode the ``input``, using the ``encoding``. encoding can be base64 or hex.""" - return Expr(f.encode(input.expr, encoding.expr)) + return Expr(f.encode(expr.expr, encoding.expr)) -def decode(input: Expr, encoding: Expr) -> Expr: +def decode(expr: Expr, encoding: Expr) -> Expr: """Decode the ``input``, using the ``encoding``. 
encoding can be base64 or hex.""" - return Expr(f.decode(input.expr, encoding.expr)) + return Expr(f.decode(expr.expr, encoding.expr)) def array_to_string(expr: Expr, delimiter: Expr) -> Expr: diff --git a/python/datafusion/input/__init__.py b/python/datafusion/input/__init__.py index f85ce21f0..f0c1f42b4 100644 --- a/python/datafusion/input/__init__.py +++ b/python/datafusion/input/__init__.py @@ -23,5 +23,5 @@ from .location import LocationInputPlugin __all__ = [ - LocationInputPlugin, + "LocationInputPlugin", ] diff --git a/python/datafusion/input/base.py b/python/datafusion/input/base.py index 4eba19784..f67dde2a1 100644 --- a/python/datafusion/input/base.py +++ b/python/datafusion/input/base.py @@ -38,11 +38,9 @@ class BaseInputSource(ABC): """ @abstractmethod - def is_correct_input(self, input_item: Any, table_name: str, **kwargs) -> bool: + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: """Returns `True` if the input is valid.""" - pass @abstractmethod - def build_table(self, input_item: Any, table_name: str, **kwarg) -> SqlTable: + def build_table(self, input_item: Any, table_name: str, **kwarg: Any) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - pass diff --git a/python/datafusion/input/location.py b/python/datafusion/input/location.py index 517cd1578..08d98d115 100644 --- a/python/datafusion/input/location.py +++ b/python/datafusion/input/location.py @@ -18,7 +18,7 @@ """The default input source for DataFusion.""" import glob -import os +from pathlib import Path from typing import Any from datafusion.common import DataTypeMap, SqlTable @@ -31,7 +31,7 @@ class LocationInputPlugin(BaseInputSource): This can be read in from a file (on disk, remote etc.). """ - def is_correct_input(self, input_item: Any, table_name: str, **kwargs): + def is_correct_input(self, input_item: Any, table_name: str, **kwargs: Any) -> bool: # noqa: ARG002 """Returns `True` if the input is valid.""" return isinstance(input_item, str) @@ -39,27 +39,28 @@ def build_table( self, input_item: str, table_name: str, - **kwargs, - ) -> SqlTable: + **kwargs: Any, # noqa: ARG002 + ) -> SqlTable: # type: ignore[invalid-type-form] """Create a table from the input source.""" - _, extension = os.path.splitext(input_item) - format = extension.lstrip(".").lower() + extension = Path(input_item).suffix + file_format = extension.lstrip(".").lower() num_rows = 0 # Total number of rows in the file. Used for statistics columns = [] - if format == "parquet": + if file_format == "parquet": import pyarrow.parquet as pq # Read the Parquet metadata metadata = pq.read_metadata(input_item) num_rows = metadata.num_rows # Iterate through the schema and build the SqlTable - for col in metadata.schema: - columns.append( - ( - col.name, - DataTypeMap.from_parquet_type_str(col.physical_type), - ) + columns = [ + ( + col.name, + DataTypeMap.from_parquet_type_str(col.physical_type), ) + for col in metadata.schema + ] + elif format == "csv": import csv @@ -69,19 +70,18 @@ def build_table( # to get that information. However, this should only be occurring # at table creation time and therefore shouldn't # slow down query performance. 
-        with open(input_item, "r") as file: +        with Path(input_item).open() as file: reader = csv.reader(file) -            header_row = next(reader) -            print(header_row) +            _header_row = next(reader) for _ in reader: num_rows += 1 # TODO: Need to actually consume this row into reasonable columns -            raise RuntimeError("TODO: Currently unable to support CSV input files.") +            msg = "TODO: Currently unable to support CSV input files." +            raise RuntimeError(msg) else: -            raise RuntimeError( -                f"Input of format: `{format}` is currently not supported.\ +            msg = f"Input of format: `{file_format}` is currently not supported.\ Only Parquet and CSV." -            ) +            raise RuntimeError(msg) # Input could possibly be multiple files. Create a list if so input_files = glob.glob(input_item) diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3b6264948..3e39703e3 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -19,15 +19,19 @@ from __future__ import annotations -import pathlib - -import pyarrow +from typing import TYPE_CHECKING from datafusion.dataframe import DataFrame -from datafusion.expr import Expr from ._internal import SessionContext as SessionContextInternal +if TYPE_CHECKING: + import pathlib + + import pyarrow as pa + + from datafusion.expr import Expr + def read_parquet( path: str | pathlib.Path, @@ -35,7 +39,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. @@ -79,7 +83,7 @@ def read_parquet( def read_json( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -120,7 +124,7 @@ def read_json( def read_csv( path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -173,7 +177,7 @@ def read_csv( def read_avro( path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".avro", ) -> DataFrame: diff --git a/python/datafusion/object_store.py b/python/datafusion/object_store.py index 7cc17506f..6298526f5 100644 --- a/python/datafusion/object_store.py +++ b/python/datafusion/object_store.py @@ -24,4 +24,4 @@ MicrosoftAzure = object_store.MicrosoftAzure Http = object_store.Http -__all__ = ["AmazonS3", "GoogleCloud", "LocalFileSystem", "MicrosoftAzure", "Http"] +__all__ = ["AmazonS3", "GoogleCloud", "Http", "LocalFileSystem", "MicrosoftAzure"] diff --git a/python/datafusion/plan.py b/python/datafusion/plan.py index 133fc446d..0b7bebcb3 100644 --- a/python/datafusion/plan.py +++ b/python/datafusion/plan.py @@ -19,7 +19,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, List +from typing import TYPE_CHECKING, Any import datafusion._internal as df_internal @@ -27,8 +27,8 @@ from datafusion.context import SessionContext __all__ = [ - "LogicalPlan", "ExecutionPlan", + "LogicalPlan", ] @@ -54,7 +54,7 @@ def to_variant(self) -> Any: """Convert the logical plan into its specific variant.""" return self._raw_plan.to_variant() - def inputs(self) ->
List[LogicalPlan]: + def inputs(self) -> list[LogicalPlan]: """Returns the list of inputs to the logical plan.""" return [LogicalPlan(p) for p in self._raw_plan.inputs()] @@ -106,7 +106,7 @@ def __init__(self, plan: df_internal.ExecutionPlan) -> None: """This constructor should not be called by the end user.""" self._raw_plan = plan - def children(self) -> List[ExecutionPlan]: + def children(self) -> list[ExecutionPlan]: """Get a list of children `ExecutionPlan` that act as inputs to this plan. The returned list will be empty for leaf nodes such as scans, will contain a diff --git a/python/datafusion/record_batch.py b/python/datafusion/record_batch.py index 772cd9089..556eaa786 100644 --- a/python/datafusion/record_batch.py +++ b/python/datafusion/record_batch.py @@ -26,14 +26,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - import pyarrow + import pyarrow as pa import typing_extensions import datafusion._internal as df_internal class RecordBatch: - """This class is essentially a wrapper for :py:class:`pyarrow.RecordBatch`.""" + """This class is essentially a wrapper for :py:class:`pa.RecordBatch`.""" def __init__(self, record_batch: df_internal.RecordBatch) -> None: """This constructor is generally not called by the end user. @@ -42,8 +42,8 @@ def __init__(self, record_batch: df_internal.RecordBatch) -> None: """ self.record_batch = record_batch - def to_pyarrow(self) -> pyarrow.RecordBatch: - """Convert to :py:class:`pyarrow.RecordBatch`.""" + def to_pyarrow(self) -> pa.RecordBatch: + """Convert to :py:class:`pa.RecordBatch`.""" return self.record_batch.to_pyarrow() diff --git a/python/datafusion/substrait.py b/python/datafusion/substrait.py index 06302fe38..f10adfb0c 100644 --- a/python/datafusion/substrait.py +++ b/python/datafusion/substrait.py @@ -23,7 +23,6 @@ from __future__ import annotations -import pathlib from typing import TYPE_CHECKING try: @@ -36,11 +35,13 @@ from ._internal import substrait as substrait_internal if TYPE_CHECKING: + import pathlib + from datafusion.context import SessionContext __all__ = [ - "Plan", "Consumer", + "Plan", "Producer", "Serde", ] @@ -68,11 +69,9 @@ def encode(self) -> bytes: @deprecated("Use `Plan` instead.") -class plan(Plan): +class plan(Plan): # noqa: N801 """See `Plan`.""" - pass - class Serde: """Provides the ``Substrait`` serialization and deserialization.""" @@ -140,11 +139,9 @@ def deserialize_bytes(proto_bytes: bytes) -> Plan: @deprecated("Use `Serde` instead.") -class serde(Serde): +class serde(Serde): # noqa: N801 """See `Serde` instead.""" - pass - class Producer: """Generates substrait plans from a logical plan.""" @@ -168,11 +165,9 @@ def to_substrait_plan(logical_plan: LogicalPlan, ctx: SessionContext) -> Plan: @deprecated("Use `Producer` instead.") -class producer(Producer): +class producer(Producer): # noqa: N801 """Use `Producer` instead.""" - pass - class Consumer: """Generates a logical plan from a substrait plan.""" @@ -194,7 +189,5 @@ def from_substrait_plan(ctx: SessionContext, plan: Plan) -> LogicalPlan: @deprecated("Use `Consumer` instead.") -class consumer(Consumer): +class consumer(Consumer): # noqa: N801 """Use `Consumer` instead.""" - - pass diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index af7bcf2ed..603b7063d 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -22,15 +22,15 @@ import functools from abc import ABCMeta, abstractmethod from enum import Enum -from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar +from typing import TYPE_CHECKING, Any, 
Callable, Optional, TypeVar, overload -import pyarrow +import pyarrow as pa import datafusion._internal as df_internal from datafusion.expr import Expr if TYPE_CHECKING: - _R = TypeVar("_R", bound=pyarrow.DataType) + _R = TypeVar("_R", bound=pa.DataType) class Volatility(Enum): @@ -72,7 +72,7 @@ class Volatility(Enum): for each output row, resulting in a unique random value for each row. """ - def __str__(self): + def __str__(self) -> str: """Returns the string equivalent.""" return self.name.lower() @@ -88,7 +88,7 @@ def __init__( self, name: str, func: Callable[..., _R], - input_types: pyarrow.DataType | list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], return_type: _R, volatility: Volatility | str, ) -> None: @@ -96,7 +96,7 @@ def __init__( See helper method :py:func:`udf` for argument details. """ - if isinstance(input_types, pyarrow.DataType): + if isinstance(input_types, pa.DataType): input_types = [input_types] self._udf = df_internal.ScalarUDF( name, func, input_types, return_type, str(volatility) @@ -111,7 +111,27 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udf.__call__(*args_raw)) - class udf: + @overload + @staticmethod + def udf( + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., ScalarUDF]: ... + + @overload + @staticmethod + def udf( + func: Callable[..., _R], + input_types: list[pa.DataType], + return_type: _R, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> ScalarUDF: ... + + @staticmethod + def udf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Function (UDF). This class can be used both as a **function** and as a **decorator**. @@ -125,7 +145,7 @@ class udf: Args: func (Callable, optional): **Only needed when calling as a function.** Skip this argument when using `udf` as a decorator. - input_types (list[pyarrow.DataType]): The data types of the arguments + input_types (list[pa.DataType]): The data types of the arguments to `func`. This list must be of the same length as the number of arguments. return_type (_R): The data type of the return value from the function. @@ -141,40 +161,28 @@ class udf: ``` def double_func(x): return x * 2 - double_udf = udf(double_func, [pyarrow.int32()], pyarrow.int32(), + double_udf = udf(double_func, [pa.int32()], pa.int32(), "volatile", "double_it") ``` **Using `udf` as a decorator:** ``` - @udf([pyarrow.int32()], pyarrow.int32(), "volatile", "double_it") + @udf([pa.int32()], pa.int32(), "volatile", "double_it") def double_udf(x): return x * 2 ``` """ - def __new__(cls, *args, **kwargs): - """Create a new UDF. 
- - Trigger UDF function or decorator depending on if the first args is callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( func: Callable[..., _R], - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, ) -> ScalarUDF: if not callable(func): - raise TypeError("`func` argument must be callable") + msg = "`func` argument must be callable" + raise TypeError(msg) if name is None: if hasattr(func, "__qualname__"): name = func.__qualname__.lower() @@ -188,49 +196,50 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: list[pyarrow.DataType], + input_types: list[pa.DataType], return_type: _R, volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(func): + ) -> Callable: + def decorator(func: Callable): udf_caller = ScalarUDF.udf( func, input_types, return_type, volatility, name ) @functools.wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any): return udf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + class Accumulator(metaclass=ABCMeta): """Defines how an :py:class:`AggregateUDF` accumulates values.""" @abstractmethod - def state(self) -> List[pyarrow.Scalar]: + def state(self) -> list[pa.Scalar]: """Return the current state.""" - pass @abstractmethod - def update(self, *values: pyarrow.Array) -> None: + def update(self, *values: pa.Array) -> None: """Evaluate an array of values and update state.""" - pass @abstractmethod - def merge(self, states: List[pyarrow.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: """Merge a set of states.""" - pass @abstractmethod - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: """Return the resultant value.""" - pass class AggregateUDF: @@ -244,9 +253,9 @@ def __init__( self, name: str, accumulator: Callable[[], Accumulator], - input_types: list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, ) -> None: """Instantiate a user-defined aggregate function (UDAF). @@ -272,7 +281,29 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udaf.__call__(*args_raw)) - class udaf: + @overload + @staticmethod + def udaf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., AggregateUDF]: ... + + @overload + @staticmethod + def udaf( + accum: Callable[[], Accumulator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], + volatility: Volatility | str, + name: Optional[str] = None, + ) -> AggregateUDF: ... + + @staticmethod + def udaf(*args: Any, **kwargs: Any): # noqa: D417 """Create a new User-Defined Aggregate Function (UDAF). 
This class allows you to define an **aggregate function** that can be used in @@ -300,13 +331,13 @@ class Summarize(Accumulator): def __init__(self, bias: float = 0.0): self._sum = pa.scalar(bias) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) def evaluate(self) -> pa.Scalar: @@ -344,37 +375,23 @@ def udf4() -> Summarize: aggregation or window function calls. """ - def __new__(cls, *args, **kwargs): - """Create a new UDAF. - - Trigger UDAF function or decorator depending on if the first args is - callable - """ - if args and callable(args[0]): - # Case 1: Used as a function, require the first parameter to be callable - return cls._function(*args, **kwargs) - else: - # Case 2: Used as a decorator with parameters - return cls._decorator(*args, **kwargs) - - @staticmethod def _function( accum: Callable[[], Accumulator], - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, ) -> AggregateUDF: if not callable(accum): - raise TypeError("`func` must be callable.") - if not isinstance(accum.__call__(), Accumulator): - raise TypeError( - "Accumulator must implement the abstract base class Accumulator" - ) + msg = "`func` must be callable." + raise TypeError(msg) + if not isinstance(accum(), Accumulator): + msg = "Accumulator must implement the abstract base class Accumulator" + raise TypeError(msg) if name is None: - name = accum.__call__().__class__.__qualname__.lower() - if isinstance(input_types, pyarrow.DataType): + name = accum().__class__.__qualname__.lower() + if isinstance(input_types, pa.DataType): input_types = [input_types] return AggregateUDF( name=name, @@ -385,29 +402,34 @@ def _function( volatility=volatility, ) - @staticmethod def _decorator( - input_types: pyarrow.DataType | list[pyarrow.DataType], - return_type: pyarrow.DataType, - state_type: list[pyarrow.DataType], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + state_type: list[pa.DataType], volatility: Volatility | str, name: Optional[str] = None, - ): - def decorator(accum: Callable[[], Accumulator]): + ) -> Callable[..., Callable[..., Expr]]: + def decorator(accum: Callable[[], Accumulator]) -> Callable[..., Expr]: udaf_caller = AggregateUDF.udaf( accum, input_types, return_type, state_type, volatility, name ) @functools.wraps(accum) - def wrapper(*args, **kwargs): + def wrapper(*args: Any, **kwargs: Any) -> Expr: return udaf_caller(*args, **kwargs) return wrapper return decorator + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return _function(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return _decorator(*args, **kwargs) + -class WindowEvaluator(metaclass=ABCMeta): +class WindowEvaluator: """Evaluator class for user-defined window functions (UDWF). It is up to the user to decide which evaluate function is appropriate. 
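(Aside, not part of the patch: the refactor above replaces the old `__new__`-based dispatch with typed `@overload` declarations, but both call styles keep working. A minimal sketch under those assumptions, using the `udf` convenience export added at the end of this file; `double_func` and `triple_udf` are illustrative names only:)

    import pyarrow as pa
    import pyarrow.compute as pc

    from datafusion import udf

    # Style 1: direct call. The first positional argument is callable, so the
    # dispatch at the bottom of ScalarUDF.udf routes to _function().
    def double_func(x: pa.Array) -> pa.Array:
        return pc.multiply(x, 2)

    double_udf = udf(double_func, [pa.int64()], pa.int64(), "immutable")

    # Style 2: decorator. No leading callable, so the dispatch routes to
    # _decorator(), which builds the UDF and preserves the wrapped function's
    # metadata via functools.wraps.
    @udf([pa.int64()], pa.int64(), "immutable")
    def triple_udf(x: pa.Array) -> pa.Array:
        return pc.multiply(x, 3)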
@@ -423,7 +445,7 @@ class WindowEvaluator(metaclass=ABCMeta):
     +------------------------+--------------------------------+------------------+---------------------------+
     | True                   | True/False                     | True/False       | ``evaluate``              |
     +------------------------+--------------------------------+------------------+---------------------------+
-    """  # noqa: W505
+    """  # noqa: W505, E501
 
     def memoize(self) -> None:
         """Perform a memoize operation to improve performance.
@@ -436,9 +458,8 @@ def memoize(self) -> None:
         `memoize` is called after each input batch is processed, and
         such functions can save whatever they need
         """
-        pass
 
-    def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:
+    def get_range(self, idx: int, num_rows: int) -> tuple[int, int]:  # noqa: ARG002
         """Return the range for the window function.
 
         If the `uses_window_frame` flag is `false`, this method is used to
@@ -460,14 +481,17 @@ def is_causal(self) -> bool:
         """Get whether evaluator needs future data for its result."""
         return False
 
-    def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Array:
+    def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array:
         """Evaluate a window function on an entire input partition.
 
         This function is called once per input *partition* for window
         functions that *do not use* values from the window frame, such as
-        :py:func:`~datafusion.functions.row_number`, :py:func:`~datafusion.functions.rank`,
-        :py:func:`~datafusion.functions.dense_rank`, :py:func:`~datafusion.functions.percent_rank`,
-        :py:func:`~datafusion.functions.cume_dist`, :py:func:`~datafusion.functions.lead`,
+        :py:func:`~datafusion.functions.row_number`,
+        :py:func:`~datafusion.functions.rank`,
+        :py:func:`~datafusion.functions.dense_rank`,
+        :py:func:`~datafusion.functions.percent_rank`,
+        :py:func:`~datafusion.functions.cume_dist`,
+        :py:func:`~datafusion.functions.lead`,
         and :py:func:`~datafusion.functions.lag`.
 
         It produces the result of all rows in a single pass. It
@@ -499,12 +523,11 @@ def evaluate_all(self, values: list[pyarrow.Array], num_rows: int) -> pyarrow.Ar
         .. code-block:: text
 
             avg(x) OVER (PARTITION BY y ORDER BY z ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING)
-        """  # noqa: W505
-        pass
+        """  # noqa: W505, E501
 
     def evaluate(
-        self, values: list[pyarrow.Array], eval_range: tuple[int, int]
-    ) -> pyarrow.Scalar:
+        self, values: list[pa.Array], eval_range: tuple[int, int]
+    ) -> pa.Scalar:
         """Evaluate window function on a range of rows in an input partition.
 
         This is the simplest and most general function to implement
@@ -519,11 +542,10 @@ def evaluate(
         and evaluation results of ORDER BY expressions. If function has
         a single argument, `values[1..]` will contain ORDER BY expression results.
         """
-        pass
 
     def evaluate_all_with_rank(
         self, num_rows: int, ranks_in_partition: list[tuple[int, int]]
-    ) -> pyarrow.Array:
+    ) -> pa.Array:
         """Called for window functions that only need the rank of a row.
 
         Evaluate the partition evaluator against the partition using
@@ -552,7 +574,6 @@ def evaluate_all_with_rank(
 
         The user must implement this method if ``include_rank`` returns True.
         """
-        pass
 
     def supports_bounded_execution(self) -> bool:
         """Can the window function be incrementally computed using bounded memory?"""
@@ -567,10 +588,6 @@ def include_rank(self) -> bool:
         return False
 
 
-if TYPE_CHECKING:
-    _W = TypeVar("_W", bound=WindowEvaluator)
-
-
 class WindowUDF:
     """Class for performing window user-defined functions (UDF).
@@ -582,8 +599,8 @@ def __init__(
         self,
         name: str,
         func: Callable[[], WindowEvaluator],
-        input_types: list[pyarrow.DataType],
-        return_type: pyarrow.DataType,
+        input_types: list[pa.DataType],
+        return_type: pa.DataType,
         volatility: Volatility | str,
     ) -> None:
         """Instantiate a user-defined window function (UDWF).
@@ -607,8 +624,8 @@ def __call__(self, *args: Expr) -> Expr:
     @staticmethod
     def udwf(
         func: Callable[[], WindowEvaluator],
-        input_types: pyarrow.DataType | list[pyarrow.DataType],
-        return_type: pyarrow.DataType,
+        input_types: pa.DataType | list[pa.DataType],
+        return_type: pa.DataType,
         volatility: Volatility | str,
         name: Optional[str] = None,
     ) -> WindowUDF:
@@ -648,16 +665,16 @@ def bias_10() -> BiasedNumbers:
 
         Returns:
             A user-defined window function.
-        """  # noqa W505
+        """  # noqa: W505, E501
         if not callable(func):
-            raise TypeError("`func` must be callable.")
-        if not isinstance(func.__call__(), WindowEvaluator):
-            raise TypeError(
-                "`func` must implement the abstract base class WindowEvaluator"
-            )
+            msg = "`func` must be callable."
+            raise TypeError(msg)
+        if not isinstance(func(), WindowEvaluator):
+            msg = "`func` must implement the abstract base class WindowEvaluator"
+            raise TypeError(msg)
         if name is None:
-            name = func.__call__().__class__.__qualname__.lower()
-        if isinstance(input_types, pyarrow.DataType):
+            name = func().__class__.__qualname__.lower()
+        if isinstance(input_types, pa.DataType):
             input_types = [input_types]
         return WindowUDF(
             name=name,
@@ -666,3 +683,10 @@ def bias_10() -> BiasedNumbers:
             return_type=return_type,
             volatility=volatility,
         )
+
+
+# Convenience exports so these can be imported directly instead of
+# being treated as variables at the package root
+udf = ScalarUDF.udf
+udaf = AggregateUDF.udaf
+udwf = WindowUDF.udwf
diff --git a/python/tests/generic.py b/python/tests/generic.py
index 0177e2df0..1b98fdf9e 100644
--- a/python/tests/generic.py
+++ b/python/tests/generic.py
@@ -16,6 +16,7 @@
 # under the License.
import datetime
+from datetime import timezone
 
 import numpy as np
 import pyarrow as pa
@@ -26,29 +27,29 @@
 
 
 def data():
-    np.random.seed(1)
+    rng = np.random.default_rng(1)
    data = np.concatenate(
        [
-            np.random.normal(0, 0.01, size=50),
-            np.random.normal(50, 0.01, size=50),
+            rng.normal(0, 0.01, size=50),
+            rng.normal(50, 0.01, size=50),
        ]
    )
    return pa.array(data)
 
 
 def data_with_nans():
-    np.random.seed(0)
-    data = np.random.normal(0, 0.01, size=50)
-    mask = np.random.randint(0, 2, size=50)
+    rng = np.random.default_rng(0)
+    data = rng.normal(0, 0.01, size=50)
+    mask = rng.integers(0, 2, size=50)
    data[mask == 0] = np.nan
    return data
 
 
 def data_datetime(f):
    data = [
-        datetime.datetime.now(),
-        datetime.datetime.now() - datetime.timedelta(days=1),
-        datetime.datetime.now() + datetime.timedelta(days=1),
+        datetime.datetime.now(tz=timezone.utc),
+        datetime.datetime.now(tz=timezone.utc) - datetime.timedelta(days=1),
+        datetime.datetime.now(tz=timezone.utc) + datetime.timedelta(days=1),
    ]
    return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False]))
 
diff --git a/python/tests/test_aggregation.py b/python/tests/test_aggregation.py
index 5ef46131b..61b1c7d80 100644
--- a/python/tests/test_aggregation.py
+++ b/python/tests/test_aggregation.py
@@ -66,7 +66,7 @@ def df_aggregate_100():
 
 
 @pytest.mark.parametrize(
-    "agg_expr, calc_expected",
+    ("agg_expr", "calc_expected"),
    [
        (f.avg(column("a")), lambda a, b, c, d: np.array(np.average(a))),
        (
@@ -114,7 +114,7 @@ def test_aggregation_stats(df, agg_expr, calc_expected):
 
 
 @pytest.mark.parametrize(
-    "agg_expr, expected, array_sort",
+    ("agg_expr", "expected", "array_sort"),
    [
        (f.approx_distinct(column("b")), pa.array([2], type=pa.uint64()), False),
        (
@@ -182,12 +182,11 @@ def test_aggregation(df, agg_expr, expected, array_sort):
 
    agg_df.show()
    result = agg_df.collect()[0]
-    print(result)
 
    assert result.column(0) == expected
 
 
 @pytest.mark.parametrize(
-    "name,expr,expected",
+    ("name", "expr", "expected"),
    [
        (
            "approx_percentile_cont",
@@ -299,7 +298,9 @@ def test_aggregate_100(df_aggregate_100, name, expr, expected):
 ]
 
 
-@pytest.mark.parametrize("name,expr,result", data_test_bitwise_and_boolean_functions)
+@pytest.mark.parametrize(
+    ("name", "expr", "result"), data_test_bitwise_and_boolean_functions
+)
 def test_bit_and_bool_fns(df, name, expr, result):
    df = df.aggregate([], [expr.alias(name)])
 
@@ -311,7 +312,7 @@ def test_bit_and_bool_fns(df, name, expr, result):
 
 
 @pytest.mark.parametrize(
-    "name,expr,result",
+    ("name", "expr", "result"),
    [
        ("first_value", f.first_value(column("a")), [0, 4]),
        (
@@ -361,7 +362,6 @@ def test_bit_and_bool_fns(df, name, expr, result):
            ),
            [8, 9],
        ),
-        ("first_value", f.first_value(column("a")), [0, 4]),
        (
            "nth_value_ordered",
            f.nth_value(column("a"), 2, order_by=[column("a").sort(ascending=False)]),
@@ -401,7 +401,7 @@ def test_first_last_value(df_partitioned, name, expr, result) -> None:
 
 
 @pytest.mark.parametrize(
-    "name,expr,result",
+    ("name", "expr", "result"),
    [
        ("string_agg", f.string_agg(column("a"), ","), "one,two,three,two"),
        ("string_agg", f.string_agg(column("b"), ""), "03124"),
diff --git a/python/tests/test_catalog.py b/python/tests/test_catalog.py
index 214f6b165..23b328458 100644
--- a/python/tests/test_catalog.py
+++ b/python/tests/test_catalog.py
@@ -19,6 +19,9 @@
 import pytest
 
 
+# Note we accept `database` as a parameter even though we don't use
+# it: requesting the fixture sets up the context with the tables we
+# need.
def test_basic(ctx, database): with pytest.raises(KeyError): ctx.catalog("non-existent") @@ -26,10 +29,10 @@ def test_basic(ctx, database): default = ctx.catalog() assert default.names() == ["public"] - for database in [default.database("public"), default.database()]: - assert database.names() == {"csv1", "csv", "csv2"} + for db in [default.database("public"), default.database()]: + assert db.names() == {"csv1", "csv", "csv2"} - table = database.table("csv") + table = db.table("csv") assert table.kind == "physical" assert table.schema == pa.schema( [ diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 91046e6b8..7a0a7aa08 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -16,7 +16,6 @@ # under the License. import datetime as dt import gzip -import os import pathlib import pyarrow as pa @@ -45,7 +44,7 @@ def test_create_context_runtime_config_only(): SessionContext(runtime=RuntimeEnvBuilder()) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_runtime_configs(tmp_path, path_to_str): path1 = tmp_path / "dir1" path2 = tmp_path / "dir2" @@ -62,7 +61,7 @@ def test_runtime_configs(tmp_path, path_to_str): assert db is not None -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_temporary_files(tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -79,14 +78,14 @@ def test_create_context_with_all_valid_args(): runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(1) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - .with_parquet_pruning(False) + .with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) ) ctx = SessionContext(config, runtime) @@ -167,7 +166,7 @@ def test_from_arrow_table(ctx): def record_batch_generator(num_batches: int): schema = pa.schema([("a", pa.int64()), ("b", pa.int64())]) - for i in range(num_batches): + for _i in range(num_batches): yield pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], schema=schema ) @@ -492,10 +491,10 @@ def test_table_not_found(ctx): def test_read_json(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = pathlib.Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path) result = df.collect() @@ -515,7 +514,7 @@ def test_read_json(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = path / "data_test_context" / "data.json" df = ctx.read_json(test_data_path, file_extension=".json") result = df.collect() @@ -524,15 +523,17 @@ def test_read_json(ctx): def test_read_json_compressed(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = pathlib.Path(__file__).parent.resolve() + test_data_path = path / 
"data_test_context" / "data.json" # File compression type gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) df = ctx.read_json(gzip_path, file_extension=".gz", file_compression_type="gz") result = df.collect() @@ -563,14 +564,16 @@ def test_read_csv_list(ctx): def test_read_csv_compressed(ctx, tmp_path): - test_data_path = "testing/data/csv/aggregate_test_100.csv" + test_data_path = pathlib.Path("testing/data/csv/aggregate_test_100.csv") # File compression type gzip_path = tmp_path / "aggregate_test_100.csv.gz" - with open(test_data_path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with ( + pathlib.Path.open(test_data_path, "rb") as csv_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(csv_file) csv_df = ctx.read_csv(gzip_path, file_extension=".gz", file_compression_type="gz") csv_df.select(column("c1")).show() @@ -603,7 +606,7 @@ def test_create_sql_options(): def test_sql_with_options_no_ddl(ctx): sql = "CREATE TABLE IF NOT EXISTS valuetable AS VALUES(1,'HELLO'),(12,'DATAFUSION')" ctx.sql(sql) - options = SQLOptions().with_allow_ddl(False) + options = SQLOptions().with_allow_ddl(allow=False) with pytest.raises(Exception, match="DDL"): ctx.sql_with_options(sql, options=options) @@ -618,7 +621,7 @@ def test_sql_with_options_no_dml(ctx): ctx.register_dataset(table_name, dataset) sql = f'INSERT INTO "{table_name}" VALUES (1, 2), (2, 3);' ctx.sql(sql) - options = SQLOptions().with_allow_dml(False) + options = SQLOptions().with_allow_dml(allow=False) with pytest.raises(Exception, match="DML"): ctx.sql_with_options(sql, options=options) @@ -626,6 +629,6 @@ def test_sql_with_options_no_dml(ctx): def test_sql_with_options_no_statements(ctx): sql = "SET time zone = 1;" ctx.sql(sql) - options = SQLOptions().with_allow_statements(False) + options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index c636e896a..d084f12dd 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -339,7 +339,7 @@ def test_join(): # Verify we don't make a breaking change to pre-43.0.0 # where users would pass join_keys as a positional argument - df2 = df.join(df1, (["a"], ["a"]), how="inner") # type: ignore + df2 = df.join(df1, (["a"], ["a"]), how="inner") df2.show() df2 = df2.sort(column("l.a")) table = pa.Table.from_batches(df2.collect()) @@ -375,17 +375,17 @@ def test_join_invalid_params(): with pytest.raises( ValueError, match=r"`left_on` or `right_on` should not provided with `on`" ): - df2 = df.join(df1, on="a", how="inner", right_on="test") # type: ignore + df2 = df.join(df1, on="a", how="inner", right_on="test") with pytest.raises( ValueError, match=r"`left_on` and `right_on` should both be provided." ): - df2 = df.join(df1, left_on="a", how="inner") # type: ignore + df2 = df.join(df1, left_on="a", how="inner") with pytest.raises( ValueError, match=r"either `on` or `left_on` and `right_on` should be provided." 
): - df2 = df.join(df1, how="inner") # type: ignore + df2 = df.join(df1, how="inner") def test_join_on(): @@ -567,7 +567,7 @@ def test_distinct(): ] -@pytest.mark.parametrize("name,expr,result", data_test_window_functions) +@pytest.mark.parametrize(("name", "expr", "result"), data_test_window_functions) def test_window_functions(partitioned_df, name, expr, result): df = partitioned_df.select( column("a"), column("b"), column("c"), f.alias(expr, name) @@ -731,7 +731,7 @@ def test_execution_plan(aggregate_df): plan = aggregate_df.execution_plan() expected = ( - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" # noqa: E501 + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1], aggr=[sum(test.c2)]\n" ) assert expected == plan.display() @@ -756,7 +756,7 @@ def test_execution_plan(aggregate_df): ctx = SessionContext() rows_returned = 0 - for idx in range(0, plan.partition_count): + for idx in range(plan.partition_count): stream = ctx.execute(plan, idx) try: batch = stream.next() @@ -885,7 +885,7 @@ def test_union_distinct(ctx): ) df_c = ctx.create_dataframe([[batch]]).sort(column("a")) - df_a_u_b = df_a.union(df_b, True).sort(column("a")) + df_a_u_b = df_a.union(df_b, distinct=True).sort(column("a")) assert df_c.collect() == df_a_u_b.collect() assert df_c.collect() == df_a_u_b.collect() @@ -954,8 +954,6 @@ def test_to_arrow_table(df): def test_execute_stream(df): stream = df.execute_stream() - for s in stream: - print(type(s)) assert all(batch is not None for batch in stream) assert not list(stream) # after one iteration the generator must be exhausted @@ -969,7 +967,7 @@ def test_execute_stream_to_arrow_table(df, schema): (batch.to_pyarrow() for batch in stream), schema=df.schema() ) else: - pyarrow_table = pa.Table.from_batches((batch.to_pyarrow() for batch in stream)) + pyarrow_table = pa.Table.from_batches(batch.to_pyarrow() for batch in stream) assert isinstance(pyarrow_table, pa.Table) assert pyarrow_table.shape == (3, 3) @@ -1033,7 +1031,7 @@ def test_describe(df): } -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_csv(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1046,7 +1044,7 @@ def test_write_csv(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_json(ctx, df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1059,7 +1057,7 @@ def test_write_json(ctx, df, tmp_path, path_to_str): assert result == expected -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_write_parquet(df, tmp_path, path_to_str): path = str(tmp_path) if path_to_str else tmp_path @@ -1071,7 +1069,7 @@ def test_write_parquet(df, tmp_path, path_to_str): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 6), ("brotli", 7), ("zstd", 15)], ) def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @@ -1082,7 +1080,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): ) # test that the actual compression scheme is the one written - for root, dirs, files in os.walk(path): + for _root, _dirs, files in os.walk(path): for file in files: if file.endswith(".parquet"): metadata = pq.ParquetFile(tmp_path / file).metadata.to_dict() @@ 
-1097,7 +1095,7 @@ def test_write_compressed_parquet(df, tmp_path, compression, compression_level): @pytest.mark.parametrize( - "compression, compression_level", + ("compression", "compression_level"), [("gzip", 12), ("brotli", 15), ("zstd", 23), ("wrong", 12)], ) def test_write_compressed_parquet_wrong_compression_level( @@ -1152,7 +1150,7 @@ def test_dataframe_export(df) -> None: table = pa.table(df, schema=desired_schema) assert table.num_columns == 1 assert table.num_rows == 3 - for i in range(0, 3): + for i in range(3): assert table[0][i].as_py() is None # Expect an error when we cannot convert schema @@ -1186,8 +1184,8 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame: result = df.to_pydict() assert result["a"] == [1, 2, 3] - assert result["string_col"] == ["string data" for _i in range(0, 3)] - assert result["new_col"] == [3 for _i in range(0, 3)] + assert result["string_col"] == ["string data" for _i in range(3)] + assert result["new_col"] == [3 for _i in range(3)] def test_dataframe_repr_html(df) -> None: diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index 354c7e180..926e69845 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -85,18 +85,14 @@ def test_limit(test_ctx): plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 0 + assert "Skip: None" in str(plan) df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5") plan = df.logical_plan() plan = plan.to_variant() assert isinstance(plan, Limit) - # TODO: Upstream now has expressions for skip and fetch - # REF: https://github.com/apache/datafusion/pull/12836 - # assert plan.skip() == 5 + assert "Skip: Some(Literal(Int64(5)))" in str(plan) def test_aggregate_query(test_ctx): @@ -165,6 +161,7 @@ def traverse_logical_plan(plan): res = traverse_logical_plan(input_plan) if res is not None: return res + return None ctx = SessionContext() data = {"id": [1, 2, 3], "name": ["Alice", "Bob", "Charlie"]} @@ -176,7 +173,7 @@ def traverse_logical_plan(plan): assert variant.expr().to_variant().qualified_name() == "table1.name" assert ( str(variant.list()) - == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' + == '[Expr(Utf8("dfa")), Expr(Utf8("ad")), Expr(Utf8("dfre")), Expr(Utf8("vsa"))]' # noqa: E501 ) assert not variant.negated() diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index fca05bb8f..ed88a16e3 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import math -from datetime import datetime +from datetime import datetime, timezone import numpy as np import pyarrow as pa @@ -25,6 +25,8 @@ np.seterr(invalid="ignore") +DEFAULT_TZ = timezone.utc + @pytest.fixture def df(): @@ -37,9 +39,9 @@ def df(): pa.array(["hello ", " world ", " !"], type=pa.string_view()), pa.array( [ - datetime(2022, 12, 31), - datetime(2027, 6, 26), - datetime(2020, 7, 2), + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), ] ), pa.array([False, True, True]), @@ -221,12 +223,12 @@ def py_indexof(arr, v): def py_arr_remove(arr, v, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: new_arr.remove(v) found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -234,13 +236,13 @@ def py_arr_remove(arr, v, n=None): def py_arr_replace(arr, from_, to, n=None): new_arr = arr[:] found = 0 - while found != n: - try: + try: + while found != n: idx = new_arr.index(from_) new_arr[idx] = to found += 1 - except ValueError: - break + except ValueError: + pass return new_arr @@ -268,266 +270,266 @@ def py_flatten(arr): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( lambda col: f.array_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_append(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_back(col, literal(99.0)), lambda data: [np.append(arr, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_cat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.list_concat(col, col), lambda data: [np.concatenate([arr, arr]) for arr in data], - ], - [ + ), + ( lambda col: f.array_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_distinct(col), lambda data: [list(set(r)) for r in data], - ], - [ + ), + ( lambda col: f.list_dims(col), lambda data: [[len(r)] for r in data], - ], - [ + ), + ( lambda col: f.array_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.empty(col), lambda data: [len(r) == 0 for r in data], - ], - [ + ), + ( lambda col: f.array_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_element(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.list_extract(col, literal(1)), lambda data: [r[0] for r in data], - ], - [ + ), + ( lambda col: f.array_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.list_length(col), lambda data: [len(r) for r in data], - ], - [ + ), + ( lambda col: f.array_has(col, literal(1.0)), lambda data: [1.0 in r for r in data], - ], - [ + ), + ( lambda col: f.array_has_all( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.all([v in r for v in [1.0, 3.0, 5.0]]) for r in data], - 
], - [ + ), + ( lambda col: f.array_has_any( col, f.make_array(*[literal(v) for v in [1.0, 3.0, 5.0]]) ), lambda data: [np.any([v in r for v in [1.0, 3.0, 5.0]]) for r in data], - ], - [ + ), + ( lambda col: f.array_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_position(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.list_indexof(col, literal(1.0)), lambda data: [py_indexof(r, 1.0) for r in data], - ], - [ + ), + ( lambda col: f.array_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.list_positions(col, literal(1.0)), lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data], - ], - [ + ), + ( lambda col: f.array_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.list_ndims(col), lambda data: [np.array(r).ndim for r in data], - ], - [ + ), + ( lambda col: f.array_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_prepend(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_push_front(literal(99.0), col), lambda data: [np.insert(arr, 0, 99.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_back(col), lambda data: [arr[:-1] for arr in data], - ], - [ + ), + ( lambda col: f.array_pop_front(col), lambda data: [arr[1:] for arr in data], - ], - [ + ), + ( lambda col: f.array_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_n(col, literal(3.0), literal(2)), lambda data: [py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_n(col, literal(3.0), literal(2)), lambda data: [py_arr_remove(arr, 3.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_remove_all(col, literal(3.0)), lambda data: [py_arr_remove(arr, 3.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.list_repeat(col, literal(2)), lambda data: [[arr] * 2 for arr in data], - ], - [ + ), + ( lambda col: f.array_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_n(col, literal(3.0), literal(4.0), literal(1)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 1) for arr in data], - ], - [ + ), + ( lambda col: f.list_replace_n(col, literal(3.0), literal(4.0), literal(2)), lambda data: [py_arr_replace(arr, 3.0, 4.0, 2) for arr in data], - ], - [ + ), + ( lambda col: f.array_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( 
lambda col: f.list_replace_all(col, literal(3.0), literal(4.0)), lambda data: [py_arr_replace(arr, 3.0, 4.0) for arr in data], - ], - [ + ), + ( lambda col: f.array_sort(col, descending=True, null_first=True), lambda data: [np.sort(arr)[::-1] for arr in data], - ], - [ + ), + ( lambda col: f.list_sort(col, descending=False, null_first=False), lambda data: [np.sort(arr) for arr in data], - ], - [ + ), + ( lambda col: f.array_slice(col, literal(2), literal(4)), lambda data: [arr[1:4] for arr in data], - ], + ), pytest.param( lambda col: f.list_slice(col, literal(-1), literal(2)), lambda data: [arr[-1:2] for arr in data], ), - [ + ( lambda col: f.array_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_intersect(col, literal([3.0, 4.0])), lambda data: [np.intersect1d(arr, [3.0, 4.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_union(col, literal([12.0, 999.0])), lambda data: [np.union1d(arr, [12.0, 999.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.list_except(col, literal([3.0])), lambda data: [np.setdiff1d(arr, [3.0]) for arr in data], - ], - [ + ), + ( lambda col: f.array_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.list_resize(col, literal(10), literal(0.0)), lambda data: [py_arr_resize(arr, 10, 0.0) for arr in data], - ], - [ + ), + ( lambda col: f.range(literal(1), literal(5), literal(2)), lambda data: [np.arange(1, 5, 2)], - ], + ), ], ) def test_array_functions(stmt, py_expr): @@ -611,22 +613,22 @@ def test_make_array_functions(make_func): @pytest.mark.parametrize( ("stmt", "py_expr"), [ - [ + ( f.array_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.array_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_to_string(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], - [ + ), + ( f.list_join(column("arr"), literal(",")), lambda data: [",".join([str(int(v)) for v in r]) for r in data], - ], + ), ], ) def test_array_function_obj_tests(stmt, py_expr): @@ -640,7 +642,7 @@ def test_array_function_obj_tests(stmt, py_expr): @pytest.mark.parametrize( - "function, expected_result", + ("function", "expected_result"), [ ( f.ascii(column("a")), @@ -894,54 +896,72 @@ def test_temporal_functions(df): assert result.column(0) == pa.array([12, 6, 7], type=pa.int32()) assert result.column(1) == pa.array([2022, 2027, 2020], type=pa.int32()) assert result.column(2) == pa.array( - [datetime(2022, 12, 1), datetime(2027, 6, 1), datetime(2020, 7, 1)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 1, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 1, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(3) == pa.array( - [datetime(2022, 12, 31), datetime(2027, 6, 26), datetime(2020, 7, 2)], - type=pa.timestamp("us"), + [ + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("ns", 
tz=DEFAULT_TZ), ) assert result.column(4) == pa.array( [ - datetime(2022, 12, 30, 23, 47, 30), - datetime(2027, 6, 25, 23, 47, 30), - datetime(2020, 7, 1, 23, 47, 30), + datetime(2022, 12, 30, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 25, 23, 47, 30, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 1, 23, 47, 30, tzinfo=DEFAULT_TZ), ], - type=pa.timestamp("ns"), + type=pa.timestamp("ns", tz=DEFAULT_TZ), ) assert result.column(5) == pa.array( - [datetime(2023, 1, 10, 20, 52, 54)] * 3, type=pa.timestamp("s") + [datetime(2023, 1, 10, 20, 52, 54, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(6) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(7) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, type=pa.timestamp("s") ) assert result.column(8) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(9) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(10) == pa.array([31, 26, 2], type=pa.int32()) assert result.column(11) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(12) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14)] * 3, type=pa.timestamp("s") + [datetime(2023, 9, 7, 5, 6, 14, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("s"), ) assert result.column(13) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523000)] * 3, type=pa.timestamp("ms") + [datetime(2023, 9, 7, 5, 6, 14, 523000, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ms"), ) assert result.column(14) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("us"), ) assert result.column(15) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) assert result.column(16) == pa.array( - [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("ns") + [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, + type=pa.timestamp("ns"), ) @@ -1057,7 +1077,7 @@ def test_regr_funcs_sql_2(): @pytest.mark.parametrize( - "func, expected", + ("func", "expected"), [ pytest.param(f.regr_slope(column("c2"), column("c1")), [4.6], id="regr_slope"), pytest.param( @@ -1160,7 +1180,7 @@ def test_binary_string_functions(df): @pytest.mark.parametrize( - "python_datatype, name, expected", + ("python_datatype", "name", "expected"), [ pytest.param(bool, "e", pa.bool_(), id="bool"), pytest.param(int, "b", pa.int64(), id="int"), @@ -1179,7 +1199,7 @@ def test_cast(df, python_datatype, name: str, expected): @pytest.mark.parametrize( - "negated, low, high, expected", + ("negated", "low", "high", "expected"), [ pytest.param(False, 3, 5, {"filtered": [4, 5]}), pytest.param(False, 4, 5, {"filtered": [4, 5]}), diff --git a/python/tests/test_imports.py b/python/tests/test_imports.py index 0c155cbde..9ef7ed89a 100644 
--- a/python/tests/test_imports.py +++ b/python/tests/test_imports.py @@ -169,14 +169,15 @@ def test_class_module_is_datafusion(): def test_import_from_functions_submodule(): - from datafusion.functions import abs, sin # noqa + from datafusion.functions import abs as df_abs + from datafusion.functions import sin - assert functions.abs is abs + assert functions.abs is df_abs assert functions.sin is sin msg = "cannot import name 'foobar' from 'datafusion.functions'" with pytest.raises(ImportError, match=msg): - from datafusion.functions import foobar # noqa + from datafusion.functions import foobar # noqa: F401 def test_classes_are_inheritable(): diff --git a/python/tests/test_input.py b/python/tests/test_input.py index 806471357..4663f6148 100644 --- a/python/tests/test_input.py +++ b/python/tests/test_input.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -import os +import pathlib from datafusion.input.location import LocationInputPlugin @@ -23,10 +23,10 @@ def test_location_input(): location_input = LocationInputPlugin() - cwd = os.getcwd() - input_file = cwd + "/testing/data/parquet/generated_simple_numerics/blogs.parquet" + cwd = pathlib.Path.cwd() + input_file = cwd / "testing/data/parquet/generated_simple_numerics/blogs.parquet" table_name = "blog" - tbl = location_input.build_table(input_file, table_name) - assert "blog" == tbl.name - assert 3 == len(tbl.columns) + tbl = location_input.build_table(str(input_file), table_name) + assert tbl.name == "blog" + assert len(tbl.columns) == 3 assert "blogs.parquet" in tbl.filepaths[0] diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 21ad188ee..7ca509689 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -14,8 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -import os -import pathlib +from pathlib import Path import pyarrow as pa from datafusion import column @@ -23,10 +22,10 @@ def test_read_json_global_ctx(ctx): - path = os.path.dirname(os.path.abspath(__file__)) + path = Path(__file__).parent.resolve() # Default - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path) result = df.collect() @@ -46,7 +45,7 @@ def test_read_json_global_ctx(ctx): assert result[0].schema == schema # File extension - test_data_path = os.path.join(path, "data_test_context", "data.json") + test_data_path = Path(path) / "data_test_context" / "data.json" df = read_json(test_data_path, file_extension=".json") result = df.collect() @@ -59,7 +58,7 @@ def test_read_parquet_global(): parquet_df.show() assert parquet_df is not None - path = pathlib.Path.cwd() / "parquet/data/alltypes_plain.parquet" + path = Path.cwd() / "parquet/data/alltypes_plain.parquet" parquet_df = read_parquet(path=path) assert parquet_df is not None @@ -90,6 +89,6 @@ def test_read_avro(): avro_df.show() assert avro_df is not None - path = pathlib.Path.cwd() / "testing/data/avro/alltypes_plain.avro" + path = Path.cwd() / "testing/data/avro/alltypes_plain.avro" avro_df = read_avro(path=path) assert avro_df is not None diff --git a/python/tests/test_sql.py b/python/tests/test_sql.py index 862f745bf..b6348e3a0 100644 --- a/python/tests/test_sql.py +++ b/python/tests/test_sql.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import gzip -import os +from pathlib import Path import numpy as np import pyarrow as pa @@ -47,9 +47,8 @@ def test_register_csv(ctx, tmp_path): ) write_csv(table, path) - with open(path, "rb") as csv_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(csv_file) + with Path.open(path, "rb") as csv_file, gzip.open(gzip_path, "wb") as gzipped_file: + gzipped_file.writelines(csv_file) ctx.register_csv("csv", path) ctx.register_csv("csv1", str(path)) @@ -158,7 +157,7 @@ def test_register_parquet(ctx, tmp_path): assert result.to_pydict() == {"cnt": [100]} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): dir_root = tmp_path / "dataset_parquet_partitioned" dir_root.mkdir(exist_ok=False) @@ -194,7 +193,7 @@ def test_register_parquet_partitioned(ctx, tmp_path, path_to_str): assert dict(zip(rd["grp"], rd["cnt"])) == {"a": 3, "b": 1} -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_dataset(ctx, tmp_path, path_to_str): path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) path = str(path) if path_to_str else path @@ -209,13 +208,15 @@ def test_register_dataset(ctx, tmp_path, path_to_str): def test_register_json(ctx, tmp_path): - path = os.path.dirname(os.path.abspath(__file__)) - test_data_path = os.path.join(path, "data_test_context", "data.json") + path = Path(__file__).parent.resolve() + test_data_path = Path(path) / "data_test_context" / "data.json" gzip_path = tmp_path / "data.json.gz" - with open(test_data_path, "rb") as json_file: - with gzip.open(gzip_path, "wb") as gzipped_file: - gzipped_file.writelines(json_file) + with ( + Path.open(test_data_path, "rb") as json_file, + gzip.open(gzip_path, "wb") as gzipped_file, + ): + gzipped_file.writelines(json_file) ctx.register_json("json", test_data_path) ctx.register_json("json1", str(test_data_path)) @@ -470,16 +471,18 @@ def test_simple_select(ctx, tmp_path, arr): # In DF 43.0.0 we now default to having BinaryView and StringView # so the array that is saved to the parquet is slightly different # than the array read. Convert to values for comparison. 
- if isinstance(result, pa.BinaryViewArray) or isinstance(result, pa.StringViewArray): + if isinstance(result, (pa.BinaryViewArray, pa.StringViewArray)): arr = arr.tolist() result = result.tolist() np.testing.assert_equal(result, arr) -@pytest.mark.parametrize("file_sort_order", (None, [[col("int").sort(True, True)]])) -@pytest.mark.parametrize("pass_schema", (True, False)) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize( + "file_sort_order", [None, [[col("int").sort(ascending=True, nulls_first=True)]]] +) +@pytest.mark.parametrize("pass_schema", [True, False]) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_register_listing_table( ctx, tmp_path, pass_schema, file_sort_order, path_to_str ): @@ -528,7 +531,7 @@ def test_register_listing_table( assert dict(zip(rd["grp"], rd["count"])) == {"a": 5, "b": 2} result = ctx.sql( - "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" + "SELECT grp, COUNT(*) AS count FROM my_table WHERE date_id=20201005 GROUP BY grp" # noqa: E501 ).collect() result = pa.Table.from_batches(result) diff --git a/python/tests/test_store.py b/python/tests/test_store.py index 53ffc3acf..ac9af98f3 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -import os +from pathlib import Path import pytest from datafusion import SessionContext @@ -23,17 +23,16 @@ @pytest.fixture def ctx(): - ctx = SessionContext() - return ctx + return SessionContext() def test_read_parquet(ctx): ctx.register_parquet( "test", - f"file://{os.getcwd()}/parquet/data/alltypes_plain.parquet", - [], - True, - ".parquet", + f"file://{Path.cwd()}/parquet/data/alltypes_plain.parquet", + table_partition_cols=[], + parquet_pruning=True, + file_extension=".parquet", ) df = ctx.sql("SELECT * FROM test") assert isinstance(df.collect(), list) diff --git a/python/tests/test_substrait.py b/python/tests/test_substrait.py index feada7cde..f367a447d 100644 --- a/python/tests/test_substrait.py +++ b/python/tests/test_substrait.py @@ -50,7 +50,7 @@ def test_substrait_serialization(ctx): substrait_plan = ss.Producer.to_substrait_plan(df.logical_plan(), ctx) -@pytest.mark.parametrize("path_to_str", (True, False)) +@pytest.mark.parametrize("path_to_str", [True, False]) def test_substrait_file_serialization(ctx, tmp_path, path_to_str): batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], diff --git a/python/tests/test_udaf.py b/python/tests/test_udaf.py index 97cf81f3c..453ff6f4f 100644 --- a/python/tests/test_udaf.py +++ b/python/tests/test_udaf.py @@ -17,8 +17,6 @@ from __future__ import annotations -from typing import List - import pyarrow as pa import pyarrow.compute as pc import pytest @@ -31,7 +29,7 @@ class Summarize(Accumulator): def __init__(self, initial_value: float = 0.0): self._sum = pa.scalar(initial_value) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] def update(self, values: pa.Array) -> None: @@ -39,7 +37,7 @@ def update(self, values: pa.Array) -> None: # This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - def merge(self, states: List[pa.Array]) -> None: + def merge(self, states: list[pa.Array]) -> None: # Not nice since pyarrow scalars can't be summed yet. 
# This breaks on `None` self._sum = pa.scalar(self._sum.as_py() + pc.sum(states[0]).as_py()) @@ -56,7 +54,7 @@ class MissingMethods(Accumulator): def __init__(self): self._sum = pa.scalar(0) - def state(self) -> List[pa.Scalar]: + def state(self) -> list[pa.Scalar]: return [self._sum] @@ -86,7 +84,7 @@ def test_errors(df): "evaluate, merge, update)" ) with pytest.raises(Exception, match=msg): - accum = udaf( # noqa F841 + accum = udaf( # noqa: F841 MissingMethods, pa.int64(), pa.int64(), diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 2fea34aa3..3d6dcf9d8 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -298,7 +298,7 @@ def test_udwf_errors(df): ] -@pytest.mark.parametrize("name,expr,expected", data_test_udwf_functions) +@pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) def test_udwf_functions(df, name, expr, expected): df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py index ac064ba95..d7f6f6e35 100644 --- a/python/tests/test_wrapper_coverage.py +++ b/python/tests/test_wrapper_coverage.py @@ -19,6 +19,7 @@ import datafusion.functions import datafusion.object_store import datafusion.substrait +import pytest # EnumType introduced in 3.11. 3.10 and prior it was called EnumMeta. try: @@ -41,10 +42,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None: internal_attr = getattr(internal_obj, attr) wrapped_attr = getattr(wrapped_obj, attr) - if internal_attr is not None: - if wrapped_attr is None: - print("Missing attribute: ", attr) - assert False + if internal_attr is not None and wrapped_attr is None: + pytest.fail(f"Missing attribute: {attr}") if attr in ["__self__", "__class__"]: continue From 3dcf7c7e5c0af0eb3c5e3bdf9c6e33fd4541b070 Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Thu, 13 Mar 2025 04:09:03 -0600 Subject: [PATCH 14/22] feat/making global context accessible for users (#1060) * Rename _global_ctx to global_ctx * Add global context to python wrapper code * Update context.py * singleton for global context * formatting * remove udf from import * remove _global_instance * formatting * formatting * unnecessary test * fix test_io.py * ran ruff * ran ruff format --- python/datafusion/context.py | 12 +++++++ python/datafusion/io.py | 63 ++++++++++++++++-------------------- python/tests/test_context.py | 18 +++++++++++ src/context.rs | 2 +- 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 0ab1a908a..58ad9a943 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -496,6 +496,18 @@ def __init__( self.ctx = SessionContextInternal(config, runtime) + @classmethod + def global_ctx(cls) -> SessionContext: + """Retrieve the global context as a `SessionContext` wrapper. + + Returns: + A `SessionContext` object that wraps the global `SessionContextInternal`. + """ + internal_ctx = SessionContextInternal.global_ctx() + wrapper = cls() + wrapper.ctx = internal_ctx + return wrapper + def enable_url_table(self) -> SessionContext: """Control if local files can be queried as tables. 
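(Aside, not part of the patch: a minimal sketch of how user code can reach the new public entry point once this commit is applied. The table name `t` is hypothetical; both wrappers share the same underlying global context, so state registered through one is visible through the other:)

    import pyarrow as pa

    from datafusion import SessionContext

    ctx_a = SessionContext.global_ctx()
    ctx_b = SessionContext.global_ctx()

    # Register a table through one wrapper and query it through the other.
    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=["a"])
    ctx_a.register_record_batches("t", [[batch]])
    result = ctx_b.sql("SELECT SUM(a) AS total FROM t").collect()
    assert result[0].column(0).to_pylist() == [6]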
diff --git a/python/datafusion/io.py b/python/datafusion/io.py index 3e39703e3..ef5ebf96f 100644 --- a/python/datafusion/io.py +++ b/python/datafusion/io.py @@ -21,10 +21,9 @@ from typing import TYPE_CHECKING +from datafusion.context import SessionContext from datafusion.dataframe import DataFrame -from ._internal import SessionContext as SessionContextInternal - if TYPE_CHECKING: import pathlib @@ -68,16 +67,14 @@ def read_parquet( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_parquet( - str(path), - table_partition_cols, - parquet_pruning, - file_extension, - skip_metadata, - schema, - file_sort_order, - ) + return SessionContext.global_ctx().read_parquet( + str(path), + table_partition_cols, + parquet_pruning, + file_extension, + skip_metadata, + schema, + file_sort_order, ) @@ -110,15 +107,13 @@ def read_json( """ if table_partition_cols is None: table_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_json( - str(path), - schema, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_json( + str(path), + schema, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -161,17 +156,15 @@ def read_csv( path = [str(p) for p in path] if isinstance(path, list) else str(path) - return DataFrame( - SessionContextInternal._global_ctx().read_csv( - path, - schema, - has_header, - delimiter, - schema_infer_max_records, - file_extension, - table_partition_cols, - file_compression_type, - ) + return SessionContext.global_ctx().read_csv( + path, + schema, + has_header, + delimiter, + schema_infer_max_records, + file_extension, + table_partition_cols, + file_compression_type, ) @@ -198,8 +191,6 @@ def read_avro( """ if file_partition_cols is None: file_partition_cols = [] - return DataFrame( - SessionContextInternal._global_ctx().read_avro( - str(path), schema, file_partition_cols, file_extension - ) + return SessionContext.global_ctx().read_avro( + str(path), schema, file_partition_cols, file_extension ) diff --git a/python/tests/test_context.py b/python/tests/test_context.py index 7a0a7aa08..4a15ac9cf 100644 --- a/python/tests/test_context.py +++ b/python/tests/test_context.py @@ -632,3 +632,21 @@ def test_sql_with_options_no_statements(ctx): options = SQLOptions().with_allow_statements(allow=False) with pytest.raises(Exception, match="SetVariable"): ctx.sql_with_options(sql, options=options) + + +@pytest.fixture +def batch(): + return pa.RecordBatch.from_arrays( + [pa.array([4, 5, 6])], + names=["a"], + ) + + +def test_create_dataframe_with_global_ctx(batch): + ctx = SessionContext.global_ctx() + + df = ctx.create_dataframe([[batch]]) + + result = df.collect()[0].column(0) + + assert result == pa.array([4, 5, 6]) diff --git a/src/context.rs b/src/context.rs index 9ba87eb8a..0db0f4d7e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -308,7 +308,7 @@ impl PySessionContext { #[classmethod] #[pyo3(signature = ())] - fn _global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { + fn global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { Ok(Self { ctx: get_global_ctx().clone(), }) From 55141bad7c2270c14742e962d8bab1d4f1be27f5 Mon Sep 17 00:00:00 2001 From: Spaarsh <67336892+Spaarsh@users.noreply.github.com> Date: Fri, 14 Mar 2025 18:26:31 +0530 Subject: [PATCH 15/22] Renaming Internal Structs (#1059) * Renamed Expr to RawExpr * Fixed CI test for exported classes to 
include RawExpr as well

* Fixed CI test for exported classes to check if Expr class covers RawExpr

* Generalized Raw* class checking

* fixes

* fixes

* fixed the CI test to not look for Raw classes in the datafusion module

* Add additional text to unit test describing operation and ensure wrapped Raw classes are checked

* New ruff rule on main

* Resolve ruff errors

---------

Co-authored-by: Tim Saucer
---
 python/datafusion/expr.py             |  8 ++--
 python/tests/test_wrapper_coverage.py | 55 +++++++++++++++++++--------
 src/expr.rs                           |  2 +-
 3 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 702f75aed..77b6c272d 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -193,7 +193,7 @@ class Expr:
     :ref:`Expressions` in the online documentation for more information.
     """
 
-    def __init__(self, expr: expr_internal.Expr) -> None:
+    def __init__(self, expr: expr_internal.RawExpr) -> None:
         """This constructor should not be called by the end user."""
         self.expr = expr
 
@@ -383,7 +383,7 @@ def literal(value: Any) -> Expr:
             value = pa.scalar(value, type=pa.string_view())
         if not isinstance(value, pa.Scalar):
             value = pa.scalar(value)
-        return Expr(expr_internal.Expr.literal(value))
+        return Expr(expr_internal.RawExpr.literal(value))
 
     @staticmethod
     def string_literal(value: str) -> Expr:
@@ -398,13 +398,13 @@ def string_literal(value: str) -> Expr:
         """
         if isinstance(value, str):
             value = pa.scalar(value, type=pa.string())
-            return Expr(expr_internal.Expr.literal(value))
+            return Expr(expr_internal.RawExpr.literal(value))
         return Expr.literal(value)
 
     @staticmethod
     def column(value: str) -> Expr:
         """Creates a new expression representing a column."""
-        return Expr(expr_internal.Expr.column(value))
+        return Expr(expr_internal.RawExpr.column(value))
 
     def alias(self, name: str) -> Expr:
         """Assign a name to the expression."""
diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py
index d7f6f6e35..a2de2d32b 100644
--- a/python/tests/test_wrapper_coverage.py
+++ b/python/tests/test_wrapper_coverage.py
@@ -28,37 +28,62 @@
 from enum import EnumMeta as EnumType
 
 
-def missing_exports(internal_obj, wrapped_obj) -> None:
-    # Special case enums - just make sure they exist since dir()
-    # and other functions get overridden.
+def missing_exports(internal_obj, wrapped_obj) -> None:  # noqa: C901
+    """
+    Identify if any of the Rust-exposed structs or functions do not have wrappers.
+
+    Special handling for:
+    - Raw* classes: Internal implementation details that shouldn't be exposed
+    - _global_ctx: Internal implementation detail
+    - __self__, __class__: Python special attributes
+    """
+    # Special case enums - EnumType overrides some of the internal functions,
+    # so check that all of the values exist and move on
     if isinstance(wrapped_obj, EnumType):
+        expected_values = [v for v in dir(internal_obj) if not v.startswith("__")]
+        for value in expected_values:
+            assert value in dir(wrapped_obj)
         return
 
-    for attr in dir(internal_obj):
-        if attr in ["_global_ctx"]:
-            continue
-        assert attr in dir(wrapped_obj)
+    for internal_attr_name in dir(internal_obj):
+        wrapped_attr_name = internal_attr_name.removeprefix("Raw")
+        assert wrapped_attr_name in dir(wrapped_obj)
 
-        internal_attr = getattr(internal_obj, attr)
-        wrapped_attr = getattr(wrapped_obj, attr)
+        internal_attr = getattr(internal_obj, internal_attr_name)
+        wrapped_attr = getattr(wrapped_obj, wrapped_attr_name)
 
-        if internal_attr is not None and wrapped_attr is None:
-            pytest.fail(f"Missing attribute: {attr}")
+        # There are some auto-generated attributes that can be None, such as
+        # __kwdefaults__ and __doc__. As long as these are None on the internal
+        # object, it's okay to skip them. However, if they do exist on the
+        # internal object they must also exist on the wrapped object.
+        if internal_attr is not None:
+            if wrapped_attr is None:
+                pytest.fail(f"Missing attribute: {internal_attr_name}")
 
-        if attr in ["__self__", "__class__"]:
+        if internal_attr_name in ["__self__", "__class__"]:
             continue
+
         if isinstance(internal_attr, list):
             assert isinstance(wrapped_attr, list)
+
+            # We have cases like __all__ that are a list, and we want to be
+            # certain that every value in the internal object's list is also
+            # present in the wrapper's list
             for val in internal_attr:
-                assert val in wrapped_attr
+                if isinstance(val, str) and val.startswith("Raw"):
+                    assert val[3:] in wrapped_attr
+                else:
+                    assert val in wrapped_attr
         elif hasattr(internal_attr, "__dict__"):
+            # Check all submodules recursively
            missing_exports(internal_attr, wrapped_attr)
 
 
 def test_datafusion_missing_exports() -> None:
     """Check for any missing python exports.
 
-    This test verifies that every exposed class, attribute, and function in
-    the internal (pyo3) module is also exposed in our python wrappers.
+    This test verifies that every class, attribute, and function exposed by
+    the internal (pyo3) module, datafusion._internal, is also exposed in the
+    public Python wrappers, i.e. the datafusion package itself.
""" missing_exports(datafusion._internal, datafusion) diff --git a/src/expr.rs b/src/expr.rs index e750be6a4..d3c528eb4 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -101,7 +101,7 @@ pub mod window; use sort_expr::{to_sort_expressions, PySortExpr}; /// A PyExpr that can be used on a DataFrame -#[pyclass(name = "Expr", module = "datafusion.expr", subclass)] +#[pyclass(name = "RawExpr", module = "datafusion.expr", subclass)] #[derive(Debug, Clone)] pub struct PyExpr { pub expr: Expr, From 4f457030f171a26d0c4cce4d55cf541519956fcc Mon Sep 17 00:00:00 2001 From: jsai28 <54253219+jsai28@users.noreply.github.com> Date: Sat, 15 Mar 2025 04:57:38 -0600 Subject: [PATCH 16/22] added pytest asyncio tests (#1063) --- pyproject.toml | 1 + python/tests/test_dataframe.py | 54 ++++++++++++++++++++++++++++++++++ uv.lock | 17 ++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 060e3b80a..a4ed18c4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,6 +150,7 @@ dev = [ "maturin>=1.8.1", "numpy>1.25.0", "pytest>=7.4.4", + "pytest-asyncio>=0.23.3", "ruff>=0.9.1", "toml>=0.10.2", "pygithub==2.5.0", diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index d084f12dd..384b17878 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -771,6 +771,16 @@ def test_execution_plan(aggregate_df): assert rows_returned == 5 +@pytest.mark.asyncio +async def test_async_iteration_of_df(aggregate_df): + rows_returned = 0 + async for batch in aggregate_df.execute_stream(): + assert batch is not None + rows_returned += len(batch.to_pyarrow()[0]) + + assert rows_returned == 5 + + def test_repartition(df): df.repartition(2) @@ -958,6 +968,18 @@ def test_execute_stream(df): assert not list(stream) # after one iteration the generator must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_async(df): + stream = df.execute_stream() + batches = [batch async for batch in stream] + + assert all(batch is not None for batch in batches) + + # After consuming all batches, the stream should be exhausted + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + @pytest.mark.parametrize("schema", [True, False]) def test_execute_stream_to_arrow_table(df, schema): stream = df.execute_stream() @@ -974,6 +996,25 @@ def test_execute_stream_to_arrow_table(df, schema): assert set(pyarrow_table.column_names) == {"a", "b", "c"} +@pytest.mark.asyncio +@pytest.mark.parametrize("schema", [True, False]) +async def test_execute_stream_to_arrow_table_async(df, schema): + stream = df.execute_stream() + + if schema: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream], schema=df.schema() + ) + else: + pyarrow_table = pa.Table.from_batches( + [batch.to_pyarrow() async for batch in stream] + ) + + assert isinstance(pyarrow_table, pa.Table) + assert pyarrow_table.shape == (3, 3) + assert set(pyarrow_table.column_names) == {"a", "b", "c"} + + def test_execute_stream_partitioned(df): streams = df.execute_stream_partitioned() assert all(batch is not None for stream in streams for batch in stream) @@ -982,6 +1023,19 @@ def test_execute_stream_partitioned(df): ) # after one iteration all generators must be exhausted +@pytest.mark.asyncio +async def test_execute_stream_partitioned_async(df): + streams = df.execute_stream_partitioned() + + for stream in streams: + batches = [batch async for batch in stream] + assert all(batch is not None for batch in batches) + + # Ensure 
the stream is exhausted after iteration + remaining_batches = [batch async for batch in stream] + assert not remaining_batches + + def test_empty_to_arrow_table(df): # Convert empty datafusion dataframe to pyarrow Table pyarrow_table = df.limit(0).to_arrow_table() diff --git a/uv.lock b/uv.lock index 619b92856..7e4bc4c6b 100644 --- a/uv.lock +++ b/uv.lock @@ -284,9 +284,11 @@ dependencies = [ [package.dev-dependencies] dev = [ { name = "maturin" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "pygithub" }, { name = "pytest" }, + { name = "pytest-asyncio" }, { name = "ruff" }, { name = "toml" }, ] @@ -314,9 +316,10 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ { name = "maturin", specifier = ">=1.8.1" }, - { name = "numpy", marker = "python_full_version >= '3.10'", specifier = ">1.24.4" }, + { name = "numpy", specifier = ">1.25.0" }, { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, + { name = "pytest-asyncio", specifier = ">=0.23.3" }, { name = "ruff", specifier = ">=0.9.1" }, { name = "toml", specifier = ">=0.10.2" }, ] @@ -1079,6 +1082,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, ] +[[package]] +name = "pytest-asyncio" +version = "0.25.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 2f52688d76e84794343c17ffaf3002534ecfd716 Mon Sep 17 00:00:00 2001 From: kosiew Date: Sat, 15 Mar 2025 19:00:50 +0800 Subject: [PATCH 17/22] Add decorator for udwf (#1061) * feat: Introduce create_udwf method for User-Defined Window Functions - Added `create_udwf` static method to `WindowUDF` class, allowing users to create User-Defined Window Functions (UDWF) as both a function and a decorator. - Updated type hinting for `_R` using `TypeAlias` for better clarity. - Enhanced documentation with usage examples for both function and decorator styles, improving usability and understanding. * refactor: Simplify UDWF test suite and introduce SimpleWindowCount evaluator - Removed multiple exponential smoothing classes to streamline the code. - Introduced SimpleWindowCount class for basic row counting functionality. - Updated test cases to validate the new SimpleWindowCount evaluator. - Refactored fixture and test functions for clarity and consistency. - Enhanced error handling in UDWF creation tests. 
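
The net effect of the changes below: `udwf` now works either as a plain
factory call or as a decorator. A sketch, assuming a WindowEvaluator subclass
named `SimpleWindowCount` like the one added to the tests in this commit:

    @udwf(pa.int64(), pa.int64(), "immutable")
    def window_count() -> WindowEvaluator:
        return SimpleWindowCount()

    expr = window_count(column("a"))  # returns an Expr, usable like any UDWF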
* fix: Update type alias import to use typing_extensions for compatibility * Add udwf tests for multiple input types and decorator syntax * replace old def udwf * refactor: Simplify df fixture by passing ctx as an argument * refactor: Rename DataFrame fixtures and update test functions - Renamed `df` fixture to `complex_window_df` for clarity. - Renamed `simple_df` fixture to `count_window_df` to better reflect its purpose. - Updated test functions to use the new fixture names, enhancing readability and maintainability. * refactor: Update udwf calls in WindowUDF to use BiasedNumbers directly - Changed udwf1 to use BiasedNumbers instead of bias_10. - Added udwf2 to call udwf with bias_10. - Introduced udwf3 to demonstrate a lambda function returning BiasedNumbers(20). * feat: Add overloads for udwf function to support multiple input types and decorator syntax * refactor: Simplify udwf method signature by removing redundant type hints * refactor: Remove state_type from udwf method signature and update return type handling - Eliminated the state_type parameter from the udwf method to simplify the function signature. - Updated return type handling in the _function and _decorator methods to use a generic type _R for better type flexibility. - Enhanced the decorator to wrap the original function, allowing for improved argument handling and expression return. * refactor: Update volatility parameter type in udwf method signature to support Volatility enum * Fix ruff errors * fix C901 for def udwf * refactor: Update udwf method signature and simplify input handling - Changed the type hint for the return type in the _create_window_udf_decorator method to use pa.DataType directly instead of a TypeVar. - Simplified the handling of input types by removing redundant checks and directly using the input types list. - Removed unnecessary comments and cleaned up the code for better readability. - Updated the test for udwf to use parameterized tests for better coverage and maintainability. * refactor: Rename input_type to input_types in udwf method signature for clarity * refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization * Revert "refactor: Enhance typing in udf.py by introducing Protocol for WindowEvaluator and improving import organization" This reverts commit 16dbe5f3fd88f42d0a304384b162009bd9e49a35. --- python/datafusion/udf.py | 123 +++++++++++++++++++++------ python/tests/test_udwf.py | 170 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 264 insertions(+), 29 deletions(-) diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py index 603b7063d..e93a34ca5 100644 --- a/python/datafusion/udf.py +++ b/python/datafusion/udf.py @@ -621,6 +621,16 @@ def __call__(self, *args: Expr) -> Expr: args_raw = [arg.expr for arg in args] return Expr(self._udwf.__call__(*args_raw)) + @overload + @staticmethod + def udwf( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[..., WindowUDF]: ... + + @overload @staticmethod def udwf( func: Callable[[], WindowEvaluator], @@ -628,24 +638,31 @@ def udwf( return_type: pa.DataType, volatility: Volatility | str, name: Optional[str] = None, - ) -> WindowUDF: - """Create a new User-Defined Window Function. + ) -> WindowUDF: ... - If your :py:class:`WindowEvaluator` can be instantiated with no arguments, you - can simply pass it's type as ``func``. 
If you need to pass additional arguments - to it's constructor, you can define a lambda or a factory method. During runtime - the :py:class:`WindowEvaluator` will be constructed for every instance in - which this UDWF is used. The following examples are all valid. + @staticmethod + def udwf(*args: Any, **kwargs: Any): # noqa: D417 + """Create a new User-Defined Window Function (UDWF). - .. code-block:: python + This class can be used both as a **function** and as a **decorator**. + + Usage: + - **As a function**: Call `udwf(func, input_types, return_type, volatility, + name)`. + - **As a decorator**: Use `@udwf(input_types, return_type, volatility, + name)`. When using `udwf` as a decorator, **do not pass `func` + explicitly**. + **Function example:** + ``` import pyarrow as pa class BiasedNumbers(WindowEvaluator): def __init__(self, start: int = 0) -> None: self.start = start - def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + def evaluate_all(self, values: list[pa.Array], + num_rows: int) -> pa.Array: return pa.array([self.start + i for i in range(num_rows)]) def bias_10() -> BiasedNumbers: @@ -655,35 +672,93 @@ def bias_10() -> BiasedNumbers: udwf2 = udwf(bias_10, pa.int64(), pa.int64(), "immutable") udwf3 = udwf(lambda: BiasedNumbers(20), pa.int64(), pa.int64(), "immutable") + ``` + + **Decorator example:** + ``` + @udwf(pa.int64(), pa.int64(), "immutable") + def biased_numbers() -> BiasedNumbers: + return BiasedNumbers(10) + ``` + Args: - func: A callable to create the window function. - input_types: The data types of the arguments to ``func``. + func: **Only needed when calling as a function. Skip this argument when + using `udwf` as a decorator.** + input_types: The data types of the arguments. return_type: The data type of the return value. volatility: See :py:class:`Volatility` for allowed values. - arguments: A list of arguments to pass in to the __init__ method for accum. name: A descriptive name for the function. Returns: - A user-defined window function. - """ # noqa: W505, E501 + A user-defined window function that can be used in window function calls. + """ + if args and callable(args[0]): + # Case 1: Used as a function, require the first parameter to be callable + return WindowUDF._create_window_udf(*args, **kwargs) + # Case 2: Used as a decorator with parameters + return WindowUDF._create_window_udf_decorator(*args, **kwargs) + + @staticmethod + def _create_window_udf( + func: Callable[[], WindowEvaluator], + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> WindowUDF: + """Create a WindowUDF instance from function arguments.""" if not callable(func): msg = "`func` must be callable." 
raise TypeError(msg) if not isinstance(func(), WindowEvaluator): msg = "`func` must implement the abstract base class WindowEvaluator" raise TypeError(msg) - if name is None: - name = func().__class__.__qualname__.lower() - if isinstance(input_types, pa.DataType): - input_types = [input_types] - return WindowUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, + + name = name or func.__qualname__.lower() + input_types = ( + [input_types] if isinstance(input_types, pa.DataType) else input_types ) + return WindowUDF(name, func, input_types, return_type, volatility) + + @staticmethod + def _get_default_name(func: Callable) -> str: + """Get the default name for a function based on its attributes.""" + if hasattr(func, "__qualname__"): + return func.__qualname__.lower() + return func.__class__.__name__.lower() + + @staticmethod + def _normalize_input_types( + input_types: pa.DataType | list[pa.DataType], + ) -> list[pa.DataType]: + """Convert a single DataType to a list if needed.""" + if isinstance(input_types, pa.DataType): + return [input_types] + return input_types + + @staticmethod + def _create_window_udf_decorator( + input_types: pa.DataType | list[pa.DataType], + return_type: pa.DataType, + volatility: Volatility | str, + name: Optional[str] = None, + ) -> Callable[[Callable[[], WindowEvaluator]], Callable[..., Expr]]: + """Create a decorator for a WindowUDF.""" + + def decorator(func: Callable[[], WindowEvaluator]) -> Callable[..., Expr]: + udwf_caller = WindowUDF._create_window_udf( + func, input_types, return_type, volatility, name + ) + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Expr: + return udwf_caller(*args, **kwargs) + + return wrapper + + return decorator + # Convenience exports so we can import instead of treating as # variables at the package root diff --git a/python/tests/test_udwf.py b/python/tests/test_udwf.py index 3d6dcf9d8..4190e7d64 100644 --- a/python/tests/test_udwf.py +++ b/python/tests/test_udwf.py @@ -162,14 +162,27 @@ def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: return pa.array(results) +class SimpleWindowCount(WindowEvaluator): + """A simple window evaluator that counts rows.""" + + def __init__(self, base: int = 0) -> None: + self.base = base + + def evaluate_all(self, values: list[pa.Array], num_rows: int) -> pa.Array: + return pa.array([self.base + i for i in range(num_rows)]) + + class NotSubclassOfWindowEvaluator: pass @pytest.fixture -def df(): - ctx = SessionContext() +def ctx(): + return SessionContext() + +@pytest.fixture +def complex_window_df(ctx): # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [ @@ -182,7 +195,17 @@ def df(): return ctx.create_dataframe([[batch]]) -def test_udwf_errors(df): +@pytest.fixture +def count_window_df(ctx): + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]], name="test_table") + + +def test_udwf_errors(complex_window_df): with pytest.raises(TypeError): udwf( NotSubclassOfWindowEvaluator, @@ -192,6 +215,103 @@ def test_udwf_errors(df): ) +def test_udwf_errors_with_message(): + """Test error cases for UDWF creation.""" + with pytest.raises( + TypeError, match="`func` must implement the abstract base class WindowEvaluator" + ): + udwf( + NotSubclassOfWindowEvaluator, pa.int64(), pa.int64(), volatility="immutable" + ) + + +def 
test_udwf_basic_usage(count_window_df): + """Test basic UDWF usage with a simple counting window function.""" + simple_count = udwf( + SimpleWindowCount, pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + simple_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_with_args(count_window_df): + """Test UDWF with constructor arguments.""" + count_base10 = udwf( + lambda: SimpleWindowCount(10), pa.int64(), pa.int64(), volatility="immutable" + ) + + df = count_window_df.select( + count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_udwf_decorator_basic(count_window_df): + """Test UDWF used as a decorator.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count() -> WindowEvaluator: + return SimpleWindowCount() + + df = count_window_df.select( + window_count(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_decorator_with_args(count_window_df): + """Test UDWF decorator with constructor arguments.""" + + @udwf([pa.int64()], pa.int64(), "immutable") + def window_count_base10() -> WindowEvaluator: + return SimpleWindowCount(10) + + df = count_window_df.select( + window_count_base10(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([10, 11, 12]) + + +def test_register_udwf(ctx, count_window_df): + """Test registering and using UDWF in SQL context.""" + window_count = udwf( + SimpleWindowCount, + [pa.int64()], + pa.int64(), + volatility="immutable", + name="window_count", + ) + + ctx.register_udwf(window_count) + result = ctx.sql( + """ + SELECT window_count(a) + OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED + FOLLOWING) FROM test_table + """ + ).collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + smooth_default = udwf( ExponentialSmoothDefault, pa.float64(), @@ -299,10 +419,50 @@ def test_udwf_errors(df): @pytest.mark.parametrize(("name", "expr", "expected"), data_test_udwf_functions) -def test_udwf_functions(df, name, expr, expected): - df = df.select("a", "b", f.round(expr, lit(3)).alias(name)) +def test_udwf_functions(complex_window_df, name, expr, expected): + df = complex_window_df.select("a", "b", f.round(expr, lit(3)).alias(name)) # execute and collect the first (and only) batch result = df.sort(column("a")).select(column(name)).collect()[0] assert result.column(0) == pa.array(expected) + + +@pytest.mark.parametrize( + "udwf_func", + [ + udwf(SimpleWindowCount, pa.int64(), pa.int64(), "immutable"), + udwf(SimpleWindowCount, [pa.int64()], pa.int64(), "immutable"), + udwf([pa.int64()], pa.int64(), "immutable")(lambda: SimpleWindowCount()), + udwf(pa.int64(), pa.int64(), "immutable")(lambda: SimpleWindowCount()), + ], +) +def test_udwf_overloads(udwf_func, count_window_df): + df = count_window_df.select( + udwf_func(column("a")) + .window_frame(WindowFrame("rows", None, None)) + .build() + .alias("count") + ) + result = df.collect()[0] + assert result.column(0) == pa.array([0, 1, 2]) + + +def test_udwf_named_function(ctx, count_window_df): + """Test UDWF with explicit name parameter.""" + 
window_count = udwf(
+        SimpleWindowCount,
+        pa.int64(),
+        pa.int64(),
+        volatility="immutable",
+        name="my_custom_counter",
+    )
+
+    ctx.register_udwf(window_count)
+    result = ctx.sql(
+        """
+        SELECT my_custom_counter(a)
+        OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED
+        FOLLOWING) FROM test_table"""
+    ).collect()[0]
+    assert result.column(0) == pa.array([0, 1, 2])

From 7c1c08f8617ac97a2568eb0664e9d4ee30fceba9 Mon Sep 17 00:00:00 2001
From: Nirnay Roy <32942494+nirnayroy@users.noreply.github.com>
Date: Sat, 15 Mar 2025 17:05:05 +0530
Subject: [PATCH 18/22] feat: expose regex_count function (#1066)

* Added wrapper for regex_count function

* fix comment

---------

Co-authored-by: Nirnay Roy
---
 python/datafusion/functions.py | 18 ++++++++++++++++++
 python/tests/test_functions.py |  4 ++++
 src/functions.rs               | 20 ++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 0cc7434cf..26bac149c 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -217,6 +217,7 @@
     "random",
     "range",
     "rank",
+    "regexp_count",
     "regexp_like",
     "regexp_match",
     "regexp_replace",
@@ -779,6 +780,23 @@ def regexp_replace(
     return Expr(f.regexp_replace(string.expr, pattern.expr, replacement.expr, flags))
 
 
+def regexp_count(
+    string: Expr, pattern: Expr, start: Expr | None = None, flags: Expr | None = None
+) -> Expr:
+    """Returns the number of matches of a pattern in a string.
+
+    Optional start position (the first position is 1) to search for the regular
+    expression.
+    """
+    if flags is not None:
+        flags = flags.expr
+    if start is not None:
+        start = start.expr
+    return Expr(f.regexp_count(string.expr, pattern.expr, start, flags))
+
+
 def repeat(string: Expr, n: Expr) -> Expr:
     """Repeats the ``string`` to ``n`` times."""
     return Expr(f.repeat(string.expr, n.expr))
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index ed88a16e3..161e1e3bb 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -740,6 +740,10 @@ def test_array_function_obj_tests(stmt, py_expr):
             f.regexp_replace(column("a"), literal("(ell|orl)"), literal("-")),
             pa.array(["H-o", "W-d", "!"]),
         ),
+        (
+            f.regexp_count(column("a"), literal("(ell|orl)"), literal(1)),
+            pa.array([1, 1, 0], type=pa.int64()),
+        ),
     ],
 )
 def test_string_functions(df, function, expected_result):
diff --git a/src/functions.rs b/src/functions.rs
index 6a8abb18d..8fac239b4 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -173,6 +173,25 @@ fn regexp_replace(
     )
     .into())
 }
+
+#[pyfunction]
+#[pyo3(signature = (string, pattern, start, flags=None))]
+/// Returns the number of matches found in the string.
+fn regexp_count(
+    string: PyExpr,
+    pattern: PyExpr,
+    start: Option<PyExpr>,
+    flags: Option<PyExpr>,
+) -> PyResult<PyExpr> {
+    Ok(functions::expr_fn::regexp_count(
+        string.expr,
+        pattern.expr,
+        start.map(|x| x.expr),
+        flags.map(|x| x.expr),
+    )
+    .into())
+}
+
 /// Creates a new Sort Expr
 #[pyfunction]
 fn order_by(expr: PyExpr, asc: bool, nulls_first: bool) -> PyResult<PyExpr> {
@@ -943,6 +962,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(power))?;
     m.add_wrapped(wrap_pyfunction!(radians))?;
     m.add_wrapped(wrap_pyfunction!(random))?;
+    m.add_wrapped(wrap_pyfunction!(regexp_count))?;
     m.add_wrapped(wrap_pyfunction!(regexp_like))?;
     m.add_wrapped(wrap_pyfunction!(regexp_match))?;
     m.add_wrapped(wrap_pyfunction!(regexp_replace))?;

From b8dd97bc8eefcfecfa8dcc864c4898c654b236a9 Mon Sep 17 00:00:00 2001
From: Spaarsh <67336892+Spaarsh@users.noreply.github.com>
Date: Mon, 17 Mar 2025 20:08:16 +0530
Subject: [PATCH 19/22] Add additional ruff suggestions (#1062)

* Enabled ruff rule PT001 and ANN204

* Enabled ruff rule B008

* Enabled ruff rule EM101

* Enabled ruff rule PLR1714

* Enabled ruff rule ANN201

* Enabled ruff rule C400

* Enabled ruff rule B904

* Enabled ruff rule UP006

* Enabled ruff rule RUF012

* Enabled ruff rule FBT003

* Enabled ruff rule C416

* Enabled ruff rule SIM102

* Enabled ruff rule PGH003

* Enabled ruff rule PERF401

* Enabled ruff rule EM102

* Enabled ruff rule SIM108

* Enabled ruff rule ICN001

* Enabled ruff rule ICN001

* implemented reviews

* Update pyproject.toml to ignore `SIM102`

* Enabled ruff rule PLW2901

* Enabled ruff rule RET503

* Fixed failing ruff tests
---
 benchmarks/db-benchmark/groupby-datafusion.py |  24 ++--
 benchmarks/db-benchmark/join-datafusion.py    |   5 +-
 benchmarks/tpch/tpch.py                       |   7 +-
 dev/release/generate-changelog.py             |   6 +-
 docs/source/conf.py                           |   4 +-
 examples/create-context.py                    |  12 +-
 examples/python-udaf.py                       |  36 +++--
 examples/python-udf-comparisons.py            |   9 +-
 examples/python-udf.py                        |  12 +-
 examples/query-pyarrow-data.py                |  10 +-
 examples/sql-using-python-udaf.py             |   2 +-
 examples/tpch/_tests.py                       |   4 +-
 examples/tpch/convert_data_to_parquet.py      | 134 +++++++++---------
 examples/tpch/q08_market_share.py             |   2 +-
 examples/tpch/q19_discounted_revenue.py       |   4 +-
 .../tpch/q21_suppliers_kept_orders_waiting.py |   2 +-
 pyproject.toml                                |  20 ---
 python/datafusion/__init__.py                 |   8 +-
 python/datafusion/catalog.py                  |   4 +-
 python/datafusion/context.py                  |  51 +++----
 python/datafusion/dataframe.py                |  55 +++----
 python/datafusion/expr.py                     |  31 ++--
 python/datafusion/functions.py                |   9 +-
 python/tests/test_functions.py                |   2 +-
 python/tests/test_wrapper_coverage.py         |   7 +-
 25 files changed, 213 insertions(+), 247 deletions(-)

diff --git a/benchmarks/db-benchmark/groupby-datafusion.py b/benchmarks/db-benchmark/groupby-datafusion.py
index 04bf7a149..f9e8d638b 100644
--- a/benchmarks/db-benchmark/groupby-datafusion.py
+++ b/benchmarks/db-benchmark/groupby-datafusion.py
@@ -20,7 +20,7 @@
 import timeit
 
 import datafusion as df
-import pyarrow
+import pyarrow as pa
 from datafusion import (
     RuntimeEnvBuilder,
     SessionConfig,
@@ -37,7 +37,7 @@
 exec(open("./_helpers/helpers.py").read())
 
 
-def ans_shape(batches):
+def ans_shape(batches) -> tuple[int, int]:
     rows, cols = 0, 0
     for batch in batches:
         rows += batch.num_rows
@@ -48,7 +48,7 @@ def ans_shape(batches):
     return rows, cols
 
 
-def execute(df):
+def execute(df) -> list:
     print(df.execution_plan().display_indent())
     return df.collect()
 
@@ -68,14 +68,14 @@ def execute(df):
 src_grp = os.path.join("data", data_name + ".csv")
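# (A usage sketch for the regexp_count wrapper from the previous commit,
# mirroring the new test in python/tests/test_functions.py; values are
# illustrative:
#     f.regexp_count(column("a"), literal("(ell|orl)"), literal(1))
# counts regex matches per row, with an optional 1-based start position.)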
print("loading dataset %s" % src_grp, flush=True) -schema = pyarrow.schema( +schema = pa.schema( [ - ("id4", pyarrow.int32()), - ("id5", pyarrow.int32()), - ("id6", pyarrow.int32()), - ("v1", pyarrow.int32()), - ("v2", pyarrow.int32()), - ("v3", pyarrow.float64()), + ("id4", pa.int32()), + ("id5", pa.int32()), + ("id6", pa.int32()), + ("v1", pa.int32()), + ("v2", pa.int32()), + ("v3", pa.float64()), ] ) @@ -93,8 +93,8 @@ def execute(df): ) config = ( SessionConfig() - .with_repartition_joins(False) - .with_repartition_aggregations(False) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) .set("datafusion.execution.coalesce_batches", "false") ) ctx = SessionContext(config, runtime) diff --git a/benchmarks/db-benchmark/join-datafusion.py b/benchmarks/db-benchmark/join-datafusion.py index b45ebf632..039868031 100755 --- a/benchmarks/db-benchmark/join-datafusion.py +++ b/benchmarks/db-benchmark/join-datafusion.py @@ -29,7 +29,7 @@ exec(open("./_helpers/helpers.py").read()) -def ans_shape(batches): +def ans_shape(batches) -> tuple[int, int]: rows, cols = 0, 0 for batch in batches: rows += batch.num_rows @@ -57,7 +57,8 @@ def ans_shape(batches): os.path.join("data", y_data_name[2] + ".csv"), ] if len(src_jn_y) != 3: - raise Exception("Something went wrong in preparing files used for join") + error_msg = "Something went wrong in preparing files used for join" + raise Exception(error_msg) print( "loading datasets " diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index bfb9ac398..2d1bbae5b 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -21,7 +21,7 @@ from datafusion import SessionContext -def bench(data_path, query_path): +def bench(data_path, query_path) -> None: with open("results.csv", "w") as results: # register tables start = time.time() @@ -68,10 +68,7 @@ def bench(data_path, query_path): with open(f"{query_path}/q{query}.sql") as f: text = f.read() tmp = text.split(";") - queries = [] - for str in tmp: - if len(str.strip()) > 0: - queries.append(str.strip()) + queries = [s.strip() for s in tmp if len(s.strip()) > 0] try: start = time.time() diff --git a/dev/release/generate-changelog.py b/dev/release/generate-changelog.py index e30e2def2..d86736773 100755 --- a/dev/release/generate-changelog.py +++ b/dev/release/generate-changelog.py @@ -24,7 +24,7 @@ from github import Github -def print_pulls(repo_name, title, pulls): +def print_pulls(repo_name, title, pulls) -> None: if len(pulls) > 0: print(f"**{title}:**") print() @@ -34,7 +34,7 @@ def print_pulls(repo_name, title, pulls): print() -def generate_changelog(repo, repo_name, tag1, tag2, version): +def generate_changelog(repo, repo_name, tag1, tag2, version) -> None: # get a list of commits between two tags print(f"Fetching list of commits between {tag1} and {tag2}", file=sys.stderr) comparison = repo.compare(tag1, tag2) @@ -154,7 +154,7 @@ def generate_changelog(repo, repo_name, tag1, tag2, version): ) -def cli(args=None): +def cli(args=None) -> None: """Process command line arguments.""" if not args: args = sys.argv[1:] diff --git a/docs/source/conf.py b/docs/source/conf.py index c82a189e0..0be03d81d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,7 +73,7 @@ autoapi_python_class_content = "both" -def autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 +def autoapi_skip_member_fn(app, what, name, obj, skip, options) -> bool: # noqa: ARG001 skip_contents = [ # Re-exports ("class", "datafusion.DataFrame"), @@ -93,7 +93,7 @@ def 
autoapi_skip_member_fn(app, what, name, obj, skip, options): # noqa: ARG001 return skip -def setup(sphinx): +def setup(sphinx) -> None: sphinx.connect("autoapi-skip-member", autoapi_skip_member_fn) diff --git a/examples/create-context.py b/examples/create-context.py index 760c8513e..0026d6162 100644 --- a/examples/create-context.py +++ b/examples/create-context.py @@ -25,14 +25,14 @@ runtime = RuntimeEnvBuilder().with_disk_manager_os().with_fair_spill_pool(10000000) config = ( SessionConfig() - .with_create_default_catalog_and_schema(True) + .with_create_default_catalog_and_schema(enabled=True) .with_default_catalog_and_schema("foo", "bar") .with_target_partitions(8) - .with_information_schema(True) - .with_repartition_joins(False) - .with_repartition_aggregations(False) - .with_repartition_windows(False) - .with_parquet_pruning(False) + .with_information_schema(enabled=True) + .with_repartition_joins(enabled=False) + .with_repartition_aggregations(enabled=False) + .with_repartition_windows(enabled=False) + .with_parquet_pruning(enabled=False) .set("datafusion.execution.parquet.pushdown_filters", "true") ) ctx = SessionContext(config, runtime) diff --git a/examples/python-udaf.py b/examples/python-udaf.py index 538f69571..6655edb0a 100644 --- a/examples/python-udaf.py +++ b/examples/python-udaf.py @@ -16,7 +16,7 @@ # under the License. import datafusion -import pyarrow +import pyarrow as pa import pyarrow.compute from datafusion import Accumulator, col, udaf @@ -26,25 +26,21 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. """ - def __init__(self): - self._sum = pyarrow.scalar(0.0) + def __init__(self) -> None: + self._sum = pa.scalar(0.0) - def update(self, values: pyarrow.Array) -> None: + def update(self, values: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(values).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(values).as_py()) - def merge(self, states: pyarrow.Array) -> None: + def merge(self, states: pa.Array) -> None: # not nice since pyarrow scalars can't be summed yet. 
This breaks on `None` - self._sum = pyarrow.scalar( - self._sum.as_py() + pyarrow.compute.sum(states).as_py() - ) + self._sum = pa.scalar(self._sum.as_py() + pa.compute.sum(states).as_py()) - def state(self) -> pyarrow.Array: - return pyarrow.array([self._sum.as_py()]) + def state(self) -> pa.Array: + return pa.array([self._sum.as_py()]) - def evaluate(self) -> pyarrow.Scalar: + def evaluate(self) -> pa.Scalar: return self._sum @@ -52,17 +48,17 @@ def evaluate(self) -> pyarrow.Scalar: ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) my_udaf = udaf( MyAccumulator, - pyarrow.float64(), - pyarrow.float64(), - [pyarrow.float64()], + pa.float64(), + pa.float64(), + [pa.float64()], "stable", ) @@ -70,4 +66,4 @@ def evaluate(self) -> pyarrow.Scalar: result = df.collect()[0] -assert result.column(0) == pyarrow.array([6.0]) +assert result.column(0) == pa.array([6.0]) diff --git a/examples/python-udf-comparisons.py b/examples/python-udf-comparisons.py index c5d5ec8dd..eb0825011 100644 --- a/examples/python-udf-comparisons.py +++ b/examples/python-udf-comparisons.py @@ -112,8 +112,8 @@ def is_of_interest_impl( returnflag_arr: pa.Array, ) -> pa.Array: result = [] - for idx, partkey in enumerate(partkey_arr): - partkey = partkey.as_py() + for idx, partkey_val in enumerate(partkey_arr): + partkey = partkey_val.as_py() suppkey = suppkey_arr[idx].as_py() returnflag = returnflag_arr[idx].as_py() value = (partkey, suppkey, returnflag) @@ -162,10 +162,7 @@ def udf_using_pyarrow_compute_impl( resultant_arr = pc.and_(filtered_partkey_arr, filtered_suppkey_arr) resultant_arr = pc.and_(resultant_arr, filtered_returnflag_arr) - if results is None: - results = resultant_arr - else: - results = pc.or_(results, resultant_arr) + results = resultant_arr if results is None else pc.or_(results, resultant_arr) return results diff --git a/examples/python-udf.py b/examples/python-udf.py index fb2bc253e..1c08acd1a 100644 --- a/examples/python-udf.py +++ b/examples/python-udf.py @@ -15,23 +15,23 @@ # specific language governing permissions and limitations # under the License. -import pyarrow +import pyarrow as pa from datafusion import SessionContext, udf from datafusion import functions as f -def is_null(array: pyarrow.Array) -> pyarrow.Array: +def is_null(array: pa.Array) -> pa.Array: return array.is_null() -is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), "stable") +is_null_arr = udf(is_null, [pa.int64()], pa.bool_(), "stable") # create a context ctx = SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -40,4 +40,4 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array: result = df.collect()[0] -assert result.column(0) == pyarrow.array([False] * 3) +assert result.column(0) == pa.array([False] * 3) diff --git a/examples/query-pyarrow-data.py b/examples/query-pyarrow-data.py index e3456fb5b..9cfe8a62b 100644 --- a/examples/query-pyarrow-data.py +++ b/examples/query-pyarrow-data.py @@ -16,15 +16,15 @@ # under the License. 
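# The example edits in this commit apply ruff's ICN001 convention: pyarrow is
# always imported as `import pyarrow as pa`, so the call sites below read
# `pa.array(...)` instead of `pyarrow.array(...)`.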
import datafusion -import pyarrow +import pyarrow as pa from datafusion import col # create a context ctx = datafusion.SessionContext() # create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], +batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], names=["a", "b"], ) df = ctx.create_dataframe([[batch]]) @@ -38,5 +38,5 @@ # execute and collect the first (and only) batch result = df.collect()[0] -assert result.column(0) == pyarrow.array([5, 7, 9]) -assert result.column(1) == pyarrow.array([-3, -3, -3]) +assert result.column(0) == pa.array([5, 7, 9]) +assert result.column(1) == pa.array([-3, -3, -3]) diff --git a/examples/sql-using-python-udaf.py b/examples/sql-using-python-udaf.py index 60ab8d134..32ce38900 100644 --- a/examples/sql-using-python-udaf.py +++ b/examples/sql-using-python-udaf.py @@ -25,7 +25,7 @@ class MyAccumulator(Accumulator): Interface of a user-defined accumulation. """ - def __init__(self): + def __init__(self) -> None: self._sum = pa.scalar(0.0) def update(self, values: pa.Array) -> None: diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 2be4dfabd..80ff80244 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -91,7 +91,7 @@ def check_q17(df): ("q22_global_sales_opportunity", "q22"), ], ) -def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): +def test_tpch_query_vs_answer_file(query_code: str, answer_file: str) -> None: module = import_module(query_code) df: DataFrame = module.df @@ -122,3 +122,5 @@ def test_tpch_query_vs_answer_file(query_code: str, answer_file: str): assert df.join(df_expected, on=cols, how="anti").count() == 0 assert df.count() == df_expected.count() + + return None diff --git a/examples/tpch/convert_data_to_parquet.py b/examples/tpch/convert_data_to_parquet.py index 73097fac5..fd0fcca49 100644 --- a/examples/tpch/convert_data_to_parquet.py +++ b/examples/tpch/convert_data_to_parquet.py @@ -25,112 +25,112 @@ import os import datafusion -import pyarrow +import pyarrow as pa ctx = datafusion.SessionContext() all_schemas = {} all_schemas["customer"] = [ - ("C_CUSTKEY", pyarrow.int64()), - ("C_NAME", pyarrow.string()), - ("C_ADDRESS", pyarrow.string()), - ("C_NATIONKEY", pyarrow.int64()), - ("C_PHONE", pyarrow.string()), - ("C_ACCTBAL", pyarrow.decimal128(15, 2)), - ("C_MKTSEGMENT", pyarrow.string()), - ("C_COMMENT", pyarrow.string()), + ("C_CUSTKEY", pa.int64()), + ("C_NAME", pa.string()), + ("C_ADDRESS", pa.string()), + ("C_NATIONKEY", pa.int64()), + ("C_PHONE", pa.string()), + ("C_ACCTBAL", pa.decimal128(15, 2)), + ("C_MKTSEGMENT", pa.string()), + ("C_COMMENT", pa.string()), ] all_schemas["lineitem"] = [ - ("L_ORDERKEY", pyarrow.int64()), - ("L_PARTKEY", pyarrow.int64()), - ("L_SUPPKEY", pyarrow.int64()), - ("L_LINENUMBER", pyarrow.int32()), - ("L_QUANTITY", pyarrow.decimal128(15, 2)), - ("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)), - ("L_DISCOUNT", pyarrow.decimal128(15, 2)), - ("L_TAX", pyarrow.decimal128(15, 2)), - ("L_RETURNFLAG", pyarrow.string()), - ("L_LINESTATUS", pyarrow.string()), - ("L_SHIPDATE", pyarrow.date32()), - ("L_COMMITDATE", pyarrow.date32()), - ("L_RECEIPTDATE", pyarrow.date32()), - ("L_SHIPINSTRUCT", pyarrow.string()), - ("L_SHIPMODE", pyarrow.string()), - ("L_COMMENT", pyarrow.string()), + ("L_ORDERKEY", pa.int64()), + ("L_PARTKEY", pa.int64()), + ("L_SUPPKEY", pa.int64()), + ("L_LINENUMBER", pa.int32()), + ("L_QUANTITY", pa.decimal128(15, 2)), + 
("L_EXTENDEDPRICE", pa.decimal128(15, 2)), + ("L_DISCOUNT", pa.decimal128(15, 2)), + ("L_TAX", pa.decimal128(15, 2)), + ("L_RETURNFLAG", pa.string()), + ("L_LINESTATUS", pa.string()), + ("L_SHIPDATE", pa.date32()), + ("L_COMMITDATE", pa.date32()), + ("L_RECEIPTDATE", pa.date32()), + ("L_SHIPINSTRUCT", pa.string()), + ("L_SHIPMODE", pa.string()), + ("L_COMMENT", pa.string()), ] all_schemas["nation"] = [ - ("N_NATIONKEY", pyarrow.int64()), - ("N_NAME", pyarrow.string()), - ("N_REGIONKEY", pyarrow.int64()), - ("N_COMMENT", pyarrow.string()), + ("N_NATIONKEY", pa.int64()), + ("N_NAME", pa.string()), + ("N_REGIONKEY", pa.int64()), + ("N_COMMENT", pa.string()), ] all_schemas["orders"] = [ - ("O_ORDERKEY", pyarrow.int64()), - ("O_CUSTKEY", pyarrow.int64()), - ("O_ORDERSTATUS", pyarrow.string()), - ("O_TOTALPRICE", pyarrow.decimal128(15, 2)), - ("O_ORDERDATE", pyarrow.date32()), - ("O_ORDERPRIORITY", pyarrow.string()), - ("O_CLERK", pyarrow.string()), - ("O_SHIPPRIORITY", pyarrow.int32()), - ("O_COMMENT", pyarrow.string()), + ("O_ORDERKEY", pa.int64()), + ("O_CUSTKEY", pa.int64()), + ("O_ORDERSTATUS", pa.string()), + ("O_TOTALPRICE", pa.decimal128(15, 2)), + ("O_ORDERDATE", pa.date32()), + ("O_ORDERPRIORITY", pa.string()), + ("O_CLERK", pa.string()), + ("O_SHIPPRIORITY", pa.int32()), + ("O_COMMENT", pa.string()), ] all_schemas["part"] = [ - ("P_PARTKEY", pyarrow.int64()), - ("P_NAME", pyarrow.string()), - ("P_MFGR", pyarrow.string()), - ("P_BRAND", pyarrow.string()), - ("P_TYPE", pyarrow.string()), - ("P_SIZE", pyarrow.int32()), - ("P_CONTAINER", pyarrow.string()), - ("P_RETAILPRICE", pyarrow.decimal128(15, 2)), - ("P_COMMENT", pyarrow.string()), + ("P_PARTKEY", pa.int64()), + ("P_NAME", pa.string()), + ("P_MFGR", pa.string()), + ("P_BRAND", pa.string()), + ("P_TYPE", pa.string()), + ("P_SIZE", pa.int32()), + ("P_CONTAINER", pa.string()), + ("P_RETAILPRICE", pa.decimal128(15, 2)), + ("P_COMMENT", pa.string()), ] all_schemas["partsupp"] = [ - ("PS_PARTKEY", pyarrow.int64()), - ("PS_SUPPKEY", pyarrow.int64()), - ("PS_AVAILQTY", pyarrow.int32()), - ("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)), - ("PS_COMMENT", pyarrow.string()), + ("PS_PARTKEY", pa.int64()), + ("PS_SUPPKEY", pa.int64()), + ("PS_AVAILQTY", pa.int32()), + ("PS_SUPPLYCOST", pa.decimal128(15, 2)), + ("PS_COMMENT", pa.string()), ] all_schemas["region"] = [ - ("r_REGIONKEY", pyarrow.int64()), - ("r_NAME", pyarrow.string()), - ("r_COMMENT", pyarrow.string()), + ("r_REGIONKEY", pa.int64()), + ("r_NAME", pa.string()), + ("r_COMMENT", pa.string()), ] all_schemas["supplier"] = [ - ("S_SUPPKEY", pyarrow.int64()), - ("S_NAME", pyarrow.string()), - ("S_ADDRESS", pyarrow.string()), - ("S_NATIONKEY", pyarrow.int32()), - ("S_PHONE", pyarrow.string()), - ("S_ACCTBAL", pyarrow.decimal128(15, 2)), - ("S_COMMENT", pyarrow.string()), + ("S_SUPPKEY", pa.int64()), + ("S_NAME", pa.string()), + ("S_ADDRESS", pa.string()), + ("S_NATIONKEY", pa.int32()), + ("S_PHONE", pa.string()), + ("S_ACCTBAL", pa.decimal128(15, 2)), + ("S_COMMENT", pa.string()), ] curr_dir = os.path.dirname(os.path.abspath(__file__)) -for filename, curr_schema in all_schemas.items(): +for filename, curr_schema_val in all_schemas.items(): # For convenience, go ahead and convert the schema column names to lowercase - curr_schema = [(s[0].lower(), s[1]) for s in curr_schema] + curr_schema = [(s[0].lower(), s[1]) for s in curr_schema_val] # Pre-collect the output columns so we can ignore the null field we add # in to handle the trailing | in the file output_cols = [r[0] for r in curr_schema] - 
curr_schema = [pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema] + curr_schema = [pa.field(r[0], r[1], nullable=False) for r in curr_schema] # Trailing | requires extra field for in processing - curr_schema.append(("some_null", pyarrow.null())) + curr_schema.append(("some_null", pa.null())) - schema = pyarrow.schema(curr_schema) + schema = pa.schema(curr_schema) source_file = os.path.abspath( os.path.join(curr_dir, f"../../benchmarks/tpch/data/{filename}.csv") diff --git a/examples/tpch/q08_market_share.py b/examples/tpch/q08_market_share.py index d46df30f2..4bf50efba 100644 --- a/examples/tpch/q08_market_share.py +++ b/examples/tpch/q08_market_share.py @@ -150,7 +150,7 @@ df = df.with_column( "national_volume", F.case(col("s_suppkey").is_null()) - .when(lit(False), col("volume")) + .when(lit(value=False), col("volume")) .otherwise(lit(0.0)), ) diff --git a/examples/tpch/q19_discounted_revenue.py b/examples/tpch/q19_discounted_revenue.py index 2b87e1120..bd492aac0 100644 --- a/examples/tpch/q19_discounted_revenue.py +++ b/examples/tpch/q19_discounted_revenue.py @@ -89,8 +89,8 @@ def is_of_interest( same number of rows in the output. """ result = [] - for idx, brand in enumerate(brand_arr): - brand = brand.as_py() + for idx, brand_val in enumerate(brand_arr): + brand = brand_val.as_py() if brand in items_of_interest: values_of_interest = items_of_interest[brand] diff --git a/examples/tpch/q21_suppliers_kept_orders_waiting.py b/examples/tpch/q21_suppliers_kept_orders_waiting.py index 9bbaad779..619c4406b 100644 --- a/examples/tpch/q21_suppliers_kept_orders_waiting.py +++ b/examples/tpch/q21_suppliers_kept_orders_waiting.py @@ -65,7 +65,7 @@ df = df.with_column( "failed_supp", F.case(col("l_receiptdate") > col("l_commitdate")) - .when(lit(True), col("l_suppkey")) + .when(lit(value=True), col("l_suppkey")) .end(), ) diff --git a/pyproject.toml b/pyproject.toml index a4ed18c4c..d86b657ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,37 +80,17 @@ ignore = [ "TD003", # Allow TODO lines "UP007", # Disallowing Union is pedantic # TODO: Enable all of the following, but this PR is getting too large already - "PT001", - "ANN204", - "B008", - "EM101", "PLR0913", - "PLR1714", - "ANN201", - "C400", "TRY003", - "B904", - "UP006", - "RUF012", - "FBT003", - "C416", - "SIM102", - "PGH003", "PLR2004", - "PERF401", "PD901", - "EM102", "ERA001", - "SIM108", - "ICN001", "ANN001", "ANN202", "PTH", "N812", "INP001", "DTZ007", - "PLW2901", - "RET503", "RUF015", "A005", "TC001", diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 286e5dc31..d871fdb71 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -92,17 +92,17 @@ ] -def column(value: str): +def column(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def col(value: str): +def col(value: str) -> Expr: """Create a column expression.""" return Expr.column(value) -def literal(value): +def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) @@ -120,6 +120,6 @@ def str_lit(value): return string_literal(value) -def lit(value): +def lit(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index 0560f4704..6c3f188cc 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -24,7 +24,7 @@ import datafusion._internal as df_internal if TYPE_CHECKING: - import pyarrow + import pyarrow as pa class Catalog: @@ 
-67,7 +67,7 @@ def __init__(self, table: df_internal.Table) -> None: self.table = table @property - def schema(self) -> pyarrow.Schema: + def schema(self) -> pa.Schema: """Returns the schema associated with this table.""" return self.table.schema diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 58ad9a943..1429a4975 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -40,9 +40,9 @@ if TYPE_CHECKING: import pathlib - import pandas - import polars - import pyarrow + import pandas as pd + import polars as pl + import pyarrow as pa from datafusion.plan import ExecutionPlan, LogicalPlan @@ -537,7 +537,7 @@ def register_listing_table( path: str | pathlib.Path, table_partition_cols: list[tuple[str, str]] | None = None, file_extension: str = ".parquet", - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> None: """Register multiple files as a single table. @@ -606,14 +606,14 @@ def sql_with_options(self, query: str, options: SQLOptions) -> DataFrame: def create_dataframe( self, - partitions: list[list[pyarrow.RecordBatch]], + partitions: list[list[pa.RecordBatch]], name: str | None = None, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, ) -> DataFrame: """Create and return a dataframe using the provided partitions. Args: - partitions: :py:class:`pyarrow.RecordBatch` partitions to register. + partitions: :py:class:`pa.RecordBatch` partitions to register. name: Resultant dataframe name. schema: Schema for the partitions. @@ -684,16 +684,14 @@ def from_arrow( return DataFrame(self.ctx.from_arrow(data, name)) @deprecated("Use ``from_arrow`` instead.") - def from_arrow_table( - self, data: pyarrow.Table, name: str | None = None - ) -> DataFrame: + def from_arrow_table(self, data: pa.Table, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from an Arrow table. This is an alias for :py:func:`from_arrow`. """ return self.from_arrow(data, name) - def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFrame: + def from_pandas(self, data: pd.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Pandas DataFrame. Args: @@ -705,7 +703,7 @@ def from_pandas(self, data: pandas.DataFrame, name: str | None = None) -> DataFr """ return DataFrame(self.ctx.from_pandas(data, name)) - def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFrame: + def from_polars(self, data: pl.DataFrame, name: str | None = None) -> DataFrame: """Create a :py:class:`~datafusion.dataframe.DataFrame` from a Polars DataFrame. Args: @@ -719,7 +717,7 @@ def from_polars(self, data: polars.DataFrame, name: str | None = None) -> DataFr # https://github.com/apache/datafusion-python/pull/1016#discussion_r1983239116 # is the discussion on how we arrived at adding register_view - def register_view(self, name: str, df: DataFrame): + def register_view(self, name: str, df: DataFrame) -> None: """Register a :py:class: `~datafusion.detaframe.DataFrame` as a view. Args: @@ -755,7 +753,7 @@ def register_table_provider( self.ctx.register_table_provider(name, provider) def register_record_batches( - self, name: str, partitions: list[list[pyarrow.RecordBatch]] + self, name: str, partitions: list[list[pa.RecordBatch]] ) -> None: """Register record batches as a table. 
@@ -776,7 +774,7 @@ def register_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[SortExpr]] | None = None, ) -> None: """Register a Parquet file as a table. @@ -817,7 +815,7 @@ def register_csv( self, name: str, path: str | pathlib.Path | list[str | pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -843,10 +841,7 @@ def register_csv( selected for data input. file_compression_type: File compression type. """ - if isinstance(path, list): - path = [str(p) for p in path] - else: - path = str(path) + path = [str(p) for p in path] if isinstance(path, list) else str(path) self.ctx.register_csv( name, @@ -863,7 +858,7 @@ def register_json( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -901,7 +896,7 @@ def register_avro( self, name: str, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_extension: str = ".avro", table_partition_cols: list[tuple[str, str]] | None = None, ) -> None: @@ -923,8 +918,8 @@ def register_avro( name, str(path), schema, file_extension, table_partition_cols ) - def register_dataset(self, name: str, dataset: pyarrow.dataset.Dataset) -> None: - """Register a :py:class:`pyarrow.dataset.Dataset` as a table. + def register_dataset(self, name: str, dataset: pa.dataset.Dataset) -> None: + """Register a :py:class:`pa.dataset.Dataset` as a table. Args: name: Name of the table to register. @@ -975,7 +970,7 @@ def session_id(self) -> str: def read_json( self, path: str | pathlib.Path, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, schema_infer_max_records: int = 1000, file_extension: str = ".json", table_partition_cols: list[tuple[str, str]] | None = None, @@ -1012,7 +1007,7 @@ def read_json( def read_csv( self, path: str | pathlib.Path | list[str] | list[pathlib.Path], - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, has_header: bool = True, delimiter: str = ",", schema_infer_max_records: int = 1000, @@ -1065,7 +1060,7 @@ def read_parquet( parquet_pruning: bool = True, file_extension: str = ".parquet", skip_metadata: bool = True, - schema: pyarrow.Schema | None = None, + schema: pa.Schema | None = None, file_sort_order: list[list[Expr | SortExpr]] | None = None, ) -> DataFrame: """Read a Parquet source into a :py:class:`~datafusion.dataframe.Dataframe`. 
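
The context.py hunks here all follow one pattern: pandas, polars, and pyarrow
are imported only under TYPE_CHECKING, so the ruff pass just renames them to
their conventional aliases. A self-contained sketch of that pattern, where
`describe_schema` is a hypothetical function and not part of this patch:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        import pyarrow as pa  # needed only while type checking

    def describe_schema(schema: pa.Schema | None = None) -> None:
        # With postponed annotation evaluation, `pa.Schema` is never resolved
        # at runtime, so pyarrow is not imported when this module loads.
        print(schema)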
@@ -1110,7 +1105,7 @@ def read_parquet(
     def read_avro(
         self,
         path: str | pathlib.Path,
-        schema: pyarrow.Schema | None = None,
+        schema: pa.Schema | None = None,
         file_partition_cols: list[tuple[str, str]] | None = None,
         file_extension: str = ".avro",
     ) -> DataFrame:
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
index d1c71c2bb..26fe8f453 100644
--- a/python/datafusion/dataframe.py
+++ b/python/datafusion/dataframe.py
@@ -26,10 +26,8 @@
     TYPE_CHECKING,
     Any,
     Iterable,
-    List,
     Literal,
     Optional,
-    Type,
     Union,
     overload,
 )
@@ -75,7 +73,7 @@ class Compression(Enum):
     LZ4_RAW = "lz4_raw"
 
     @classmethod
-    def from_str(cls: Type[Compression], value: str) -> Compression:
+    def from_str(cls: type[Compression], value: str) -> Compression:
         """Convert a string to a Compression enum value.
 
         Args:
@@ -89,11 +87,13 @@ def from_str(cls: Type[Compression], value: str) -> Compression:
         """
         try:
             return cls(value.lower())
-        except ValueError:
+        except ValueError as err:
             valid_values = str([item.value for item in Compression])
-            raise ValueError(
-                f"{value} is not a valid Compression. Valid values are: {valid_values}"
-            )
+            error_msg = f"""
+                {value} is not a valid Compression.
+                Valid values are: {valid_values}
+                """
+            raise ValueError(error_msg) from err
 
     def get_default_level(self) -> Optional[int]:
         """Get the default compression level for the compression type.
@@ -132,7 +132,7 @@ def into_view(self) -> pa.Table:
         """Convert DataFrame as a ViewTable which can be used in register_table."""
         return self.df.into_view()
 
-    def __getitem__(self, key: str | List[str]) -> DataFrame:
+    def __getitem__(self, key: str | list[str]) -> DataFrame:
         """Return a new :py:class:`DataFrame` with the specified column or columns.
 
         Args:
@@ -287,8 +287,7 @@ def _simplify_expression(
             if isinstance(expr, Expr):
                 expr_list.append(expr.expr)
             elif isinstance(expr, Iterable):
-                for inner_expr in expr:
-                    expr_list.append(inner_expr.expr)
+                expr_list.extend(inner_expr.expr for inner_expr in expr)
             else:
                 raise NotImplementedError
         if named_exprs:
@@ -513,10 +512,15 @@ def join(
         # This check is to prevent breaking API changes where users prior to
         # DF 43.0.0 would pass the join_keys as a positional argument instead
         # of a keyword argument.
-        if isinstance(on, tuple) and len(on) == 2:
-            if isinstance(on[0], list) and isinstance(on[1], list):
-                join_keys = on  # type: ignore
-                on = None
+        if (
+            isinstance(on, tuple)
+            and len(on) == 2
+            and isinstance(on[0], list)
+            and isinstance(on[1], list)
+        ):
+            # We know this is safe because we've checked the types
+            join_keys = on  # type: ignore[assignment]
+            on = None
 
         if join_keys is not None:
             warnings.warn(
@@ -529,18 +533,17 @@ def join(
 
         if on is not None:
             if left_on is not None or right_on is not None:
-                raise ValueError(
-                    "`left_on` or `right_on` should not provided with `on`"
-                )
+                error_msg = "`left_on` or `right_on` should not be provided with `on`"
+                raise ValueError(error_msg)
             left_on = on
             right_on = on
         elif left_on is not None or right_on is not None:
             if left_on is None or right_on is None:
-                raise ValueError("`left_on` and `right_on` should both be provided.")
+                error_msg = "`left_on` and `right_on` should both be provided."
+                raise ValueError(error_msg)
         else:
-            raise ValueError(
-                "either `on` or `left_on` and `right_on` should be provided."
-            )
+            error_msg = "either `on` or `left_on` and `right_on` should be provided."
+            raise ValueError(error_msg)
         if isinstance(left_on, str):
             left_on = [left_on]
         if isinstance(right_on, str):
             right_on = [right_on]
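The validation restructured above enforces that callers pick exactly one way to spell the join keys. A hedged sketch of the two accepted call styles, assuming `left_df` and `right_df` are existing DataFrames with illustrative column names:

```python
# Same key name on both sides
joined = left_df.join(right_df, on="id", how="inner")

# Different key names per side
joined = left_df.join(right_df, left_on="id", right_on="user_id", how="left")

# Passing both `on` and `left_on`/`right_on` raises the ValueError above
```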
@@ -726,9 +729,11 @@ def write_parquet(
         if isinstance(compression, str):
             compression = Compression.from_str(compression)
 
-        if compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD}:
-            if compression_level is None:
-                compression_level = compression.get_default_level()
+        if (
+            compression in {Compression.GZIP, Compression.BROTLI, Compression.ZSTD}
+            and compression_level is None
+        ):
+            compression_level = compression.get_default_level()
 
         self.df.write_parquet(str(path), compression.value, compression_level)
 
@@ -824,7 +829,7 @@ def unnest_columns(self, *columns: str, preserve_nulls: bool = True) -> DataFram
         Returns:
             A DataFrame with the columns expanded.
         """
-        columns = [c for c in columns]
+        columns = list(columns)
         return DataFrame(self.df.unnest_columns(columns, preserve_nulls=preserve_nulls))
 
     def __arrow_c_stream__(self, requested_schema: pa.Schema) -> Any:
diff --git a/python/datafusion/expr.py b/python/datafusion/expr.py
index 77b6c272d..2697d8143 100644
--- a/python/datafusion/expr.py
+++ b/python/datafusion/expr.py
@@ -22,7 +22,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Optional, Type
+from typing import TYPE_CHECKING, Any, ClassVar, Optional
 
 import pyarrow as pa
 
@@ -176,7 +176,7 @@ def sort_or_default(e: Expr | SortExpr) -> expr_internal.SortExpr:
     """Helper function to return a default Sort if an Expr is provided."""
     if isinstance(e, SortExpr):
         return e.raw_sort
-    return SortExpr(e, True, True).raw_sort
+    return SortExpr(e, ascending=True, nulls_first=True).raw_sort
 
 
 def sort_list_to_raw_sort_list(
@@ -439,24 +439,21 @@ def fill_null(self, value: Any | Expr | None = None) -> Expr:
             value = Expr.literal(value)
         return Expr(functions_internal.nvl(self.expr, value.expr))
 
-    _to_pyarrow_types = {
+    _to_pyarrow_types: ClassVar[dict[type, pa.DataType]] = {
         float: pa.float64(),
         int: pa.int64(),
         str: pa.string(),
         bool: pa.bool_(),
     }
 
-    def cast(
-        self, to: pa.DataType[Any] | Type[float] | Type[int] | Type[str] | Type[bool]
-    ) -> Expr:
+    def cast(self, to: pa.DataType[Any] | type[float | int | str | bool]) -> Expr:
         """Cast to a new data type."""
         if not isinstance(to, pa.DataType):
             try:
                 to = self._to_pyarrow_types[to]
-            except KeyError:
-                raise TypeError(
-                    "Expected instance of pyarrow.DataType or builtins.type"
-                )
+            except KeyError as err:
+                error_msg = "Expected instance of pyarrow.DataType or builtins.type"
+                raise TypeError(error_msg) from err
 
         return Expr(self.expr.cast(to))
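`Expr.cast` accepts plain Python types as shorthand for the Arrow types in `_to_pyarrow_types`. A quick sketch of the equivalence, with an illustrative column name:

```python
import pyarrow as pa
from datafusion import col

col("price").cast(float)         # shorthand, resolved via _to_pyarrow_types
col("price").cast(pa.float64())  # explicit Arrow type, same result
```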
@@ -565,9 +562,7 @@ def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder:
         set parameters for either window or aggregate functions. If used on any other
         type of expression, an error will be generated when ``build()`` is called.
         """
-        return ExprFuncBuilder(
-            self.expr.partition_by(list(e.expr for e in partition_by))
-        )
+        return ExprFuncBuilder(self.expr.partition_by([e.expr for e in partition_by]))
 
     def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder:
         """Set the frame for a window function.
@@ -610,7 +605,7 @@ def over(self, window: Window) -> Expr:
 
 
 class ExprFuncBuilder:
-    def __init__(self, builder: expr_internal.ExprFuncBuilder):
+    def __init__(self, builder: expr_internal.ExprFuncBuilder) -> None:
         self.builder = builder
 
     def order_by(self, *exprs: Expr) -> ExprFuncBuilder:
@@ -638,7 +633,7 @@ def null_treatment(self, null_treatment: NullTreatment) -> ExprFuncBuilder:
     def partition_by(self, *partition_by: Expr) -> ExprFuncBuilder:
         """Set partitioning for window functions."""
         return ExprFuncBuilder(
-            self.builder.partition_by(list(e.expr for e in partition_by))
+            self.builder.partition_by([e.expr for e in partition_by])
         )
 
     def window_frame(self, window_frame: WindowFrame) -> ExprFuncBuilder:
@@ -693,11 +688,11 @@ def __init__(
         """
         if not isinstance(start_bound, pa.Scalar) and start_bound is not None:
             start_bound = pa.scalar(start_bound)
-            if units == "rows" or units == "groups":
+            if units in ("rows", "groups"):
                 start_bound = start_bound.cast(pa.uint64())
         if not isinstance(end_bound, pa.Scalar) and end_bound is not None:
             end_bound = pa.scalar(end_bound)
-            if units == "rows" or units == "groups":
+            if units in ("rows", "groups"):
                 end_bound = end_bound.cast(pa.uint64())
         self.window_frame = expr_internal.WindowFrame(units, start_bound, end_bound)
 
@@ -709,7 +704,7 @@ def get_lower_bound(self) -> WindowFrameBound:
         """Returns starting bound."""
         return WindowFrameBound(self.window_frame.get_lower_bound())
 
-    def get_upper_bound(self):
+    def get_upper_bound(self) -> WindowFrameBound:
         """Returns end bound."""
         return WindowFrameBound(self.window_frame.get_upper_bound())
 
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 26bac149c..5cf914e16 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -790,10 +790,7 @@ def regexp_count(
     """
     if flags is not None:
         flags = flags.expr
-    if start is not None:
-        start = start.expr
-    else:
-        start = Expr.expr
+    start = start.expr if start is not None else Expr.expr
     return Expr(f.regexp_count(string.expr, pattern.expr, start, flags))
 
 
@@ -817,13 +814,15 @@ def right(string: Expr, n: Expr) -> Expr:
     return Expr(f.right(string.expr, n.expr))
 
 
-def round(value: Expr, decimal_places: Expr = Expr.literal(0)) -> Expr:
+def round(value: Expr, decimal_places: Expr | None = None) -> Expr:
     """Round the argument to the nearest integer.
 
     If the optional ``decimal_places`` is specified, round to the nearest number of
    decimal places. You can specify a negative number of decimal places. For example
     ``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.
     """
+    if decimal_places is None:
+        decimal_places = Expr.literal(0)
     return Expr(f.round(value.expr, decimal_places.expr))
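The `round` change above is worth a note: Python evaluates default argument values once at import time, so the old signature baked a single shared `Expr` object into the function (the pattern ruff flags as a function call in a default). The `None` sentinel defers construction to call time without changing the API:

```python
from datafusion import functions as f, lit

f.round(lit(125.2345))           # defaults to 0 decimal places
f.round(lit(125.2345), lit(-2))  # rounds to the nearest hundred -> 100.0
```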
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 161e1e3bb..37f2075f5 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -81,7 +81,7 @@ def test_literal(df):
         literal("1"),
         literal("OK"),
         literal(3.14),
-        literal(True),
+        literal(value=True),
         literal(b"hello world"),
     )
     result = df.collect()
diff --git a/python/tests/test_wrapper_coverage.py b/python/tests/test_wrapper_coverage.py
index a2de2d32b..926a65961 100644
--- a/python/tests/test_wrapper_coverage.py
+++ b/python/tests/test_wrapper_coverage.py
@@ -28,7 +28,7 @@
     from enum import EnumMeta as EnumType
 
 
-def missing_exports(internal_obj, wrapped_obj) -> None:  # noqa: C901
+def missing_exports(internal_obj, wrapped_obj) -> None:
     """
     Identify if any of the rust exposed structs or functions do not have wrappers.
@@ -56,9 +56,8 @@ def missing_exports(internal_obj, wrapped_obj) -> None:  # noqa: C901
     # __kwdefaults__ and __doc__. As long as these are None on the internal
     # object, it's okay to skip them. However if they do exist on the internal
     # object they must also exist on the wrapped object.
-        if internal_attr is not None:
-            if wrapped_attr is None:
-                pytest.fail(f"Missing attribute: {internal_attr_name}")
+        if internal_attr is not None and wrapped_attr is None:
+            pytest.fail(f"Missing attribute: {internal_attr_name}")
 
         if internal_attr_name in ["__self__", "__class__"]:
             continue

From 42982dad27ad03e7e9395d4c3ae3064c2b489434 Mon Sep 17 00:00:00 2001
From: Tim Saucer
Date: Sat, 22 Mar 2025 10:14:55 -0400
Subject: [PATCH 20/22] Improve collection during repr and repr_html (#1036)

* Improve table readout of a dataframe in Jupyter notebooks by making the table scrollable and displaying the first record batch up to 2MB

* Add an option to display only a portion of a cell's data, with a button the user can click to toggle showing more or less

* We cannot expect that the first non-empty batch is sufficient for our 2MB limit, so switch over to collecting until we run out or use up the size

* Update python unit test to allow the additional formatting data to exist and only check the table contents

* Combine collection for repr and repr_html into one function

* Small clippy suggestion

* Collect was occurring twice on repr

* Switch to execute_stream_partitioned
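Roughly, the collection heuristic is: keep pulling record batches until a minimum row count is met, then stop once a byte budget or a row cap is exceeded. As an illustrative Python sketch of that idea only (the real implementation is the Rust helper in this patch, which additionally slices batches to fit the budget):

```python
MAX_BYTES = 2 * 1024 * 1024  # 2 MB display budget
MIN_ROWS = 20

def collect_for_display(batches, max_rows):
    """Toy model: each batch exposes .num_rows and .nbytes."""
    it = iter(batches)
    shown, rows, size = [], 0, 0
    while (size < MAX_BYTES and rows < max_rows) or rows < MIN_ROWS:
        batch = next(it, None)
        if batch is None:
            return shown, False  # stream exhausted, nothing truncated
        shown.append(batch)
        rows += batch.num_rows
        size += batch.nbytes
    # Anything left on the stream means the readout is truncated
    return shown, next(it, None) is not None
```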
---
 python/tests/test_dataframe.py |  23 ++--
 src/dataframe.rs               | 240 ++++++++++++++++++++++++++++-----
 src/utils.rs                   |   2 +-
 3 files changed, 225 insertions(+), 40 deletions(-)

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 384b17878..718ebf69d 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 import os
+import re
 from typing import Any
 
 import pyarrow as pa
@@ -1245,13 +1246,17 @@ def add_with_parameter(df_internal, value: Any) -> DataFrame:
 
 def test_dataframe_repr_html(df) -> None:
     output = df._repr_html_()
 
-    ref_html = """<table border='1'>
-<tr><th>a</td><th>b</td><th>c</td></tr>
-<tr><td>1</td><td>4</td><td>8</td></tr>
-<tr><td>2</td><td>5</td><td>5</td></tr>
-<tr><td>3</td><td>6</td><td>8</td></tr>
-</table>
- """ + # Since we've added a fair bit of processing to the html output, lets just verify + # the values we are expecting in the table exist. Use regex and ignore everything + # between the and . We also don't want the closing > on the + # td and th segments because that is where the formatting data is written. - # Ignore whitespace just to make this test look cleaner - assert output.replace(" ", "") == ref_html.replace(" ", "") + headers = ["a", "b", "c"] + headers = [f"{v}" for v in headers] + header_pattern = "(.*?)".join(headers) + assert len(re.findall(header_pattern, output, re.DOTALL)) == 1 + + body_data = [[1, 4, 8], [2, 5, 5], [3, 6, 8]] + body_lines = [f"{v}" for inner in body_data for v in inner] + body_pattern = "(.*?)".join(body_lines) + assert len(re.findall(body_pattern, output, re.DOTALL)) == 1 diff --git a/src/dataframe.rs b/src/dataframe.rs index 243e2e14f..be10b8c28 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -31,9 +31,11 @@ use datafusion::common::UnnestOptions; use datafusion::config::{CsvOptions, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::datasource::TableProvider; +use datafusion::error::DataFusionError; use datafusion::execution::SendableRecordBatchStream; use datafusion::parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel}; use datafusion::prelude::*; +use futures::{StreamExt, TryStreamExt}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::pybacked::PyBackedStr; @@ -70,6 +72,9 @@ impl PyTableProvider { PyTable::new(table_provider) } } +const MAX_TABLE_BYTES_TO_DISPLAY: usize = 2 * 1024 * 1024; // 2 MB +const MIN_TABLE_ROWS_TO_DISPLAY: usize = 20; +const MAX_LENGTH_CELL_WITHOUT_MINIMIZE: usize = 25; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. @@ -111,56 +116,151 @@ impl PyDataFrame { } fn __repr__(&self, py: Python) -> PyDataFusionResult { - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; - let batches_as_string = pretty::pretty_format_batches(&batches); - match batches_as_string { - Ok(batch) => Ok(format!("DataFrame()\n{batch}")), - Err(err) => Ok(format!("Error: {:?}", err.to_string())), + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display(self.df.as_ref().clone(), 10, 10), + )?; + if batches.is_empty() { + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } - } - fn _repr_html_(&self, py: Python) -> PyDataFusionResult { - let mut html_str = "\n".to_string(); + let batches_as_displ = + pretty::pretty_format_batches(&batches).map_err(py_datafusion_err)?; + + let additional_str = match has_more { + true => "\nData truncated.", + false => "", + }; - let df = self.df.as_ref().clone().limit(0, Some(10))?; - let batches = wait_for_future(py, df.collect())?; + Ok(format!("DataFrame()\n{batches_as_displ}{additional_str}")) + } + fn _repr_html_(&self, py: Python) -> PyDataFusionResult { + let (batches, has_more) = wait_for_future( + py, + collect_record_batches_to_display( + self.df.as_ref().clone(), + MIN_TABLE_ROWS_TO_DISPLAY, + usize::MAX, + ), + )?; if batches.is_empty() { - html_str.push_str("
\n"); - return Ok(html_str); + // This should not be reached, but do it for safety since we index into the vector below + return Ok("No data to display".to_string()); } + let table_uuid = uuid::Uuid::new_v4().to_string(); + + let mut html_str = " + + +
+ + \n".to_string(); + let schema = batches[0].schema(); let mut header = Vec::new(); for field in schema.fields() { - header.push(format!("", field.name())); } let header_str = header.join(""); - html_str.push_str(&format!("{}\n", header_str)); - - for batch in batches { - let formatters = batch - .columns() - .iter() - .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) - .map(|c| { - c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) - }) - .collect::, _>>()?; - - for row in 0..batch.num_rows() { + html_str.push_str(&format!("{}\n", header_str)); + + let batch_formatters = batches + .iter() + .map(|batch| { + batch + .columns() + .iter() + .map(|c| ArrayFormatter::try_new(c.as_ref(), &FormatOptions::default())) + .map(|c| { + c.map_err(|e| PyValueError::new_err(format!("Error: {:?}", e.to_string()))) + }) + .collect::, _>>() + }) + .collect::, _>>()?; + + let rows_per_batch = batches.iter().map(|batch| batch.num_rows()); + + // We need to build up row by row for html + let mut table_row = 0; + for (batch_formatter, num_rows_in_batch) in batch_formatters.iter().zip(rows_per_batch) { + for batch_row in 0..num_rows_in_batch { + table_row += 1; let mut cells = Vec::new(); - for formatter in &formatters { - cells.push(format!("", formatter.value(row))); + for (col, formatter) in batch_formatter.iter().enumerate() { + let cell_data = formatter.value(batch_row).to_string(); + // From testing, primitive data types do not typically get larger than 21 characters + if cell_data.len() > MAX_LENGTH_CELL_WITHOUT_MINIMIZE { + let short_cell_data = &cell_data[0..MAX_LENGTH_CELL_WITHOUT_MINIMIZE]; + cells.push(format!(" + ")); + } else { + cells.push(format!("", formatter.value(batch_row))); + } } let row_str = cells.join(""); html_str.push_str(&format!("{}\n", row_str)); } } + html_str.push_str("
{}", field.name())); + header.push(format!("{}
{} +
+ {short_cell_data} + {cell_data} + +
+
{}
\n"); + + html_str.push_str(" + + "); - html_str.push_str("\n"); + if has_more { + html_str.push_str("Data truncated due to size."); + } Ok(html_str) } @@ -771,3 +871,83 @@ fn record_batch_into_schema( RecordBatch::try_new(schema, data_arrays) } + +/// This is a helper function to return the first non-empty record batch from executing a DataFrame. +/// It additionally returns a bool, which indicates if there are more record batches available. +/// We do this so we can determine if we should indicate to the user that the data has been +/// truncated. This collects until we have achived both of these two conditions +/// +/// - We have collected our minimum number of rows +/// - We have reached our limit, either data size or maximum number of rows +/// +/// Otherwise it will return when the stream has exhausted. If you want a specific number of +/// rows, set min_rows == max_rows. +async fn collect_record_batches_to_display( + df: DataFrame, + min_rows: usize, + max_rows: usize, +) -> Result<(Vec, bool), DataFusionError> { + let partitioned_stream = df.execute_stream_partitioned().await?; + let mut stream = futures::stream::iter(partitioned_stream).flatten(); + let mut size_estimate_so_far = 0; + let mut rows_so_far = 0; + let mut record_batches = Vec::default(); + let mut has_more = false; + + while (size_estimate_so_far < MAX_TABLE_BYTES_TO_DISPLAY && rows_so_far < max_rows) + || rows_so_far < min_rows + { + let mut rb = match stream.next().await { + None => { + break; + } + Some(Ok(r)) => r, + Some(Err(e)) => return Err(e), + }; + + let mut rows_in_rb = rb.num_rows(); + if rows_in_rb > 0 { + size_estimate_so_far += rb.get_array_memory_size(); + + if size_estimate_so_far > MAX_TABLE_BYTES_TO_DISPLAY { + let ratio = MAX_TABLE_BYTES_TO_DISPLAY as f32 / size_estimate_so_far as f32; + let total_rows = rows_in_rb + rows_so_far; + + let mut reduced_row_num = (total_rows as f32 * ratio).round() as usize; + if reduced_row_num < min_rows { + reduced_row_num = min_rows.min(total_rows); + } + + let limited_rows_this_rb = reduced_row_num - rows_so_far; + if limited_rows_this_rb < rows_in_rb { + rows_in_rb = limited_rows_this_rb; + rb = rb.slice(0, limited_rows_this_rb); + has_more = true; + } + } + + if rows_in_rb + rows_so_far > max_rows { + rb = rb.slice(0, max_rows - rows_so_far); + has_more = true; + } + + rows_so_far += rb.num_rows(); + record_batches.push(rb); + } + } + + if record_batches.is_empty() { + return Ok((Vec::default(), false)); + } + + if !has_more { + // Data was not already truncated, so check to see if more record batches remain + has_more = match stream.try_next().await { + Ok(None) => false, // reached end + Ok(Some(_)) => true, + Err(_) => false, // Stream disconnected + }; + } + + Ok((record_batches, has_more)) +} diff --git a/src/utils.rs b/src/utils.rs index 999aad755..3487de21b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -42,7 +42,7 @@ pub(crate) fn get_tokio_runtime() -> &'static TokioRuntime { #[inline] pub(crate) fn get_global_ctx() -> &'static SessionContext { static CTX: OnceLock = OnceLock::new(); - CTX.get_or_init(|| SessionContext::new()) + CTX.get_or_init(SessionContext::new) } /// Utility to collect rust futures with GIL released From d0315ffa704aba467f769f444208b7ce26d83037 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 22 Mar 2025 14:37:24 -0400 Subject: [PATCH 21/22] feat: Update DataFusion dependency to 46 (#1079) * Update DataFusion dependency to 46 * There was an update upstream in the exec but it is not a breaking change and only needs unit test 
updates --- Cargo.lock | 296 +++++++++++++++++++-------------- Cargo.toml | 18 +- python/tests/test_dataframe.py | 3 +- src/expr.rs | 39 +++-- src/expr/aggregate.rs | 10 +- src/expr/aggregate_expr.rs | 11 +- src/expr/window.rs | 24 ++- src/functions.rs | 34 ++-- 8 files changed, 252 insertions(+), 183 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c7f2bf3c..3a4915f23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7" +checksum = "84ef243634a39fb6e9d1710737e7a5ef96c9bacabd2326859ff889bc9ef755e5" dependencies = [ "arrow-arith", "arrow-array", @@ -201,9 +201,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8" +checksum = "8f420c6aef51dad2e4a96ce29c0ec90ad84880bdb60b321c74c652a6be07b93f" dependencies = [ "arrow-array", "arrow-buffer", @@ -215,9 +215,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a" +checksum = "24bda5ff6461a4ff9739959b3d57b377f45e3f878f7be1a4f28137c0a8f339fa" dependencies = [ "ahash", "arrow-buffer", @@ -232,9 +232,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a" +checksum = "bc6ed265c73f134a583d02c3cab5e16afab9446d8048ede8707e31f85fad58a0" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee" +checksum = "01c648572391edcef10e5fd458db70ba27ed6f71bcaee04397d0cfb100b34f8b" dependencies = [ "arrow-array", "arrow-buffer", @@ -264,9 +264,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c" +checksum = "a02fb265a6d8011a7d3ad1a36f25816ad0a3bb04cb8e9fe7929c165b98c0cbcd" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83" +checksum = "5f2cebf504bb6a92a134a87fff98f01b14fbb3a93ecf7aef90cd0f888c5fffa4" dependencies = [ "arrow-buffer", "arrow-schema", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6" +checksum = "8e6405b287671c88846e7751f7291f717b164911474cabac6d3d8614d5aa7374" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,9 +306,9 @@ dependencies = [ [[package]] name = 
"arrow-json" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305" +checksum = "5329bf9e7390cbb6b117ddd4d82e94c5362ea4cab5095697139429f36a38350c" dependencies = [ "arrow-array", "arrow-buffer", @@ -319,16 +319,18 @@ dependencies = [ "half", "indexmap", "lexical-core", + "memchr", "num", "serde", "serde_json", + "simdutf8", ] [[package]] name = "arrow-ord" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee" +checksum = "e103c13d4b80da28339c1d7aa23dd85bd59f42158acc45d39eeb6770627909ce" dependencies = [ "arrow-array", "arrow-buffer", @@ -339,9 +341,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4" +checksum = "170549a11b8534f3097a0619cfe89c42812345dc998bcf81128fc700b84345b8" dependencies = [ "arrow-array", "arrow-buffer", @@ -352,18 +354,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735" +checksum = "a5c53775bba63f319189f366d2b86e9a8889373eb198f07d8544938fc9f8ed9a" dependencies = [ "bitflags 2.8.0", ] [[package]] name = "arrow-select" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539" +checksum = "0a99003b2eb562b8d9c99dfb672306f15e94b20d3734179d596895703e821dcf" dependencies = [ "ahash", "arrow-array", @@ -375,9 +377,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "54.2.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0" +checksum = "90fdb130ee8325f4cd8262e19bb6baa3cbcef2b2573c4bee8c6fda7ea08199d7" dependencies = [ "arrow-array", "arrow-buffer", @@ -535,9 +537,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.5" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" +checksum = "b17679a8d69b6d7fd9cd9801a536cec9fa5e5970b69f9d4747f70b39b031f5e7" dependencies = [ "arrayref", "arrayvec", @@ -649,15 +651,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "num-traits", "serde", - "windows-targets", + "windows-link", ] [[package]] @@ -864,30 +866,32 @@ dependencies = [ [[package]] name = "datafusion" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +checksum = "914e6f9525599579abbd90b0f7a55afcaaaa40350b9e9ed52563f126dfe45fd3" dependencies = [ "apache-avro", "arrow", - "arrow-array", 
"arrow-ipc", "arrow-schema", - "async-compression", "async-trait", "bytes", "bzip2 0.5.1", "chrono", "datafusion-catalog", + "datafusion-catalog-listing", "datafusion-common", "datafusion-common-runtime", + "datafusion-datasource", "datafusion-execution", "datafusion-expr", + "datafusion-expr-common", "datafusion-functions", "datafusion-functions-aggregate", "datafusion-functions-nested", "datafusion-functions-table", "datafusion-functions-window", + "datafusion-macros", "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -896,7 +900,6 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "glob", "itertools 0.14.0", "log", "num-traits", @@ -908,7 +911,6 @@ dependencies = [ "sqlparser", "tempfile", "tokio", - "tokio-util", "url", "uuid", "xz2", @@ -917,9 +919,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +checksum = "998a6549e6ee4ee3980e05590b2960446a56b343ea30199ef38acd0e0b9036e2" dependencies = [ "arrow", "async-trait", @@ -933,22 +935,40 @@ dependencies = [ "itertools 0.14.0", "log", "parking_lot", - "sqlparser", +] + +[[package]] +name = "datafusion-catalog-listing" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5ac10096a5b3c0d8a227176c0e543606860842e943594ccddb45cf42a526e43" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "log", + "object_store", + "tokio", ] [[package]] name = "datafusion-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +checksum = "1f53d7ec508e1b3f68bd301cee3f649834fad51eff9240d898a4b2614cfd0a7a" dependencies = [ "ahash", "apache-avro", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ipc", - "arrow-schema", "base64 0.22.1", "half", "hashbrown 0.14.5", @@ -966,25 +986,59 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +checksum = "e0fcf41523b22e14cc349b01526e8b9f59206653037f2949a4adbfde5f8cb668" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-datasource" +version = "46.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf7f37ad8b6e88b46c7eeab3236147d32ea64b823544f498455a8d9042839c92" +dependencies = [ + "arrow", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.1", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "object_store", + "rand", + "tokio", + "tokio-util", + "url", + "xz2", + "zstd", +] + [[package]] name = "datafusion-doc" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" +checksum = "7db7a0239fd060f359dc56c6e7db726abaa92babaed2fb2e91c3a8b2fff8b256" [[package]] name = "datafusion-execution" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +checksum = "0938f9e5b6bc5782be4111cdfb70c02b7b5451bf34fd57e4de062a7f7c4e31f1" dependencies = [ "arrow", "dashmap", @@ -1001,9 +1055,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +checksum = "b36c28b00b00019a8695ad7f1a53ee1673487b90322ecbd604e2cf32894eb14f" dependencies = [ "arrow", "chrono", @@ -1022,26 +1076,25 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +checksum = "18f0a851a436c5a2139189eb4617a54e6a9ccb9edc96c4b3c83b3bb7c58b950e" dependencies = [ "arrow", "datafusion-common", + "indexmap", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-ffi" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8" +checksum = "d740dd9f32a4f4ed1b907e6934201bb059efe6c877532512c661771d973c7b21" dependencies = [ "abi_stable", "arrow", - "arrow-array", - "arrow-schema", "async-ffi", "async-trait", "datafusion", @@ -1055,9 +1108,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +checksum = "e3196e37d7b65469fb79fee4f05e5bb58a456831035f9a38aa5919aeb3298d40" dependencies = [ "arrow", "arrow-buffer", @@ -1071,7 +1124,6 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-macros", - "hashbrown 0.14.5", "hex", "itertools 0.14.0", "log", @@ -1085,14 +1137,12 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +checksum = "adfc2d074d5ee4d9354fdcc9283d5b2b9037849237ddecb8942a29144b77ca05" dependencies = [ "ahash", "arrow", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1108,9 +1158,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +checksum = "1cbceba0f98d921309a9121b702bcd49289d383684cccabf9a92cda1602f3bbb" dependencies = [ "ahash", "arrow", @@ -1121,15 +1171,12 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +checksum = "170e27ce4baa27113ddf5f77f1a7ec484b0dbeda0c7abbd4bad3fc609c8ab71a" dependencies = [ "arrow", - "arrow-array", - 
"arrow-buffer", "arrow-ord", - "arrow-schema", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1145,9 +1192,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +checksum = "7d3a06a7f0817ded87b026a437e7e51de7f59d48173b0a4e803aa896a7bd6bb5" dependencies = [ "arrow", "async-trait", @@ -1161,9 +1208,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +checksum = "d6c608b66496a1e05e3d196131eb9bebea579eed1f59e88d962baf3dda853bc6" dependencies = [ "datafusion-common", "datafusion-doc", @@ -1178,9 +1225,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +checksum = "da2f9d83348957b4ad0cd87b5cb9445f2651863a36592fe5484d43b49a5f8d82" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1188,9 +1235,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +checksum = "4800e1ff7ecf8f310887e9b54c9c444b8e215ccbc7b21c2f244cfae373b1ece7" dependencies = [ "datafusion-expr", "quote", @@ -1199,9 +1246,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +checksum = "971c51c54cd309001376fae752fb15a6b41750b6d1552345c46afbfb6458801b" dependencies = [ "arrow", "chrono", @@ -1218,15 +1265,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +checksum = "e1447c2c6bc8674a16be4786b4abf528c302803fafa186aa6275692570e64d85" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1243,13 +1287,12 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +checksum = "69f8c25dcd069073a75b3d2840a79d0f81e64bdd2c05f2d3d18939afb36a7dcb" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1258,12 +1301,11 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +checksum = "68da5266b5b9847c11d1b3404ee96b1d423814e1973e1ad3789131e5ec912763" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ 
-1271,23 +1313,19 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "futures", "itertools 0.14.0", "log", "recursive", - "url", ] [[package]] name = "datafusion-physical-plan" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +checksum = "88cc160df00e413e370b3b259c8ea7bfbebc134d32de16325950e9e923846b7f" dependencies = [ "ahash", "arrow", - "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait", @@ -1312,9 +1350,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c" +checksum = "6f6ef4c6eb52370cb48639e25e2331a415aac0b2b0a0a472b36e26603bdf184f" dependencies = [ "arrow", "chrono", @@ -1328,9 +1366,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578" +checksum = "5faf4a9bbb0d0a305fea8a6db21ba863286b53e53a212e687d2774028dd6f03f" dependencies = [ "arrow", "datafusion-common", @@ -1362,13 +1400,11 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +checksum = "325a212b67b677c0eb91447bf9a11b630f9fc4f62d8e5d145bf859f5a6b29e64" dependencies = [ "arrow", - "arrow-array", - "arrow-schema", "bigdecimal", "datafusion-common", "datafusion-expr", @@ -1381,11 +1417,10 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "45.0.0" +version = "46.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944" +checksum = "2c2be3226a683e02cff65181e66e62eba9f812ed0e9b7ec8fe11ac8dabf1a73f" dependencies = [ - "arrow-buffer", "async-recursion", "async-trait", "chrono", @@ -1395,6 +1430,7 @@ dependencies = [ "pbjson-types", "prost", "substrait", + "tokio", "url", ] @@ -1472,9 +1508,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" dependencies = [ "crc32fast", "miniz_oxide", @@ -2117,9 +2153,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libflate" @@ -2447,9 +2483,9 @@ dependencies = [ [[package]] name = "parquet" -version = "54.1.0" +version = "54.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +checksum = "94243778210509a5a5e9e012872127180c155d73a9cd6e2df9243d213e81e100" dependencies = [ "ahash", "arrow-array", @@ -2479,7 +2515,6 @@ dependencies = [ "tokio", "twox-hash", "zstd", - "zstd-sys", ] 
[[package]] @@ -3401,11 +3436,12 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.53.0" +version = "0.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899" dependencies = [ "log", + "recursive", "sqlparser_derive", ] @@ -3466,9 +3502,9 @@ dependencies = [ [[package]] name = "substrait" -version = "0.52.3" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756" +checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b" dependencies = [ "heck", "pbjson", @@ -3922,12 +3958,14 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.13.1" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "getrandom 0.3.1", + "js-sys", "serde", + "wasm-bindgen", ] [[package]] @@ -4114,6 +4152,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + [[package]] name = "windows-registry" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 50967a219..8afabdd82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,24 +34,24 @@ protoc = [ "datafusion-substrait/protoc" ] substrait = ["dep:datafusion-substrait"] [dependencies] -tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] } +tokio = { version = "1.43", features = ["macros", "rt", "rt-multi-thread", "sync"] } pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py39"] } pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]} -arrow = { version = "54", features = ["pyarrow"] } -datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "45.0.0", optional = true } -datafusion-proto = { version = "45.0.0" } -datafusion-ffi = { version = "45.0.0" } -prost = "0.13" # keep in line with `datafusion-substrait` +arrow = { version = "54.2.1", features = ["pyarrow"] } +datafusion = { version = "46.0.1", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "46.0.1", optional = true } +datafusion-proto = { version = "46.0.1" } +datafusion-ffi = { version = "46.0.1" } +prost = "0.13.1" # keep in line with `datafusion-substrait` uuid = { version = "1.12", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = ["local_dynamic_tls"] } -async-trait = "0.1" +async-trait = "0.1.73" futures = "0.3" object_store = { version = "0.11.0", features = ["aws", "gcp", "azure", "http"] } url = "2" [build-dependencies] -prost-types = "0.13" # keep in line with `datafusion-substrait` +prost-types = "0.13.1" # keep in line with `datafusion-substrait` pyo3-build-config = "0.23" [lib] diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 718ebf69d..eda13930d 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -753,7 +753,8 @@ def test_execution_plan(aggregate_df): 
assert "AggregateExec:" in indent assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent - assert "CsvExec:" in indent + assert "DataSourceExec:" in indent + assert "file_type=csv" in indent ctx = SessionContext() rows_returned = 0 diff --git a/src/expr.rs b/src/expr.rs index d3c528eb4..561170289 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams}; use datafusion::logical_expr::utils::exprlist_to_fields; use datafusion::logical_expr::{ ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition, @@ -172,6 +173,7 @@ impl PyExpr { Expr::ScalarSubquery(value) => { Ok(scalar_subquery::PyScalarSubquery::from(value.clone()).into_bound_py_any(py)?) } + #[allow(deprecated)] Expr::Wildcard { qualifier, options } => Err(py_unsupported_variant_err(format!( "Converting Expr::Wildcard to a Python object is not implemented : {:?} {:?}", qualifier, options @@ -332,7 +334,6 @@ impl PyExpr { | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } | Expr::InList { .. } - | Expr::Wildcard { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } | Expr::GroupingSet(..) @@ -346,6 +347,10 @@ impl PyExpr { | Expr::Unnest(_) | Expr::IsNotUnknown(_) => RexType::Call, Expr::ScalarSubquery(..) => RexType::ScalarSubquery, + #[allow(deprecated)] + Expr::Wildcard { .. } => { + return Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } }) } @@ -394,11 +399,15 @@ impl PyExpr { | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]), // Expr variants containing a collection of Expr(s) for operands - Expr::AggregateFunction(AggregateFunction { args, .. }) + Expr::AggregateFunction(AggregateFunction { + params: AggregateFunctionParams { args, .. }, + .. + }) | Expr::ScalarFunction(ScalarFunction { args, .. }) - | Expr::WindowFunction(WindowFunction { args, .. }) => { - Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()) - } + | Expr::WindowFunction(WindowFunction { + params: WindowFunctionParams { args, .. }, + .. + }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()), // Expr(s) that require more specific processing Expr::Case(Case { @@ -465,13 +474,17 @@ impl PyExpr { Expr::GroupingSet(..) | Expr::Unnest(_) | Expr::OuterReferenceColumn(_, _) - | Expr::Wildcard { .. } | Expr::ScalarSubquery(..) | Expr::Placeholder { .. } | Expr::Exists { .. } => Err(py_runtime_err(format!( "Unimplemented Expr type: {}", self.expr ))), + + #[allow(deprecated)] + Expr::Wildcard { .. } => { + Err(py_unsupported_variant_err("Expr::Wildcard is unsupported")) + } } } @@ -575,7 +588,7 @@ impl PyExpr { Expr::AggregateFunction(agg_fn) => { let window_fn = Expr::WindowFunction(WindowFunction::new( WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()), - agg_fn.args.clone(), + agg_fn.params.args.clone(), )); add_builder_fns_to_window( @@ -663,16 +676,8 @@ impl PyExpr { /// Create a [Field] representing an [Expr], given an input [LogicalPlan] to resolve against pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> PyDataFusionResult> { - match expr { - Expr::Wildcard { .. 
-            // Since * could be any of the valid column names just return the first one
-            Ok(Arc::new(input_plan.schema().field(0).clone()))
-        }
-        _ => {
-            let fields = exprlist_to_fields(&[expr.clone()], input_plan)?;
-            Ok(fields[0].1.clone())
-        }
-    }
+    let fields = exprlist_to_fields(&[expr.clone()], input_plan)?;
+    Ok(fields[0].1.clone())
 }
 
 fn _types(expr: &Expr) -> PyResult<DataTypeMap> {
     match expr {
diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs
index 8fc9da5b0..a99d83d23 100644
--- a/src/expr/aggregate.rs
+++ b/src/expr/aggregate.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use datafusion::common::DataFusionError;
-use datafusion::logical_expr::expr::{AggregateFunction, Alias};
+use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias};
 use datafusion::logical_expr::logical_plan::Aggregate;
 use datafusion::logical_expr::Expr;
 use pyo3::{prelude::*, IntoPyObjectExt};
@@ -126,9 +126,11 @@ impl PyAggregate {
         match expr {
             // TODO: This Alias logic seems to be returning some strange results that we should investigate
             Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()),
-            Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) => {
-                Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect())
-            }
+            Expr::AggregateFunction(AggregateFunction {
+                func: _,
+                params: AggregateFunctionParams { args, .. },
+                ..
+            }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()),
             _ => Err(py_type_err(
                 "Encountered a non Aggregate type in aggregation_arguments",
             )),
diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs
index 09471097f..c09f116e3 100644
--- a/src/expr/aggregate_expr.rs
+++ b/src/expr/aggregate_expr.rs
@@ -40,7 +40,13 @@ impl From<AggregateFunction> for PyAggregateFunction {
 
 impl Display for PyAggregateFunction {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        let args: Vec<String> = self.aggr.args.iter().map(|expr| expr.to_string()).collect();
+        let args: Vec<String> = self
+            .aggr
+            .params
+            .args
+            .iter()
+            .map(|expr| expr.to_string())
+            .collect();
         write!(f, "{}({})", self.aggr.func.name(), args.join(", "))
     }
 }
@@ -54,12 +60,13 @@ impl PyAggregateFunction {
 
     /// is this a distinct aggregate such as `COUNT(DISTINCT expr)`
     fn is_distinct(&self) -> bool {
-        self.aggr.distinct
+        self.aggr.params.distinct
     }
 
     /// Get the arguments to the aggregate function
     fn args(&self) -> Vec<PyExpr> {
         self.aggr
+            .params
             .args
             .iter()
             .map(|expr| PyExpr::from(expr.clone()))
diff --git a/src/expr/window.rs b/src/expr/window.rs
index 13deaec25..c5467bf94 100644
--- a/src/expr/window.rs
+++ b/src/expr/window.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use datafusion::common::{DataFusionError, ScalarValue};
-use datafusion::logical_expr::expr::WindowFunction;
+use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams};
 use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits};
 use pyo3::{prelude::*, IntoPyObjectExt};
 use std::fmt::{self, Display, Formatter};
@@ -118,7 +118,10 @@ impl PyWindowExpr {
     /// Returns order by columns in a window function expression
     pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult<Vec<PySortExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { order_by, .. },
+                ..
+            }) => py_sort_expr_list(&order_by),
             other => Err(not_window_function_err(other)),
         }
     }
 
@@ -126,9 +129,10 @@
     /// Return partition by columns in a window function expression
     pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { partition_by, .. }) => {
-                py_expr_list(&partition_by)
-            }
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { partition_by, .. },
+                ..
+            }) => py_expr_list(&partition_by),
             other => Err(not_window_function_err(other)),
         }
     }
 
@@ -136,7 +140,10 @@
     /// Return input args for window function
     pub fn get_args(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { args, .. },
+                ..
+            }) => py_expr_list(&args),
             other => Err(not_window_function_err(other)),
         }
     }
@@ -152,7 +159,10 @@
     /// Returns a PyWindowFrame for a given window function expression
     pub fn get_frame(&self, expr: PyExpr) -> Option<PyWindowFrame> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { window_frame, .. },
+                ..
+            }) => Some(window_frame.into()),
             _ => None,
         }
     }
diff --git a/src/functions.rs b/src/functions.rs
index 8fac239b4..9c406b95a 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -17,6 +17,7 @@
 
 use datafusion::functions_aggregate::all_default_aggregate_functions;
 use datafusion::functions_window::all_default_window_functions;
+use datafusion::logical_expr::expr::WindowFunctionParams;
 use datafusion::logical_expr::ExprFunctionExt;
 use datafusion::logical_expr::WindowFrame;
 use pyo3::{prelude::*, wrap_pyfunction};
@@ -215,10 +216,7 @@
 #[pyfunction]
 fn col(name: &str) -> PyResult<PyExpr> {
     Ok(PyExpr {
-        expr: datafusion::logical_expr::Expr::Column(Column {
-            relation: None,
-            name: name.to_string(),
-        }),
+        expr: datafusion::logical_expr::Expr::Column(Column::new_unqualified(name)),
     })
 }
 
@@ -333,19 +331,21 @@ fn window(
     Ok(PyExpr {
         expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction {
             fun,
-            args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
-            partition_by: partition_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.expr)
-                .collect::<Vec<_>>(),
-            order_by: order_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.into())
-                .collect::<Vec<_>>(),
-            window_frame,
-            null_treatment: None,
+            params: WindowFunctionParams {
+                args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
+                partition_by: partition_by
+                    .unwrap_or_default()
+                    .into_iter()
+                    .map(|x| x.expr)
+                    .collect::<Vec<_>>(),
+                order_by: order_by
+                    .unwrap_or_default()
+                    .into_iter()
+                    .map(|x| x.into())
+                    .collect::<Vec<_>>(),
+                window_frame,
+                null_treatment: None,
+            },
         }),
     })
 }

From 1aa9cb98bc2cc070048027ac96658c69958db516 Mon Sep 17 00:00:00 2001
From: Tim Saucer
Date: Sat, 22 Mar 2025 14:46:09 -0400
Subject: [PATCH 22/22] Update changelog and version number

---
 Cargo.lock              |  2 +-
 Cargo.toml              |  2 +-
 dev/changelog/46.0.0.md | 73 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 dev/changelog/46.0.0.md

diff --git a/Cargo.lock b/Cargo.lock
index 3a4915f23..f90038c50 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1377,7 +1377,7 @@ dependencies = [
 
 [[package]]
 name = "datafusion-python"
-version = "45.2.0"
+version = "46.0.0"
"46.0.0" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 8afabdd82..bc8639d4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-python" -version = "45.2.0" +version = "46.0.0" homepage = "https://datafusion.apache.org/python" repository = "https://github.com/apache/datafusion-python" authors = ["Apache DataFusion "] diff --git a/dev/changelog/46.0.0.md b/dev/changelog/46.0.0.md new file mode 100644 index 000000000..3e5768099 --- /dev/null +++ b/dev/changelog/46.0.0.md @@ -0,0 +1,73 @@ + + +# Apache DataFusion Python 46.0.0 Changelog + +This release consists of 21 commits from 11 contributors. See credits at the end of this changelog for more information. + +**Implemented enhancements:** + +- feat: reads using global ctx [#982](https://github.com/apache/datafusion-python/pull/982) (ion-elgreco) +- feat: Implementation of udf and udaf decorator [#1040](https://github.com/apache/datafusion-python/pull/1040) (CrystalZhou0529) +- feat: expose regex_count function [#1066](https://github.com/apache/datafusion-python/pull/1066) (nirnayroy) +- feat: Update DataFusion dependency to 46 [#1079](https://github.com/apache/datafusion-python/pull/1079) (timsaucer) + +**Fixed bugs:** + +- fix: add to_timestamp_nanos [#1020](https://github.com/apache/datafusion-python/pull/1020) (chenkovsky) +- fix: type checking [#993](https://github.com/apache/datafusion-python/pull/993) (chenkovsky) + +**Other:** + +- [infra] Fail Clippy on rust build warnings [#1029](https://github.com/apache/datafusion-python/pull/1029) (kevinjqliu) +- Add user documentation for the FFI approach [#1031](https://github.com/apache/datafusion-python/pull/1031) (timsaucer) +- build(deps): bump arrow from 54.1.0 to 54.2.0 [#1035](https://github.com/apache/datafusion-python/pull/1035) (dependabot[bot]) +- Chore: Release datafusion-python 45 [#1024](https://github.com/apache/datafusion-python/pull/1024) (timsaucer) +- Enable Dataframe to be converted into views which can be used in register_table [#1016](https://github.com/apache/datafusion-python/pull/1016) (kosiew) +- Add ruff check for missing futures import [#1052](https://github.com/apache/datafusion-python/pull/1052) (timsaucer) +- Enable take comments to assign issues to users [#1058](https://github.com/apache/datafusion-python/pull/1058) (timsaucer) +- Update python min version to 3.9 [#1043](https://github.com/apache/datafusion-python/pull/1043) (kevinjqliu) +- feat/improve ruff test coverage [#1055](https://github.com/apache/datafusion-python/pull/1055) (timsaucer) +- feat/making global context accessible for users [#1060](https://github.com/apache/datafusion-python/pull/1060) (jsai28) +- Renaming Internal Structs [#1059](https://github.com/apache/datafusion-python/pull/1059) (Spaarsh) +- test: add pytest asyncio tests [#1063](https://github.com/apache/datafusion-python/pull/1063) (jsai28) +- Add decorator for udwf [#1061](https://github.com/apache/datafusion-python/pull/1061) (kosiew) +- Add additional ruff suggestions [#1062](https://github.com/apache/datafusion-python/pull/1062) (Spaarsh) +- Improve collection during repr and repr_html [#1036](https://github.com/apache/datafusion-python/pull/1036) (timsaucer) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 7 Tim Saucer + 2 Kevin Liu + 2 Spaarsh + 2 jsai28 + 2 kosiew + 1 Chen Chongchen + 1 Chongchen Chen + 1 Crystal Zhou + 1 Ion Koutsouris + 1 Nirnay Roy + 1 dependabot[bot] +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. +