Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Add Arrow C streaming, DataFrame iteration, and OOM-safe streaming execution #2207

Add Arrow C streaming, DataFrame iteration, and OOM-safe streaming execution

Add Arrow C streaming, DataFrame iteration, and OOM-safe streaming execution #2207

Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Workflow name.
name: Python Release Build

# Triggers: pull_request events filtered to the main branch, and push events
# for tags matching *-rc* or branches matching branch-*.
on:
  pull_request:
    branches:
      - main
  push:
    tags:
      - '*-rc*'
    branches:
      - 'branch-*'

jobs:
# Lint job: runs Ruff (check and format check) over python/ and then codespell.
build:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - name: Install Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    # --no-install-package installs only the dependencies; the Rust library
    # itself is not built at this point.
    - name: Install dependencies
      run: uv sync --dev --no-install-package datafusion

    # The github output format enables automatic inline annotations.
    - name: Run Ruff
      run: |
        uv run --no-project ruff check --output-format=github python/
        uv run --no-project ruff format --check python/

    - name: Run codespell
      run: uv run --no-project codespell --toml pyproject.toml
# Runs dev/create_license.py and uploads LICENSE.txt as the
# python-wheel-license artifact, which the build jobs download.
generate-license:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    - name: Generate license file
      run: |
        uv run --no-project python ./dev/create_license.py

    - uses: actions/upload-artifact@v4
      with:
        path: LICENSE.txt
        name: python-wheel-license
# Builds release wheels with maturin on the macos-latest and windows-latest
# runners and uploads them as dist-<os> artifacts.
build-python-mac-win:
  name: Mac/Win
  needs:
    - generate-license
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    matrix:
      python-version:
        - '3.10'
      os:
        - macos-latest
        - windows-latest
  steps:
    - uses: actions/checkout@v5

    - uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - uses: dtolnay/rust-toolchain@stable

    # Swap the checked-in LICENSE.txt for the one uploaded by generate-license.
    - run: rm LICENSE.txt
    - name: Download LICENSE.txt
      uses: actions/download-artifact@v5
      with:
        name: python-wheel-license
        path: .

    - name: Install Protoc
      uses: arduino/setup-protoc@v3
      with:
        version: '27.4'
        repo-token: ${{ secrets.GITHUB_TOKEN }}

    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    - name: Build Python package
      run: |
        uv sync --dev --no-install-package datafusion
        uv run --no-project maturin build --release --strip --features substrait

    - name: List Windows wheels
      if: matrix.os == 'windows-latest'
      # The runner is chosen dynamically, so shellcheck (from actionlint)
      # cannot infer that this is powershell; state the shell explicitly.
      shell: powershell
      run: dir target\wheels\

    - name: List Mac wheels
      if: matrix.os != 'windows-latest'
      run: find target/wheels/

    - name: Archive wheels
      uses: actions/upload-artifact@v4
      with:
        name: dist-${{ matrix.os }}
        path: target/wheels/*
# Builds the release wheel for macOS x86_64 on the macos-13 runner and uploads
# it as the dist-macos-x86_64 artifact.
build-macos-x86_64:
  needs: [generate-license]
  name: Mac x86_64
  # NOTE(review): GitHub has announced the retirement of the macos-13 image;
  # confirm whether this should move to an Intel successor label.
  runs-on: macos-13
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.10"]
  steps:
    - uses: actions/checkout@v5
    - uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}
    - uses: dtolnay/rust-toolchain@stable
    # Swap the checked-in LICENSE.txt for the one uploaded by generate-license.
    - run: rm LICENSE.txt
    - name: Download LICENSE.txt
      uses: actions/download-artifact@v5
      with:
        name: python-wheel-license
        path: .
    - name: Install Protoc
      uses: arduino/setup-protoc@v3
      with:
        version: "27.4"
        repo-token: ${{ secrets.GITHUB_TOKEN }}
    - uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true
    - name: Build Python package
      run: |
        uv sync --dev --no-install-package datafusion
        uv run --no-project maturin build --release --strip --features substrait
    - name: List Mac wheels
      run: find target/wheels/
    - name: Archive wheels
      uses: actions/upload-artifact@v4
      with:
        # Named after the architecture this job actually builds (x86_64).
        # The merge job selects artifacts with the pattern dist-*, which
        # this name still matches.
        name: dist-macos-x86_64
        path: target/wheels/*
# Builds the manylinux x86_64 wheel with PyO3/maturin-action and uploads it as
# the dist-manylinux-x86_64 artifact (also consumed by the docs job).
build-manylinux-x86_64:
  name: Manylinux x86_64
  needs:
    - generate-license
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    # Swap the checked-in LICENSE.txt for the one uploaded by generate-license
    # and print it to the log.
    - run: rm LICENSE.txt
    - name: Download LICENSE.txt
      uses: actions/download-artifact@v5
      with:
        name: python-wheel-license
        path: .
    - run: cat LICENSE.txt

    - name: Build wheels
      uses: PyO3/maturin-action@v1
      env:
        RUST_BACKTRACE: '1'
      with:
        rust-toolchain: nightly
        target: x86_64
        manylinux: auto
        # Keep the components on one line due to
        # https://github.com/PyO3/maturin-action/issues/153
        rustup-components: rust-std rustfmt
        args: --release --manylinux 2014 --features protoc,substrait

    - name: Archive wheels
      uses: actions/upload-artifact@v4
      with:
        name: dist-manylinux-x86_64
        path: target/wheels/*
# Builds the manylinux aarch64 wheel with PyO3/maturin-action and uploads it
# as the dist-manylinux-aarch64 artifact.
build-manylinux-aarch64:
  needs: [generate-license]
  name: Manylinux arm64
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5
    # Swap the checked-in LICENSE.txt for the one uploaded by generate-license
    # and print it to the log.
    - run: rm LICENSE.txt
    - name: Download LICENSE.txt
      uses: actions/download-artifact@v5
      with:
        name: python-wheel-license
        path: .
    - run: cat LICENSE.txt
    - name: Build wheels
      uses: PyO3/maturin-action@v1
      env:
        RUST_BACKTRACE: 1
      with:
        rust-toolchain: nightly
        target: aarch64
        # Use manylinux_2_28-cross because the manylinux2014-cross has GCC 4.8.5, which causes the build to fail
        # Quoted: a bare 2_28 is read as the integer 228 by YAML 1.1 loaders,
        # which allow underscores as digit separators.
        manylinux: "2_28"
        rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153
        args: --release --features protoc,substrait
    - name: Archive wheels
      uses: actions/upload-artifact@v4
      with:
        name: dist-manylinux-aarch64
        path: target/wheels/*
# Builds the source distribution into dist/ with PyO3/maturin-action and then
# checks that target/wheels holds no files.
# NOTE(review): no step in this job uploads the contents of dist/ — confirm
# that this is intended.
build-sdist:
  name: Source distribution
  needs:
    - generate-license
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5

    # Swap the checked-in LICENSE.txt for the one uploaded by generate-license
    # and print it to the log.
    - run: rm LICENSE.txt
    - name: Download LICENSE.txt
      uses: actions/download-artifact@v5
      with:
        name: python-wheel-license
        path: .
    - run: cat LICENSE.txt

    - name: Build sdist
      uses: PyO3/maturin-action@v1
      with:
        rust-toolchain: stable
        manylinux: auto
        rustup-components: rust-std rustfmt
        args: --release --sdist --out dist --features protoc,substrait

    # Fails the job when `ls -A target/wheels` prints anything.
    - name: Assert sdist build does not generate wheels
      shell: bash
      run: |
        if [ "$(ls -A target/wheels)" ]; then
          echo "Error: Sdist build generated wheels"
          exit 1
        else
          echo "Directory is clean"
        fi
# After every build job has finished, merges the artifacts whose names match
# dist-* into one artifact named dist.
merge-build-artifacts:
  needs:
    - build-python-mac-win
    - build-macos-x86_64
    - build-manylinux-x86_64
    - build-manylinux-aarch64
    - build-sdist
  runs-on: ubuntu-latest
  steps:
    - name: Merge Build Artifacts
      uses: actions/upload-artifact/merge@v4
      with:
        pattern: 'dist-*'
        name: dist
# Documentation build job that runs after the Linux x86_64 wheel is built.
# It installs that wheel, builds the HTML docs and, for a push of main or of a
# tag, commits the generated HTML to a docs branch (asf-staging or asf-site).
build-docs:
  name: Build docs
  runs-on: ubuntu-latest
  needs: [build-manylinux-x86_64] # Only need the Linux wheel for docs
  # Build the docs for every push or pull_request event. The publishing steps
  # below are additionally restricted to a push of main or of a tag.
  if: github.event_name == 'push' || github.event_name == 'pull_request'
  steps:
    - name: Set target branch
      if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag')
      id: target-branch
      # The ref values reach the script through environment variables instead
      # of being expanded into the script text, so a ref name containing
      # quotes or shell metacharacters cannot change the commands that run.
      env:
        REF: ${{ github.ref }}
        REF_TYPE: ${{ github.ref_type }}
      run: |
        set -x
        if test "$REF" = 'refs/heads/main'; then
          echo "value=asf-staging" >> "$GITHUB_OUTPUT"
        elif test "$REF_TYPE" = 'tag'; then
          echo "value=asf-site" >> "$GITHUB_OUTPUT"
        else
          echo "Unsupported input: $REF / $REF_TYPE"
          exit 1
        fi

    - name: Checkout docs sources
      uses: actions/checkout@v5

    - name: Checkout docs target branch
      if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag')
      uses: actions/checkout@v5
      with:
        fetch-depth: 0
        ref: ${{ steps.target-branch.outputs.value }}
        path: docs-target

    - name: Setup Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.11"

    - name: Install dependencies
      uses: astral-sh/setup-uv@v6
      with:
        enable-cache: true

    # Download the Linux wheel built in the previous job
    - name: Download pre-built Linux wheel
      uses: actions/download-artifact@v5
      with:
        name: dist-manylinux-x86_64
        path: wheels/

    # Install from the pre-built wheel
    - name: Install from pre-built wheel
      run: |
        set -x
        uv venv
        # Install documentation dependencies
        uv sync --dev --no-install-package datafusion --group docs
        # Install the pre-built wheel
        WHEEL=$(find wheels/ -name "*.whl" | head -1)
        if [ -n "$WHEEL" ]; then
          echo "Installing wheel: $WHEEL"
          uv pip install "$WHEEL"
        else
          echo "ERROR: No wheel found!"
          exit 1
        fi

    - name: Build docs
      run: |
        set -x
        cd docs
        # --fail turns an HTTP error into a step failure instead of saving the
        # error response body under the data file's name.
        curl --fail -O https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv
        curl --fail -O https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2021-01.parquet
        uv run --no-project make html

    - name: Copy & push the generated HTML
      if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag')
      run: |
        set -x
        cd docs-target
        # Delete every top-level entry except those whose name begins with
        # .git (the repository metadata and files such as .gitignore). Names
        # are matched literally and passed to rm directly, so paths containing
        # whitespace are handled.
        find . -mindepth 1 -maxdepth 1 ! -name '.git*' -exec rm -rf {} +
        cp ../.asf.yaml .
        cp -r ../docs/build/html/* .
        git status --porcelain
        if [ "$(git status --porcelain)" != "" ]; then
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add --all
          # GITHUB_SHA is read from the runner environment rather than being
          # expanded into the script text.
          git commit -m "Publish built docs triggered by ${GITHUB_SHA}"
          # Falls back to a force push when the plain push is rejected.
          git push || git push --force
        fi
# NOTE: PyPI publishing has to be done manually for now, after the release has passed the vote
# release:
#   name: Publish in PyPI
#   needs: [build-manylinux-x86_64, build-manylinux-aarch64, build-python-mac-win]
#   runs-on: ubuntu-latest
#   steps:
#     - uses: actions/download-artifact@v5
#     - name: Publish to PyPI
#       uses: pypa/gh-action-pypi-publish@release/v1
#       with:
#         user: __token__
#         password: ${{ secrets.pypi_password }}