diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml index 2c1eed391..ceba11fb8 100644 --- a/.github/workflows/alpine-test.yml +++ b/.github/workflows/alpine-test.yml @@ -2,8 +2,11 @@ name: test-alpine on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: - build: + test: runs-on: ubuntu-latest container: @@ -16,10 +19,10 @@ jobs: steps: - name: Prepare Alpine Linux run: | - apk add sudo git git-daemon python3 py3-pip + apk add sudo git git-daemon python3 py3-pip py3-virtualenv echo 'Defaults env_keep += "CI GITHUB_* RUNNER_*"' >/etc/sudoers.d/ci_env addgroup -g 127 docker - adduser -D -u 1001 runner + adduser -D -u 1001 runner # TODO: Check if this still works on GHA as intended. adduser runner docker shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. @@ -44,23 +47,23 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Set up virtualenv + - name: Set up virtual environment run: | python -m venv .venv - . .venv/bin/activate - printf '%s=%s\n' 'PATH' "$PATH" 'VIRTUAL_ENV' "$VIRTUAL_ENV" >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + . .venv/bin/activate + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + . .venv/bin/activate + pip install '.[test]' - name: Show version and platform information run: | + . .venv/bin/activate uname -a command -v git python git version @@ -69,4 +72,5 @@ jobs: - name: Test with pytest run: | + . .venv/bin/activate pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ae5241898..2bee952af 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,38 +19,48 @@ on: jobs: analyze: - name: Analyze + name: Analyze (${{ matrix.language }}) # Runner size impacts CodeQL analysis time. To learn more, please see: # - https://gh.io/recommended-hardware-resources-for-running-codeql # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners - # Consider using larger runners for possible analysis time improvements. + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} - timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: - actions: read - contents: read security-events: write strategy: fail-fast: false matrix: - language: [ 'python' ] - # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] - # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository uses: actions/checkout@v4 + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 + # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} - setup-python-dependencies: false + build-mode: ${{ matrix.build-mode }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. @@ -58,21 +68,21 @@ jobs: # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v3 - + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/cygwin-test.yml b/.github/workflows/cygwin-test.yml index bde4ea659..cc9e1edf0 100644 --- a/.github/workflows/cygwin-test.yml +++ b/.github/workflows/cygwin-test.yml @@ -2,35 +2,47 @@ name: test-cygwin on: [push, pull_request, workflow_dispatch] -jobs: - build: - runs-on: windows-latest +permissions: + contents: read +jobs: + test: strategy: + matrix: + selection: [fast, perf] + include: + - selection: fast + additional-pytest-args: --ignore=test/performance + - selection: perf + additional-pytest-args: test/performance + fail-fast: false + runs-on: windows-latest + env: CHERE_INVOKING: "1" CYGWIN_NOWINPATH: "1" defaults: run: - shell: C:\tools\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" + shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" steps: - name: Force LF line endings run: | git config --global core.autocrlf false # Affects the non-Cygwin git. - shell: bash # Use Git Bash instead of Cygwin Bash for this step. + shell: pwsh # Do this outside Cygwin, to affect actions/checkout. - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Set up Cygwin - uses: egor-tensin/setup-cygwin@v4 + - name: Install Cygwin + uses: cygwin/cygwin-install-action@v6 with: - packages: python39=3.9.16-1 python39-pip python39-virtualenv git + packages: git python39 python-pip-wheel python-setuptools-wheel python-wheel-wheel + add-to-path: false # No need to change $PATH outside the Cygwin environment. - name: Arrange for verbose output run: | @@ -55,19 +67,18 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Ensure the "pip" command is available + - name: Set up virtual environment run: | - # This is used unless, and before, an updated pip is installed. - ln -s pip3 /usr/bin/pip + python3.9 -m venv .venv + echo 'BASH_ENV=.venv/bin/activate' >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' - name: Show version and platform information run: | @@ -77,6 +88,6 @@ jobs: python --version python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' - - name: Test with pytest + - name: Test with pytest (${{ matrix.additional-pytest-args }}) run: | - pytest --color=yes -p no:sugar --instafail -vv + pytest --color=yes -p no:sugar --instafail -vv ${{ matrix.additional-pytest-args }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a0e81a993..ceba0dd85 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,6 +2,9 @@ name: Lint on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 031b0e6b2..4457a341f 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -9,23 +9,30 @@ permissions: contents: read jobs: - build: + test: strategy: - fail-fast: false matrix: - os: ["ubuntu-latest", "macos-13", "macos-14", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + os-type: [ubuntu, macos, windows] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t"] exclude: - - os: "macos-14" - python-version: "3.7" - - os: "macos-14" - python-version: "3.8" - - os: "macos-14" - python-version: "3.9" + - os-type: macos + python-version: "3.7" # Not available for the ARM-based macOS runners. + - os-type: macos + python-version: "3.13t" + - os-type: windows + python-version: "3.13" # FIXME: Fix and enable Python 3.13 on Windows (#1955). + - os-type: windows + python-version: "3.13t" include: + - os-ver: latest + - os-type: ubuntu + python-version: "3.7" + os-ver: "22.04" - experimental: false - runs-on: ${{ matrix.os }} + fail-fast: false + + runs-on: ${{ matrix.os-type }}-${{ matrix.os-ver }} defaults: run: @@ -43,10 +50,12 @@ jobs: allow-prereleases: ${{ matrix.experimental }} - name: Set up WSL (Windows) - if: startsWith(matrix.os, 'windows') - uses: Vampire/setup-wsl@v3.1.1 + if: matrix.os-type == 'windows' + uses: Vampire/setup-wsl@v6.0.0 with: - distribution: Debian + wsl-version: 1 + distribution: Alpine + additional-packages: bash - name: Prepare this repo for tests run: | @@ -62,12 +71,11 @@ jobs: - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' - name: Show version and platform information run: | @@ -79,7 +87,7 @@ jobs: # For debugging hook tests on native Windows systems that may have WSL. - name: Show bash.exe candidates (Windows) - if: startsWith(matrix.os, 'windows') + if: matrix.os-type == 'windows' run: | set +e bash.exe -c 'printenv WSL_DISTRO_NAME; uname -a' @@ -88,10 +96,11 @@ jobs: - name: Check types with mypy run: | - mypy --python-version=${{ matrix.python-version }} + mypy --python-version="${PYTHON_VERSION%t}" # Version only, with no "t" for free-threaded. env: MYPY_FORCE_COLOR: "1" TERM: "xterm-256color" # For color: https://github.com/python/mypy/issues/13817 + PYTHON_VERSION: ${{ matrix.python-version }} # With new versions of mypy new issues might arise. This is a problem if there is # nobody able to fix them, so we have to ignore errors until that changes. continue-on-error: true @@ -102,6 +111,7 @@ jobs: continue-on-error: false - name: Documentation + if: matrix.python-version != '3.7' run: | - pip install ".[doc]" + pip install '.[doc]' make -C doc html diff --git a/.gitignore b/.gitignore index d85569405..eab294a65 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ __pycache__/ # Transient editor files *.swp *~ +\#*# +.#*# # Editor configuration nbproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 686d5fa1e..737b56d45 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,28 +1,38 @@ repos: +- repo: https://github.com/codespell-project/codespell + rev: v2.4.1 + hooks: + - id: codespell + additional_dependencies: [tomli] + exclude: ^test/fixtures/ + - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.3 + rev: v0.11.12 hooks: - - id: ruff + - id: ruff-check args: ["--fix"] exclude: ^git/ext/ - id: ruff-format exclude: ^git/ext/ - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.9.0.6 + rev: v0.10.0.1 hooks: - id: shellcheck args: [--color] exclude: ^test/fixtures/polyglot$|^git/ext/ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: + - id: end-of-file-fixer + exclude: ^test/fixtures/|COPYING|LICENSE + - id: check-symlinks - id: check-toml - id: check-yaml - id: check-merge-conflict - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.16 + rev: v0.24.1 hooks: - - id: validate-pyproject + - id: validate-pyproject diff --git a/AUTHORS b/AUTHORS index 9311b3962..b57113edd 100644 --- a/AUTHORS +++ b/AUTHORS @@ -54,5 +54,7 @@ Contributors are: -Wenhan Zhu -Eliah Kagan -Ethan Lin +-Jonas Scharpf +-Gordon Marx Portions derived from other open source works and are clearly marked. diff --git a/README.md b/README.md index d365a6584..59c6f995b 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ In the less common case that you do not want to install test dependencies, `pip #### With editable *dependencies* (not preferred, and rarely needed) -In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediatley reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. +In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediately reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. If you want to do that *and* you want the versions in GitPython's git submodules to be used, then pass `-e git/ext/gitdb` and/or `-e git/ext/gitdb/gitdb/ext/smmap` to `pip install`. This can be done in any order, and in separate `pip install` commands or the same one, so long as `-e` appears before *each* path. For example, you can install GitPython, gitdb, and smmap editably in the currently active virtual environment this way: diff --git a/SECURITY.md b/SECURITY.md index d39425b70..0aea34845 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,4 +11,6 @@ Only the latest version of GitPython can receive security updates. If a vulnerab ## Reporting a Vulnerability -Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicliy right away. +Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicly right away. + +Vulnerabilities in GitPython's dependencies [gitdb](https://github.com/gitpython-developers/gitdb/blob/master/SECURITY.md) or [smmap](https://github.com/gitpython-developers/smmap/blob/master/SECURITY.md), which primarily exist to support GitPython, can be reported here as well, at that same link. The affected package (`GitPython`, `gitdb`, or `smmap`) can be included in the report, if known. diff --git a/VERSION b/VERSION index d1bf6638d..e6af1c454 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.43 +3.1.44 diff --git a/doc/requirements.txt b/doc/requirements.txt index 7769af5ae..81140d898 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,8 +1,3 @@ -sphinx == 4.3.2 +sphinx >= 7.1.2, < 7.2 sphinx_rtd_theme -sphinxcontrib-applehelp >= 1.0.2, <= 1.0.4 -sphinxcontrib-devhelp == 1.0.2 -sphinxcontrib-htmlhelp >= 2.0.0, <= 2.0.1 -sphinxcontrib-qthelp == 1.0.3 -sphinxcontrib-serializinghtml == 1.1.5 sphinx-autodoc-typehints diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 0bc757134..00a3c660e 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,12 @@ Changelog ========= +3.1.44 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.44 + 3.1.43 ====== @@ -20,7 +26,7 @@ https://github.com/gitpython-developers/GitPython/releases/tag/3.1.42 3.1.41 ====== -This release is relevant for security as it fixes a possible arbitary +This release is relevant for security as it fixes a possible arbitrary code execution on Windows. See this PR for details: https://github.com/gitpython-developers/GitPython/pull/1792 diff --git a/doc/source/index.rst b/doc/source/index.rst index 72db8ee5a..ca5229ac3 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -21,4 +21,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/source/intro.rst b/doc/source/intro.rst index 4f22a0942..d053bd117 100644 --- a/doc/source/intro.rst +++ b/doc/source/intro.rst @@ -122,4 +122,3 @@ License Information =================== GitPython is licensed under the New BSD License. See the LICENSE file for more information. - diff --git a/doc/source/roadmap.rst b/doc/source/roadmap.rst index a573df33a..34c953626 100644 --- a/doc/source/roadmap.rst +++ b/doc/source/roadmap.rst @@ -6,4 +6,3 @@ The full list of milestones including associated tasks can be found on GitHub: https://github.com/gitpython-developers/GitPython/issues Select the respective milestone to filter the list of issues accordingly. - diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index ca47690ea..afa653d0d 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -3,24 +3,21 @@ import os import tempfile from configparser import ParsingError -from utils import is_expected_exception_message, get_max_filename_length - -if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover - path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary - +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) + +# Setup the Git environment +setup_git_environment() from git import Repo, GitCommandError, InvalidGitRepositoryError -if not sys.warnoptions: # pragma: no cover - # The warnings filter below can be overridden by passing the -W option - # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. - import warnings - import logging - # Fuzzing data causes some modules to generate a large number of warnings - # which are not usually interesting and make the test output hard to read, so we ignore them. - warnings.simplefilter("ignore") - logging.getLogger().setLevel(logging.ERROR) +def sanitize_input(input_str, max_length=255): + """Sanitize and truncate inputs to avoid invalid Git operations.""" + sanitized = "".join(ch for ch in input_str if ch.isalnum() or ch in ("-", "_", ".")) + return sanitized[:max_length] def TestOneInput(data): @@ -33,39 +30,64 @@ def TestOneInput(data): try: with tempfile.TemporaryDirectory() as submodule_temp_dir: sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) - sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + commit_message = sanitize_input(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + sub_repo.index.commit(commit_message) + + submodule_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(repo.working_tree_dir)) + ) + ) + + submodule_path = os.path.relpath( + os.path.join(repo.working_tree_dir, submodule_name), + start=repo.working_tree_dir, + ) - submodule_name = f"submodule_{fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))}" - submodule_path = os.path.join(repo.working_tree_dir, submodule_name) - submodule_url = sub_repo.git_dir + # Ensure submodule_path is valid + if not submodule_name or submodule_name.startswith("/") or ".." in submodule_name: + return -1 # Reject invalid input so they are not added to the corpus - submodule = repo.create_submodule(submodule_name, submodule_path, url=submodule_url) - repo.index.commit(f"Added submodule {submodule_name}") + submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") with submodule.config_writer() as writer: key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) value_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) writer.set_value( - fdp.ConsumeUnicodeNoSurrogates(key_length), fdp.ConsumeUnicodeNoSurrogates(value_length) + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(key_length)), + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(value_length)), ) writer.release() - submodule.update(init=fdp.ConsumeBool(), dry_run=fdp.ConsumeBool(), force=fdp.ConsumeBool()) + submodule.update( + init=fdp.ConsumeBool(), + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + submodule_repo = submodule.module() - new_file_name = fdp.ConsumeUnicodeNoSurrogates( - fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(submodule_repo.working_tree_dir))) + new_file_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(submodule_repo.working_tree_dir)) + ) ) new_file_path = os.path.join(submodule_repo.working_tree_dir, new_file_name) with open(new_file_path, "wb") as new_file: new_file.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + submodule_repo.index.add([new_file_path]) submodule_repo.index.commit("Added new file to submodule") repo.submodule_update(recursive=fdp.ConsumeBool()) - submodule_repo.head.reset(commit="HEAD~1", working_tree=fdp.ConsumeBool(), head=fdp.ConsumeBool()) - # Use fdp.PickValueInList to ensure at least one of 'module' or 'configuration' is True + submodule_repo.head.reset( + commit="HEAD~1", + working_tree=fdp.ConsumeBool(), + head=fdp.ConsumeBool(), + ) + module_option_value, configuration_option_value = fdp.PickValueInList( [(True, False), (False, True), (True, True)] ) @@ -86,20 +108,11 @@ def TestOneInput(data): IsADirectoryError, NotADirectoryError, BrokenPipeError, + PermissionError, ): return -1 - except ValueError as e: - expected_messages = [ - "SHA is empty", - "Reference at", - "embedded null byte", - "This submodule instance does not exist anymore", - "cmd stdin was empty", - ] - if is_expected_exception_message(e, expected_messages): - return -1 - else: - raise e + except Exception as e: + return handle_exception(e) def main(): diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py index f522d2959..97e6eab98 100644 --- a/fuzzing/fuzz-targets/utils.py +++ b/fuzzing/fuzz-targets/utils.py @@ -1,6 +1,9 @@ import atheris # pragma: no cover import os # pragma: no cover -from typing import List # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover @atheris.instrument_func @@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int: # pragma: no cover int: The maximum filename length. """ return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = load_exception_list(exception_file) + return match_exception_with_traceback(exception_list, exc_traceback) + + +@atheris.instrument_func +def handle_exception(e: Exception) -> int: + """Encapsulate exception handling logic for reusability.""" + exc_traceback = e.__traceback__ + if check_exception_against_list(exc_traceback): + return -1 + else: + raise e + + +@atheris.instrument_func +def setup_git_environment() -> None: + """Set up the environment variables for Git.""" + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): # pragma: no cover + bundled_git_binary_path = os.path.join(bundle_dir, "git") + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path + + if not sys.warnoptions: # pragma: no cover + # The warnings filter below can be overridden by passing the -W option + # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable. + import warnings + import logging + + # Fuzzing data causes some modules to generate a large number of warnings + # which are not usually interesting and make the test output hard to read, so we ignore them. + warnings.simplefilter("ignore") + logging.getLogger().setLevel(logging.ERROR) diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh index e0b3a50ab..c156e872d 100644 --- a/fuzzing/oss-fuzz-scripts/build.sh +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -15,5 +15,5 @@ find "$SRC" -maxdepth 1 \ # Build fuzzers in $OUT. find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do - compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh index bbdcf5357..924a3cbf3 100755 --- a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -91,6 +91,17 @@ create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" + +popd + + # The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. python3 -m pip install --upgrade pip # Upgrade to the latest versions known to work at the time the below changes were introduced: diff --git a/git/cmd.py b/git/cmd.py index 90fc39cd6..15d7820df 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -60,6 +60,11 @@ overload, ) +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + from git.types import Literal, PathLike, TBD if TYPE_CHECKING: @@ -207,7 +212,7 @@ def pump_stream( ) if stderr_handler: error_str: Union[str, bytes] = ( - "error: process killed because it timed out." f" kill_after_timeout={kill_after_timeout} seconds" + f"error: process killed because it timed out. kill_after_timeout={kill_after_timeout} seconds" ) if not decode_streams and isinstance(p_stderr, BinaryIO): # Assume stderr_handler needs binary input. @@ -268,12 +273,12 @@ def _safer_popen_windows( if shell: # The original may be immutable, or the caller may reuse it. Mutate a copy. env = {} if env is None else dict(env) - env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be an value. + env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be any value. # When not using a shell, the current process does the search in a # CreateProcessW API call, so the variable must be set in our environment. With # a shell, that's unnecessary if https://github.com/python/cpython/issues/101283 - # is patched. In Python versions where it is unpatched, and in the rare case the + # is patched. In Python versions where it is unpatched, in the rare case the # ComSpec environment variable is unset, the search for the shell itself is # unsafe. Setting NoDefaultCurrentDirectoryInExePath in all cases, as done here, # is simpler and protects against that. (As above, the "1" can be any value.) @@ -308,6 +313,230 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ## -- End Utilities -- @} + +class _AutoInterrupt: + """Process wrapper that terminates the wrapped process on finalization. + + This kills/interrupts the stored process instance once this instance goes out of + scope. It is used to prevent processes piling up in case iterators stop reading. + + All attributes are wired through to the contained process object. + + The wait method is overridden to perform automatic status code checking and possibly + raise. + """ + + __slots__ = ("proc", "args", "status") + + # If this is non-zero it will override any status code during _terminate, used + # to prevent race conditions in testing. + _status_code_if_terminate: int = 0 + + def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: + self.proc = proc + self.args = args + self.status: Union[int, None] = None + + def _terminate(self) -> None: + """Terminate the underlying process.""" + if self.proc is None: + return + + proc = self.proc + self.proc = None + if proc.stdin: + proc.stdin.close() + if proc.stdout: + proc.stdout.close() + if proc.stderr: + proc.stderr.close() + # Did the process finish already so we have a return code? + try: + if proc.poll() is not None: + self.status = self._status_code_if_terminate or proc.poll() + return + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) + + # It can be that nothing really exists anymore... + if os is None or getattr(os, "kill", None) is None: + return + + # Try to kill it. + try: + proc.terminate() + status = proc.wait() # Ensure the process goes away. + + self.status = self._status_code_if_terminate or status + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) + # END exception handling + + def __del__(self) -> None: + self._terminate() + + def __getattr__(self, attr: str) -> Any: + return getattr(self.proc, attr) + + # TODO: Bad choice to mimic `proc.wait()` but with different args. + def wait(self, stderr: Union[None, str, bytes] = b"") -> int: + """Wait for the process and return its status code. + + :param stderr: + Previously read value of stderr, in case stderr is already closed. + + :warn: + May deadlock if output or error pipes are used and not handled separately. + + :raise git.exc.GitCommandError: + If the return status is not 0. + """ + if stderr is None: + stderr_b = b"" + stderr_b = force_bytes(data=stderr, encoding="utf-8") + status: Union[int, None] + if self.proc is not None: + status = self.proc.wait() + p_stderr = self.proc.stderr + else: # Assume the underlying proc was killed earlier or never existed. + status = self.status + p_stderr = None + + def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: + if stream: + try: + return stderr_b + force_bytes(stream.read()) + except (OSError, ValueError): + return stderr_b or b"" + else: + return stderr_b or b"" + + # END status handling + + if status != 0: + errstr = read_all_from_possibly_closed_stream(p_stderr) + _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) + raise GitCommandError(remove_password_if_present(self.args), status, errstr) + return status + + +_AutoInterrupt.__name__ = "AutoInterrupt" +_AutoInterrupt.__qualname__ = "Git.AutoInterrupt" + + +class _CatFileContentStream: + """Object representing a sized read-only stream returning the contents of + an object. + + This behaves like a stream, but counts the data read and simulates an empty stream + once our sized content region is empty. + + If not all data are read to the end of the object's lifetime, we read the rest to + ensure the underlying stream continues to work. + """ + + __slots__ = ("_stream", "_nbr", "_size") + + def __init__(self, size: int, stream: IO[bytes]) -> None: + self._stream = stream + self._size = size + self._nbr = 0 # Number of bytes read. + + # Special case: If the object is empty, has null bytes, get the final + # newline right away. + if size == 0: + stream.read(1) + # END handle empty streams + + def read(self, size: int = -1) -> bytes: + bytes_left = self._size - self._nbr + if bytes_left == 0: + return b"" + if size > -1: + # Ensure we don't try to read past our limit. + size = min(bytes_left, size) + else: + # They try to read all, make sure it's not more than what remains. + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # Check for depletion, read our final byte to make the stream usable by + # others. + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + return data + + def readline(self, size: int = -1) -> bytes: + if self._nbr == self._size: + return b"" + + # Clamp size to lowest allowed value. + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # Handle final byte. + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size: int = -1) -> List[bytes]: + if self._nbr == self._size: + return [] + + # Leave all additional logic to our readline method, we just check the size. + out = [] + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + # skipcq: PYL-E0301 + def __iter__(self) -> "Git.CatFileContentStream": + return self + + def __next__(self) -> bytes: + line = self.readline() + if not line: + raise StopIteration + + return line + + next = __next__ + + def __del__(self) -> None: + bytes_left = self._size - self._nbr + if bytes_left: + # Read and discard - seeking is impossible within a stream. + # This includes any terminating newline. + self._stream.read(bytes_left + 1) + # END handle incomplete read + + +_CatFileContentStream.__name__ = "CatFileContentStream" +_CatFileContentStream.__qualname__ = "Git.CatFileContentStream" + + _USE_SHELL_DEFAULT_MESSAGE = ( "Git.USE_SHELL is deprecated, because only its default value of False is safe. " "It will be removed in a future release." @@ -321,7 +550,7 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ) -def _warn_use_shell(extra_danger: bool) -> None: +def _warn_use_shell(*, extra_danger: bool) -> None: warnings.warn( _USE_SHELL_DANGER_MESSAGE if extra_danger else _USE_SHELL_DEFAULT_MESSAGE, DeprecationWarning, @@ -337,12 +566,12 @@ class _GitMeta(type): def __getattribute(cls, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __setattr(cls, name: str, value: Any) -> Any: if name == "USE_SHELL": - _warn_use_shell(value) + _warn_use_shell(extra_danger=value) super().__setattr__(name, value) if not TYPE_CHECKING: @@ -728,221 +957,9 @@ def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it." ) - class AutoInterrupt: - """Process wrapper that terminates the wrapped process on finalization. - - This kills/interrupts the stored process instance once this instance goes out of - scope. It is used to prevent processes piling up in case iterators stop reading. - - All attributes are wired through to the contained process object. - - The wait method is overridden to perform automatic status code checking and - possibly raise. - """ - - __slots__ = ("proc", "args", "status") - - # If this is non-zero it will override any status code during _terminate, used - # to prevent race conditions in testing. - _status_code_if_terminate: int = 0 - - def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: - self.proc = proc - self.args = args - self.status: Union[int, None] = None - - def _terminate(self) -> None: - """Terminate the underlying process.""" - if self.proc is None: - return - - proc = self.proc - self.proc = None - if proc.stdin: - proc.stdin.close() - if proc.stdout: - proc.stdout.close() - if proc.stderr: - proc.stderr.close() - # Did the process finish already so we have a return code? - try: - if proc.poll() is not None: - self.status = self._status_code_if_terminate or proc.poll() - return - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - - # It can be that nothing really exists anymore... - if os is None or getattr(os, "kill", None) is None: - return - - # Try to kill it. - try: - proc.terminate() - status = proc.wait() # Ensure the process goes away. - - self.status = self._status_code_if_terminate or status - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - # END exception handling - - def __del__(self) -> None: - self._terminate() - - def __getattr__(self, attr: str) -> Any: - return getattr(self.proc, attr) - - # TODO: Bad choice to mimic `proc.wait()` but with different args. - def wait(self, stderr: Union[None, str, bytes] = b"") -> int: - """Wait for the process and return its status code. - - :param stderr: - Previously read value of stderr, in case stderr is already closed. - - :warn: - May deadlock if output or error pipes are used and not handled - separately. - - :raise git.exc.GitCommandError: - If the return status is not 0. - """ - if stderr is None: - stderr_b = b"" - stderr_b = force_bytes(data=stderr, encoding="utf-8") - status: Union[int, None] - if self.proc is not None: - status = self.proc.wait() - p_stderr = self.proc.stderr - else: # Assume the underlying proc was killed earlier or never existed. - status = self.status - p_stderr = None - - def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: - if stream: - try: - return stderr_b + force_bytes(stream.read()) - except (OSError, ValueError): - return stderr_b or b"" - else: - return stderr_b or b"" - - # END status handling - - if status != 0: - errstr = read_all_from_possibly_closed_stream(p_stderr) - _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) - raise GitCommandError(remove_password_if_present(self.args), status, errstr) - return status - - # END auto interrupt - - class CatFileContentStream: - """Object representing a sized read-only stream returning the contents of - an object. - - This behaves like a stream, but counts the data read and simulates an empty - stream once our sized content region is empty. - - If not all data are read to the end of the object's lifetime, we read the - rest to ensure the underlying stream continues to work. - """ - - __slots__ = ("_stream", "_nbr", "_size") - - def __init__(self, size: int, stream: IO[bytes]) -> None: - self._stream = stream - self._size = size - self._nbr = 0 # Number of bytes read. - - # Special case: If the object is empty, has null bytes, get the final - # newline right away. - if size == 0: - stream.read(1) - # END handle empty streams - - def read(self, size: int = -1) -> bytes: - bytes_left = self._size - self._nbr - if bytes_left == 0: - return b"" - if size > -1: - # Ensure we don't try to read past our limit. - size = min(bytes_left, size) - else: - # They try to read all, make sure it's not more than what remains. - size = bytes_left - # END check early depletion - data = self._stream.read(size) - self._nbr += len(data) - - # Check for depletion, read our final byte to make the stream usable by - # others. - if self._size - self._nbr == 0: - self._stream.read(1) # final newline - # END finish reading - return data - - def readline(self, size: int = -1) -> bytes: - if self._nbr == self._size: - return b"" - - # Clamp size to lowest allowed value. - bytes_left = self._size - self._nbr - if size > -1: - size = min(bytes_left, size) - else: - size = bytes_left - # END handle size - - data = self._stream.readline(size) - self._nbr += len(data) - - # Handle final byte. - if self._size - self._nbr == 0: - self._stream.read(1) - # END finish reading - - return data - - def readlines(self, size: int = -1) -> List[bytes]: - if self._nbr == self._size: - return [] - - # Leave all additional logic to our readline method, we just check the size. - out = [] - nbr = 0 - while True: - line = self.readline() - if not line: - break - out.append(line) - if size > -1: - nbr += len(line) - if nbr > size: - break - # END handle size constraint - # END readline loop - return out - - # skipcq: PYL-E0301 - def __iter__(self) -> "Git.CatFileContentStream": - return self - - def __next__(self) -> bytes: - line = self.readline() - if not line: - raise StopIteration - - return line - - next = __next__ + AutoInterrupt: TypeAlias = _AutoInterrupt - def __del__(self) -> None: - bytes_left = self._size - self._nbr - if bytes_left: - # Read and discard - seeking is impossible within a stream. - # This includes any terminating newline. - self._stream.read(bytes_left + 1) - # END handle incomplete read + CatFileContentStream: TypeAlias = _CatFileContentStream def __init__(self, working_dir: Union[None, PathLike] = None) -> None: """Initialize this instance with: @@ -971,7 +988,7 @@ def __init__(self, working_dir: Union[None, PathLike] = None) -> None: def __getattribute__(self, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __getattr__(self, name: str) -> Any: @@ -1269,6 +1286,7 @@ def execute( stdout=stdout_sink, shell=shell, universal_newlines=universal_newlines, + encoding=defenc if universal_newlines else None, **subprocess_kwargs, ) except cmd_not_found_exception as err: @@ -1318,7 +1336,7 @@ def communicate() -> Tuple[AnyStr, AnyStr]: out, err = proc.communicate() watchdog.cancel() if kill_check.is_set(): - err = 'Timeout: the command "%s" did not complete in %d ' "secs." % ( + err = 'Timeout: the command "%s" did not complete in %d secs.' % ( " ".join(redacted_command), timeout, ) diff --git a/git/config.py b/git/config.py index de3508360..5fc099a27 100644 --- a/git/config.py +++ b/git/config.py @@ -496,19 +496,26 @@ def string_decode(v: str) -> str: if mo: # We might just have handled the last line, which could contain a quotation we want to remove. optname, vi, optval = mo.group("option", "vi", "value") + optname = self.optionxform(optname.rstrip()) + if vi in ("=", ":") and ";" in optval and not optval.strip().startswith('"'): pos = optval.find(";") if pos != -1 and optval[pos - 1].isspace(): optval = optval[:pos] optval = optval.strip() - if optval == '""': - optval = "" - # END handle empty string - optname = self.optionxform(optname.rstrip()) - if len(optval) > 1 and optval[0] == '"' and optval[-1] != '"': + + if len(optval) < 2 or optval[0] != '"': + # Does not open quoting. + pass + elif optval[-1] != '"': + # Opens quoting and does not close: appears to start multi-line quoting. is_multi_line = True optval = string_decode(optval[1:]) - # END handle multi-line + elif optval.find("\\", 1, -1) == -1 and optval.find('"', 1, -1) == -1: + # Opens and closes quoting. Single line, and all we need is quote removal. + optval = optval[1:-1] + # TODO: Handle other quoted content, especially well-formed backslash escapes. + # Preserves multiple values for duplicate optnames. cursect.add(optname, optval) else: diff --git a/git/ext/gitdb b/git/ext/gitdb index 3d3e9572d..335c0f661 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 3d3e9572dc452fea53d328c101b3d1440bbefe40 +Subproject commit 335c0f66173eecdc7b2597c2b6c3d1fde795df30 diff --git a/git/index/base.py b/git/index/base.py index 28b60a880..7cc9d3ade 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -28,6 +28,7 @@ from git.objects import Blob, Commit, Object, Submodule, Tree from git.objects.util import Serializable from git.util import ( + Actor, LazyMixin, LockedFD, join_path_native, @@ -76,7 +77,6 @@ from git.refs.reference import Reference from git.repo import Repo - from git.util import Actor Treeish = Union[Tree, Commit, str, bytes] @@ -508,7 +508,7 @@ def iter_blobs( :param predicate: Function(t) returning ``True`` if tuple(stage, Blob) should be yielded by - the iterator. A default filter, the `~git.index.typ.BlobFilter`, allows you + the iterator. A default filter, the :class:`~git.index.typ.BlobFilter`, allows you to yield blobs only if they match a given list of paths. """ for entry in self.entries.values(): @@ -530,7 +530,10 @@ def unmerged_blobs(self) -> Dict[PathLike, List[Tuple[StageType, Blob]]]: stage. That is, a file removed on the 'other' branch whose entries are at stage 3 will not have a stage 3 entry. """ - is_unmerged_blob = lambda t: t[0] != 0 + + def is_unmerged_blob(t: Tuple[StageType, Blob]) -> bool: + return t[0] != 0 + path_map: Dict[PathLike, List[Tuple[StageType, Blob]]] = {} for stage, blob in self.iter_blobs(is_unmerged_blob): path_map.setdefault(blob.path, []).append((stage, blob)) @@ -653,9 +656,12 @@ def _to_relative_path(self, path: PathLike) -> PathLike: return path if self.repo.bare: raise InvalidGitRepositoryError("require non-bare repository") - if not str(path).startswith(str(self.repo.working_tree_dir)): + if not osp.normpath(str(path)).startswith(str(self.repo.working_tree_dir)): raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir)) - return os.path.relpath(path, self.repo.working_tree_dir) + result = os.path.relpath(path, self.repo.working_tree_dir) + if str(path).endswith(os.sep) and not result.endswith(os.sep): + result += os.sep + return result def _preprocess_add_items( self, items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]] @@ -687,12 +693,17 @@ def _store_path(self, filepath: PathLike, fprogress: Callable) -> BaseIndexEntry This must be ensured in the calling code. """ st = os.lstat(filepath) # Handles non-symlinks as well. + if S_ISLNK(st.st_mode): # In PY3, readlink is a string, but we need bytes. # In PY2, it was just OS encoded bytes, we assumed UTF-8. - open_stream: Callable[[], BinaryIO] = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) + def open_stream() -> BinaryIO: + return BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) else: - open_stream = lambda: open(filepath, "rb") + + def open_stream() -> BinaryIO: + return open(filepath, "rb") + with open_stream() as stream: fprogress(filepath, False, filepath) istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) @@ -767,7 +778,7 @@ def add( - path string Strings denote a relative or absolute path into the repository pointing - to an existing file, e.g., ``CHANGES``, `lib/myfile.ext``, + to an existing file, e.g., ``CHANGES``, ``lib/myfile.ext``, ``/home/gitrepo/lib/myfile.ext``. Absolute paths must start with working tree directory of this index's @@ -786,7 +797,7 @@ def add( They are added at stage 0. - - :class:~`git.objects.blob.Blob` or + - :class:`~git.objects.blob.Blob` or :class:`~git.objects.submodule.base.Submodule` object Blobs are added as they are assuming a valid mode is set. @@ -812,7 +823,7 @@ def add( - :class:`~git.index.typ.BaseIndexEntry` or type - Handling equals the one of :class:~`git.objects.blob.Blob` objects, but + Handling equals the one of :class:`~git.objects.blob.Blob` objects, but the stage may be explicitly set. Please note that Index Entries require binary sha's. @@ -995,7 +1006,7 @@ def remove( The path string may include globs, such as ``*.c``. - - :class:~`git.objects.blob.Blob` object + - :class:`~git.objects.blob.Blob` object Only the path portion is used in this case. @@ -1117,8 +1128,8 @@ def commit( message: str, parent_commits: Union[List[Commit], None] = None, head: bool = True, - author: Union[None, "Actor"] = None, - committer: Union[None, "Actor"] = None, + author: Union[None, Actor] = None, + committer: Union[None, Actor] = None, author_date: Union[datetime.datetime, str, None] = None, commit_date: Union[datetime.datetime, str, None] = None, skip_hooks: bool = False, @@ -1333,8 +1344,11 @@ def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLik kwargs["as_process"] = True kwargs["istream"] = subprocess.PIPE proc = self.repo.git.checkout_index(args, **kwargs) + # FIXME: Reading from GIL! - make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read()) + def make_exc() -> GitCommandError: + return GitCommandError(("git-checkout-index", *args), 128, proc.stderr.read()) + checked_out_files: List[PathLike] = [] for path in paths: @@ -1443,7 +1457,7 @@ def reset( key = entry_key(path, 0) self.entries[key] = nie[key] except KeyError: - # If key is not in theirs, it musn't be in ours. + # If key is not in theirs, it mustn't be in ours. try: del self.entries[key] except KeyError: diff --git a/git/index/fun.py b/git/index/fun.py index 59cce6ae6..d03ec6759 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -207,7 +207,7 @@ def read_header(stream: IO[bytes]) -> Tuple[int, int]: version, num_entries = unpacked # TODO: Handle version 3: extended data, see read-cache.c. - assert version in (1, 2) + assert version in (1, 2), "Unsupported git index version %i, only 1 and 2 are supported" % version return version, num_entries diff --git a/git/objects/base.py b/git/objects/base.py index eeaebc09b..faf600c6b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -122,7 +122,7 @@ def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> AnyGitObject: :return: New :class:`Object` instance of a type appropriate to the object type behind `id`. The id of the newly created object will be a binsha even though the - input id may have been a `~git.refs.reference.Reference` or rev-spec. + input id may have been a :class:`~git.refs.reference.Reference` or rev-spec. :param id: :class:`~git.refs.reference.Reference`, rev-spec, or hexsha. @@ -218,7 +218,7 @@ class IndexObject(Object): """Base for all objects that can be part of the index file. The classes representing git object types that can be part of the index file are - :class:`~git.objects.tree.Tree and :class:`~git.objects.blob.Blob`. In addition, + :class:`~git.objects.tree.Tree` and :class:`~git.objects.blob.Blob`. In addition, :class:`~git.objects.submodule.base.Submodule`, which is not really a git object type but can be part of an index file, is also a subclass. """ diff --git a/git/objects/commit.py b/git/objects/commit.py index d957c9051..fbe0ee9c0 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -289,7 +289,7 @@ def name_rev(self) -> str: """ :return: String describing the commits hex sha based on the closest - `~git.refs.reference.Reference`. + :class:`~git.refs.reference.Reference`. :note: Mostly useful for UI purposes. @@ -349,7 +349,7 @@ def iter_items( return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> Iterator["Commit"]: - R"""Iterate _all_ parents of this commit. + R"""Iterate *all* parents of this commit. :param paths: Optional path or list of paths limiting the :class:`Commit`\s to those that @@ -377,15 +377,25 @@ def stats(self) -> Stats: :return: :class:`Stats` """ - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, no_renames=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: + + def process_lines(lines: List[str]) -> str: + text = "" + for file_info, line in zip(lines, lines[len(lines) // 2 :]): + change_type = file_info.split("\t")[0][-1] (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 + text += "%s\t%s\t%s\t%s\n" % (change_type, insertions, deletions, filename) + return text + + if not self.parents: + lines = self.repo.git.diff_tree( + self.hexsha, "--", numstat=True, no_renames=True, root=True, raw=True + ).splitlines()[1:] + text = process_lines(lines) else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) + lines = self.repo.git.diff( + self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True, raw=True + ).splitlines() + text = process_lines(lines) return Stats._list_from_string(self.repo, text) @property diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index fa60bcdaf..0e55b8fa9 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -353,6 +353,11 @@ def _clone_repo( os.makedirs(module_abspath_dir) module_checkout_path = osp.join(str(repo.working_tree_dir), path) + if url.startswith("../"): + remote_name = repo.active_branch.tracking_branch().remote_name + repo_remote_url = repo.remote(remote_name).url + url = os.path.join(repo_remote_url, url) + clone = git.Repo.clone_from( url, module_checkout_path, diff --git a/git/objects/tag.py b/git/objects/tag.py index a3bb0b882..88671d316 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -14,7 +14,7 @@ import sys from git.compat import defenc -from git.util import hex_to_bin +from git.util import Actor, hex_to_bin from . import base from .util import get_object_type_by_name, parse_actor_and_date @@ -30,7 +30,6 @@ if TYPE_CHECKING: from git.repo import Repo - from git.util import Actor from .blob import Blob from .commit import Commit @@ -64,7 +63,7 @@ def __init__( binsha: bytes, object: Union[None, base.Object] = None, tag: Union[None, str] = None, - tagger: Union[None, "Actor"] = None, + tagger: Union[None, Actor] = None, tagged_date: Union[int, None] = None, tagger_tz_offset: Union[int, None] = None, message: Union[str, None] = None, diff --git a/git/objects/tree.py b/git/objects/tree.py index 09184a781..1845d0d0d 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -50,7 +50,9 @@ # -------------------------------------------------------- -cmp: Callable[[str, str], int] = lambda a, b: (a > b) - (a < b) + +def cmp(a: str, b: str) -> int: + return (a > b) - (a < b) class TreeModifier: diff --git a/git/objects/util.py b/git/objects/util.py index 5c56e6134..a68d701f5 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -568,11 +568,11 @@ def addToStack( yield rval # Only continue to next level if this is appropriate! - nd = d + 1 - if depth > -1 and nd > depth: + next_d = d + 1 + if depth > -1 and next_d > depth: continue - addToStack(stack, item, branch_first, nd) + addToStack(stack, item, branch_first, next_d) # END for each item on work stack diff --git a/git/refs/log.py b/git/refs/log.py index 17e3a94b3..8f2f2cd38 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -126,7 +126,7 @@ def from_line(cls, line: bytes) -> "RefLogEntry": elif len(fields) == 2: info, msg = fields else: - raise ValueError("Line must have up to two TAB-separated fields." " Got %s" % repr(line_str)) + raise ValueError("Line must have up to two TAB-separated fields. Got %s" % repr(line_str)) # END handle first split oldhexsha = info[:40] diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index 510850b2e..1b90a3115 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -39,7 +39,6 @@ if TYPE_CHECKING: from git.config import GitConfigParser from git.objects.commit import Actor - from git.refs import Head, TagReference, RemoteReference, Reference from git.refs.log import RefLogEntry from git.repo import Repo @@ -387,17 +386,23 @@ def set_object( # set the commit on our reference return self._get_reference().set_object(object, logmsg) - commit = property( - _get_commit, - set_commit, # type: ignore[arg-type] - doc="Query or set commits directly", - ) + @property + def commit(self) -> "Commit": + """Query or set commits directly""" + return self._get_commit() + + @commit.setter + def commit(self, commit: Union[Commit, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_commit(commit) + + @property + def object(self) -> AnyGitObject: + """Return the object our ref currently refers to""" + return self._get_object() - object = property( - _get_object, - set_object, # type: ignore[arg-type] - doc="Return the object our ref currently refers to", - ) + @object.setter + def object(self, object: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_object(object) def _get_reference(self) -> "SymbolicReference": """ @@ -496,12 +501,14 @@ def set_reference( return self # Aliased reference - reference: Union["Head", "TagReference", "RemoteReference", "Reference"] - reference = property( # type: ignore[assignment] - _get_reference, - set_reference, # type: ignore[arg-type] - doc="Returns the Reference we point to", - ) + @property + def reference(self) -> "SymbolicReference": + return self._get_reference() + + @reference.setter + def reference(self, ref: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_reference(ref) + ref = reference def is_valid(self) -> bool: diff --git a/git/remote.py b/git/remote.py index 15e360064..20e42b412 100644 --- a/git/remote.py +++ b/git/remote.py @@ -250,7 +250,7 @@ def _from_line(cls, remote: "Remote", line: str) -> "PushInfo": flags |= cls.NEW_TAG elif "[new branch]" in summary: flags |= cls.NEW_HEAD - # uptodate encoded in control character + # `uptodate` encoded in control character else: # Fast-forward or forced update - was encoded in control character, # but we parse the old and new commit. @@ -894,7 +894,7 @@ def _get_fetch_info_from_stderr( None, progress_handler, finalizer=None, - decode_streams=True, + decode_streams=False, kill_after_timeout=kill_after_timeout, ) @@ -1071,7 +1071,7 @@ def fetch( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_fetch_options) proc = self.repo.git.fetch( - "--", self, *args, as_process=True, with_stdout=False, universal_newlines=False, v=verbose, **kwargs + "--", self, *args, as_process=True, with_stdout=False, universal_newlines=True, v=verbose, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): @@ -1125,7 +1125,7 @@ def pull( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_pull_options) proc = self.repo.git.pull( - "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=False, v=True, **kwargs + "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=True, v=True, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): diff --git a/git/repo/base.py b/git/repo/base.py index 346248ddb..7e918df8c 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -179,7 +179,7 @@ def __init__( R"""Create a new :class:`Repo` instance. :param path: - The path to either the root git directory or the bare git repo:: + The path to either the worktree directory or the .git directory itself:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") @@ -354,21 +354,19 @@ def __ne__(self, rhs: object) -> bool: def __hash__(self) -> int: return hash(self.git_dir) - # Description property - def _get_description(self) -> str: + @property + def description(self) -> str: + """The project's description""" filename = osp.join(self.git_dir, "description") with open(filename, "rb") as fp: return fp.read().rstrip().decode(defenc) - def _set_description(self, descr: str) -> None: + @description.setter + def description(self, descr: str) -> None: filename = osp.join(self.git_dir, "description") with open(filename, "wb") as fp: fp.write((descr + "\n").encode(defenc)) - description = property(_get_description, _set_description, doc="the project's description") - del _get_description - del _set_description - @property def working_tree_dir(self) -> Optional[PathLike]: """ @@ -514,7 +512,7 @@ def create_submodule(self, *args: Any, **kwargs: Any) -> Submodule: def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """An iterator yielding Submodule instances. - See the `~git.objects.util.Traversable` interface for a description of `args` + See the :class:`~git.objects.util.Traversable` interface for a description of `args` and `kwargs`. :return: @@ -885,13 +883,14 @@ def _set_daemon_export(self, value: object) -> None: elif not value and fileexists: os.unlink(filename) - daemon_export = property( - _get_daemon_export, - _set_daemon_export, - doc="If True, git-daemon may export this repository", - ) - del _get_daemon_export - del _set_daemon_export + @property + def daemon_export(self) -> bool: + """If True, git-daemon may export this repository""" + return self._get_daemon_export() + + @daemon_export.setter + def daemon_export(self, value: object) -> None: + self._set_daemon_export(value) def _get_alternates(self) -> List[str]: """The list of alternates for this repo from which objects can be retrieved. @@ -929,11 +928,14 @@ def _set_alternates(self, alts: List[str]) -> None: with open(alternates_path, "wb") as f: f.write("\n".join(alts).encode(defenc)) - alternates = property( - _get_alternates, - _set_alternates, - doc="Retrieve a list of alternates paths or set a list paths to be used as alternates", - ) + @property + def alternates(self) -> List[str]: + """Retrieve a list of alternates paths or set a list paths to be used as alternates""" + return self._get_alternates() + + @alternates.setter + def alternates(self, alts: List[str]) -> None: + self._set_alternates(alts) def is_dirty( self, diff --git a/git/repo/fun.py b/git/repo/fun.py index 182cf82ed..1c995c6c6 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -301,7 +301,13 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # Handle type. if output_type == "commit": - pass # Default. + obj = cast("TagObject", obj) + if obj and obj.type == "tag": + obj = deref_tag(obj) + else: + # Cannot do anything for non-tags. + pass + # END handle tag elif output_type == "tree": try: obj = cast(AnyGitObject, obj) @@ -399,7 +405,7 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END end handle tag except (IndexError, AttributeError) as e: raise BadName( - f"Invalid revision spec '{rev}' - not enough " f"parent commits to reach '{token}{int(num)}'" + f"Invalid revision spec '{rev}' - not enough parent commits to reach '{token}{int(num)}'" ) from e # END exception handling # END parse loop diff --git a/git/types.py b/git/types.py index 584450146..cce184530 100644 --- a/git/types.py +++ b/git/types.py @@ -248,6 +248,7 @@ class Files_TD(TypedDict): insertions: int deletions: int lines: int + change_type: str class Total_TD(TypedDict): diff --git a/git/util.py b/git/util.py index 11f963e02..0aff5eb64 100644 --- a/git/util.py +++ b/git/util.py @@ -464,6 +464,12 @@ def _is_cygwin_git(git_executable: str) -> bool: # Just a name given, not a real path. uname_cmd = osp.join(git_dir, "uname") + + if not (pathlib.Path(uname_cmd).is_file() and os.access(uname_cmd, os.X_OK)): + _logger.debug(f"Failed checking if running in CYGWIN: {uname_cmd} is not an executable") + _is_cygwin_cache[git_executable] = is_cygwin + return is_cygwin + process = subprocess.Popen([uname_cmd], stdout=subprocess.PIPE, universal_newlines=True) uname_out, _ = process.communicate() # retcode = process.poll() @@ -484,7 +490,9 @@ def is_cygwin_git(git_executable: PathLike) -> bool: ... def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: - if sys.platform == "win32": # TODO: See if we can use `sys.platform != "cygwin"`. + # TODO: when py3.7 support is dropped, use the new interpolation f"{variable=}" + _logger.debug(f"sys.platform={sys.platform!r}, git_executable={git_executable!r}") + if sys.platform != "cygwin": return False elif git_executable is None: return False @@ -910,6 +918,7 @@ class Stats: deletions = number of deleted lines as int insertions = number of inserted lines as int lines = total number of lines changed as int, or deletions + insertions + change_type = type of change as str, A|C|D|M|R|T|U|X|B ``full-stat-dict`` @@ -938,7 +947,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats": "files": {}, } for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") + (change_type, raw_insertions, raw_deletions, filename) = line.split("\t") insertions = raw_insertions != "-" and int(raw_insertions) or 0 deletions = raw_deletions != "-" and int(raw_deletions) or 0 hsh["total"]["insertions"] += insertions @@ -949,6 +958,7 @@ def _list_from_string(cls, repo: "Repo", text: str) -> "Stats": "insertions": insertions, "deletions": deletions, "lines": insertions + deletions, + "change_type": change_type, } hsh["files"][filename.strip()] = files_dict return Stats(hsh["total"], hsh["files"]) @@ -1198,8 +1208,6 @@ def __getattr__(self, attr: str) -> T_IterableObj: return list.__getattribute__(self, attr) def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_IterableObj: # type: ignore[override] - assert isinstance(index, (int, str, slice)), "Index of IterableList should be an int or str" - if isinstance(index, int): return list.__getitem__(self, index) elif isinstance(index, slice): @@ -1212,8 +1220,6 @@ def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_Iterabl # END handle getattr def __delitem__(self, index: Union[SupportsIndex, int, slice, str]) -> None: - assert isinstance(index, (int, str)), "Index of IterableList should be an int or str" - delindex = cast(int, index) if not isinstance(index, int): delindex = -1 diff --git a/pyproject.toml b/pyproject.toml index ee54edb78..58ed81f17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,16 +60,14 @@ lint.select = [ # "UP", # See: https://docs.astral.sh/ruff/rules/#pyupgrade-up ] lint.extend-select = [ - # "A", # See: https://pypi.org/project/flake8-builtins - "B", # See: https://pypi.org/project/flake8-bugbear - "C4", # See: https://pypi.org/project/flake8-comprehensions - "TCH004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ + # "A", # See: https://pypi.org/project/flake8-builtins + "B", # See: https://pypi.org/project/flake8-bugbear + "C4", # See: https://pypi.org/project/flake8-comprehensions + "TC004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ ] lint.ignore = [ - "E203", # Whitespace before ':' - "E731", # Do not assign a `lambda` expression, use a `def` + # If it becomes necessary to ignore any rules, list them here. ] -lint.ignore-init-module-imports = true lint.unfixable = [ "F401", # Module imported but unused ] @@ -78,3 +76,12 @@ lint.unfixable = [ "test/**" = [ "B018", # useless-expression ] +"fuzzing/fuzz-targets/**" = [ + "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file +] + + +[tool.codespell] +ignore-words-list="afile,assertIn,doesnt,gud,uptodate" +#count = true +quiet-level = 3 diff --git a/requirements-dev.txt b/requirements-dev.txt index f626644af..066b192b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,8 @@ --r requirements.txt --r test-requirements.txt - -# For additional local testing/linting - to be added elsewhere eventually. -ruff -shellcheck -pytest-icdiff -# pytest-profiling +-r requirements.txt +-r test-requirements.txt + +# For additional local testing/linting - to be added elsewhere eventually. +ruff >=0.8 +shellcheck +pytest-icdiff +# pytest-profiling diff --git a/requirements.txt b/requirements.txt index 7159416a9..61d8403b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ gitdb>=4.0.1,<5 -typing-extensions>=3.7.4.3;python_version<"3.8" +typing-extensions>=3.10.0.2;python_version<"3.10" diff --git a/setup.py b/setup.py index f28fedb85..a7b1eab00 100755 --- a/setup.py +++ b/setup.py @@ -95,7 +95,6 @@ def _stamp_version(filename: str) -> None: # "Development Status :: 7 - Inactive", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: Microsoft :: Windows", diff --git a/test/fixtures/diff_numstat b/test/fixtures/diff_numstat index 44c6ca2d5..b76e467eb 100644 --- a/test/fixtures/diff_numstat +++ b/test/fixtures/diff_numstat @@ -1,2 +1,3 @@ -29 18 a.txt -0 5 b.txt +M 29 18 a.txt +M 0 5 b.txt +A 7 0 c.txt \ No newline at end of file diff --git a/test/fixtures/git_config_with_empty_quotes b/test/fixtures/git_config_with_empty_quotes new file mode 100644 index 000000000..f11fe4248 --- /dev/null +++ b/test/fixtures/git_config_with_empty_quotes @@ -0,0 +1,2 @@ +[core] + filemode = "" diff --git a/test/fixtures/git_config_with_extra_whitespace b/test/fixtures/git_config_with_extra_whitespace new file mode 100644 index 000000000..0f727cb5d --- /dev/null +++ b/test/fixtures/git_config_with_extra_whitespace @@ -0,0 +1,2 @@ +[init] + defaultBranch = trunk diff --git a/test/fixtures/git_config_with_quotes b/test/fixtures/git_config_with_quotes new file mode 100644 index 000000000..40e6710d9 --- /dev/null +++ b/test/fixtures/git_config_with_quotes @@ -0,0 +1,3 @@ +[user] + name = "Cody Veal" + email = "cveal05@gmail.com" diff --git a/test/fixtures/git_config_with_quotes_escapes b/test/fixtures/git_config_with_quotes_escapes new file mode 100644 index 000000000..33332c221 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_escapes @@ -0,0 +1,9 @@ +[custom] + hasnewline = "first\nsecond" + hasbackslash = "foo\\bar" + hasquote = "ab\"cd" + hastrailingbackslash = "word\\" + hasunrecognized = "p\qrs" + hasunescapedquotes = "ab"cd"e" + ordinary = "hello world" + unquoted = good evening diff --git a/test/fixtures/git_config_with_quotes_whitespace_inside b/test/fixtures/git_config_with_quotes_whitespace_inside new file mode 100644 index 000000000..c6014cc61 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_inside @@ -0,0 +1,2 @@ +[core] + commentString = "# " diff --git a/test/fixtures/git_config_with_quotes_whitespace_outside b/test/fixtures/git_config_with_quotes_whitespace_outside new file mode 100644 index 000000000..4b1615a51 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_outside @@ -0,0 +1,2 @@ +[init] + defaultBranch = "trunk" diff --git a/test/lib/helper.py b/test/lib/helper.py index 5d91447ea..241d27341 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -415,9 +415,15 @@ def __init__(self, env_dir, *, with_pip): if with_pip: # The upgrade_deps parameter to venv.create is 3.9+ only, so do it this way. - command = [self.python, "-m", "pip", "install", "--upgrade", "pip"] - if sys.version_info < (3, 12): - command.append("setuptools") + command = [ + self.python, + "-m", + "pip", + "install", + "--upgrade", + "pip", + 'setuptools; python_version<"3.12"', + ] subprocess.check_output(command) @property diff --git a/test/test_commit.py b/test/test_commit.py index 5832258de..37c66e3e7 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -135,9 +135,12 @@ def test_stats(self): commit = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") stats = commit.stats - def check_entries(d): + def check_entries(d, has_change_type=False): assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): + keys = ("insertions", "deletions", "lines") + if has_change_type: + keys += ("change_type",) + for key in keys: assert key in d # END assertion helper @@ -148,7 +151,7 @@ def check_entries(d): assert "files" in stats.total for _filepath, d in stats.files.items(): - check_entries(d) + check_entries(d, True) # END for each stated file # Check that data is parsed properly. diff --git a/test/test_config.py b/test/test_config.py index 92997422d..8e1007d9e 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -391,13 +391,17 @@ def test_complex_aliases(self): with GitConfigParser(file_obj, read_only=False) as w_config: self.assertEqual( w_config.get("alias", "rbi"), - '"!g() { git rebase -i origin/${1:-master} ; } ; g"', + "!g() { git rebase -i origin/${1:-master} ; } ; g", ) self.assertEqual( file_obj.getvalue(), self._to_memcache(fixture_path(".gitconfig")).getvalue(), ) + def test_config_with_extra_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_extra_whitespace"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + def test_empty_config_value(self): cr = GitConfigParser(fixture_path("git_config_with_empty_value"), read_only=True) @@ -406,6 +410,44 @@ def test_empty_config_value(self): with self.assertRaises(cp.NoOptionError): cr.get_value("color", "ui") + def test_config_with_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes"), read_only=True) + + self.assertEqual(cr.get("user", "name"), "Cody Veal") + self.assertEqual(cr.get("user", "email"), "cveal05@gmail.com") + + def test_config_with_empty_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_empty_quotes"), read_only=True) + self.assertEqual(cr.get("core", "filemode"), "", "quotes can form a literal empty string as value") + + def test_config_with_quotes_with_literal_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes_whitespace_inside"), read_only=True) + self.assertEqual(cr.get("core", "commentString"), "# ") + + def test_config_with_quotes_with_whitespace_outside_value(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes_whitespace_outside"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + + def test_config_with_quotes_containing_escapes(self): + """For now just suppress quote removal. But it would be good to interpret most of these.""" + cr = GitConfigParser(fixture_path("git_config_with_quotes_escapes"), read_only=True) + + # These can eventually be supported by substituting the represented character. + self.assertEqual(cr.get("custom", "hasnewline"), R'"first\nsecond"') + self.assertEqual(cr.get("custom", "hasbackslash"), R'"foo\\bar"') + self.assertEqual(cr.get("custom", "hasquote"), R'"ab\"cd"') + self.assertEqual(cr.get("custom", "hastrailingbackslash"), R'"word\\"') + self.assertEqual(cr.get("custom", "hasunrecognized"), R'"p\qrs"') + + # It is less obvious whether and what to eventually do with this. + self.assertEqual(cr.get("custom", "hasunescapedquotes"), '"ab"cd"e"') + + # Cases where quote removal is clearly safe should happen even after those. + self.assertEqual(cr.get("custom", "ordinary"), "hello world") + + # Cases without quotes should still parse correctly even after those, too. + self.assertEqual(cr.get("custom", "unquoted"), "good evening") + def test_get_values_works_without_requiring_any_other_calls_first(self): file_obj = self._to_memcache(fixture_path("git_config_multiple")) cr = GitConfigParser(file_obj, read_only=True) diff --git a/test/test_exc.py b/test/test_exc.py index c1eae7240..2e979f5a1 100644 --- a/test/test_exc.py +++ b/test/test_exc.py @@ -52,7 +52,7 @@ _streams_n_substrings = ( None, - "steram", + "stream", "ομορφο stream", ) diff --git a/test/test_fun.py b/test/test_fun.py index b8593b400..a456b8aab 100644 --- a/test/test_fun.py +++ b/test/test_fun.py @@ -243,6 +243,7 @@ def test_tree_traversal(self): B_old = self.rorepo.tree("1f66cfbbce58b4b552b041707a12d437cc5f400a") # old base tree # Two very different trees. + entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], "") self._assert_tree_entries(entries, 2) @@ -251,7 +252,10 @@ def test_tree_traversal(self): self._assert_tree_entries(oentries, 2) # Single tree. - is_no_tree = lambda i, d: i.type != "tree" + + def is_no_tree(i, _d): + return i.type != "tree" + entries = traverse_trees_recursive(odb, [B.binsha], "") assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) self._assert_tree_entries(entries, 1) diff --git a/test/test_git.py b/test/test_git.py index 94e68ecf0..4a54d0d9b 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -747,7 +747,7 @@ def test_environment(self, rw_dir): path = osp.join(rw_dir, "failing-script.sh") with open(path, "wt") as stream: - stream.write("#!/usr/bin/env sh\n" "echo FOO\n") + stream.write("#!/usr/bin/env sh\necho FOO\n") os.chmod(path, 0o777) rw_repo = Repo.init(osp.join(rw_dir, "repo")) @@ -762,17 +762,18 @@ def test_environment(self, rw_dir): def test_handle_process_output(self): from git.cmd import handle_process_output, safer_popen - line_count = 5002 - count = [None, 0, 0] + expected_line_count = 5002 + actual_lines = [None, [], []] - def counter_stdout(line): - count[1] += 1 + def stdout_handler(line): + actual_lines[1].append(line) - def counter_stderr(line): - count[2] += 1 + def stderr_handler(line): + actual_lines[2].append(line) cmdline = [ sys.executable, + "-S", # Keep any `CoverageWarning` messages out of the subprocess stderr. fixture_path("cat_file.py"), str(fixture_path("issue-301_stderr")), ] @@ -784,10 +785,10 @@ def counter_stderr(line): shell=False, ) - handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) + handle_process_output(proc, stdout_handler, stderr_handler, finalize_process) - self.assertEqual(count[1], line_count) - self.assertEqual(count[2], line_count) + self.assertEqual(len(actual_lines[1]), expected_line_count, repr(actual_lines[1])) + self.assertEqual(len(actual_lines[2]), expected_line_count, repr(actual_lines[2])) def test_execute_kwargs_set_agrees_with_method(self): parameter_names = inspect.signature(cmd.Git.execute).parameters.keys() diff --git a/test/test_index.py b/test/test_index.py index b92258c92..cf3b90fa6 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -16,6 +16,7 @@ import subprocess import sys import tempfile +from unittest import mock from gitdb.base import IStream @@ -329,7 +330,10 @@ def test_index_file_from_tree(self, rw_repo): assert len([e for e in three_way_index.entries.values() if e.stage != 0]) # ITERATE BLOBS - merge_required = lambda t: t[0] != 0 + + def merge_required(t): + return t[0] != 0 + merge_blobs = list(three_way_index.iter_blobs(merge_required)) assert merge_blobs assert merge_blobs[0][0] in (1, 2, 3) @@ -1015,10 +1019,31 @@ class Mocked: rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + def test__to_relative_path_absolute_trailing_slash(self): + repo_root = os.path.join(osp.abspath(os.sep), "directory1", "repo_root") + + class Mocked: + bare = False + git_dir = repo_root + working_tree_dir = repo_root + + repo = Mocked() + path = os.path.join(repo_root, f"directory2{os.sep}") + index = IndexFile(repo) + + expected_path = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + + with mock.patch("git.index.base.os.path") as ospath_mock: + ospath_mock.relpath.return_value = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, @@ -1077,7 +1102,7 @@ def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, @@ -1120,7 +1145,7 @@ def test_pre_commit_hook_fail(self, rw_repo): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", - rasies=HookExecutionError, + raises=HookExecutionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Wsl, @@ -1181,6 +1206,18 @@ def test_index_add_pathlike(self, rw_repo): rw_repo.index.add(file) + @with_rw_repo("HEAD") + def test_index_add_non_normalized_path(self, rw_repo): + git_dir = Path(rw_repo.git_dir) + + file = git_dir / "file.txt" + file.touch() + non_normalized_path = file.as_posix() + if os.name != "nt": + non_normalized_path = "/" + non_normalized_path[1:].replace("/", "//") + + rw_repo.index.add(non_normalized_path) + class TestIndexUtils: @pytest.mark.parametrize("file_path_type", [str, Path]) diff --git a/test/test_installation.py b/test/test_installation.py index ae6472e98..7c82bd403 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -2,6 +2,7 @@ # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import ast +import functools import os import subprocess @@ -11,50 +12,22 @@ class TestInstallation(TestBase): @with_rw_directory def test_installation(self, rw_dir): - venv = self._set_up_venv(rw_dir) + venv, run = self._set_up_venv(rw_dir) - result = subprocess.run( - [venv.pip, "install", "."], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Can't install project", - ) + result = run([venv.pip, "install", "."]) + self._check_result(result, "Can't install project") - result = subprocess.run( - [venv.python, "-c", "import git"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Self-test failed", - ) + result = run([venv.python, "-c", "import git"]) + self._check_result(result, "Self-test failed") - result = subprocess.run( - [venv.python, "-c", "import gitdb; import smmap"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Dependencies not installed", - ) + result = run([venv.python, "-c", "import gitdb; import smmap"]) + self._check_result(result, "Dependencies not installed") # Even IF gitdb or any other dependency is supplied during development by # inserting its location into PYTHONPATH or otherwise patched into sys.path, # make sure it is not wrongly inserted as the *first* entry. - result = subprocess.run( - [venv.python, "-c", "import sys; import git; print(sys.path)"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - syspath = result.stdout.decode("utf-8").splitlines()[0] + result = run([venv.python, "-c", "import sys; import git; print(sys.path)"]) + syspath = result.stdout.splitlines()[0] syspath = ast.literal_eval(syspath) self.assertEqual( "", @@ -64,10 +37,37 @@ def test_installation(self, rw_dir): @staticmethod def _set_up_venv(rw_dir): + # Initialize the virtual environment. venv = VirtualEnvironment(rw_dir, with_pip=True) + + # Make its src directory a symlink to our own top-level source tree. os.symlink( os.path.dirname(os.path.dirname(__file__)), venv.sources, target_is_directory=True, ) - return venv + + # Create a convenience function to run commands in it. + run = functools.partial( + subprocess.run, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + cwd=venv.sources, + env={**os.environ, "PYTHONWARNINGS": "error"}, + ) + + return venv, run + + def _check_result(self, result, failure_summary): + self.assertEqual( + 0, + result.returncode, + msg=self._prepare_failure_message(result, failure_summary), + ) + + @staticmethod + def _prepare_failure_message(result, failure_summary): + stdout = result.stdout.rstrip() + stderr = result.stderr.rstrip() + return f"{failure_summary}\n\nstdout:\n{stdout}\n\nstderr:\n{stderr}" diff --git a/test/test_quick_doc.py b/test/test_quick_doc.py index 4ef75f4aa..98658e02f 100644 --- a/test/test_quick_doc.py +++ b/test/test_quick_doc.py @@ -173,7 +173,7 @@ def test_cloned_repo_object(self, local_dir): # [15-test_cloned_repo_object] def print_files_from_git(root, level=0): for entry in root: - print(f'{"-" * 4 * level}| {entry.path}, {entry.type}') + print(f"{'-' * 4 * level}| {entry.path}, {entry.type}") if entry.type == "tree": print_files_from_git(entry, level + 1) diff --git a/test/test_repo.py b/test/test_repo.py index e38da5bb6..bfa1bbb78 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -1064,9 +1064,9 @@ def test_rev_parse(self): # TODO: Dereference tag into a blob 0.1.7^{blob} - quite a special one. # Needs a tag which points to a blob. - # ref^0 returns commit being pointed to, same with ref~0, and ^{} + # ref^0 returns commit being pointed to, same with ref~0, ^{}, and ^{commit} tag = rev_parse("0.1.4") - for token in ("~0", "^0", "^{}"): + for token in ("~0", "^0", "^{}", "^{commit}"): self.assertEqual(tag.object, rev_parse("0.1.4%s" % token)) # END handle multiple tokens diff --git a/test/test_stats.py b/test/test_stats.py index eec73c802..91d2cf6ae 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -14,13 +14,19 @@ def test_list_from_string(self): output = fixture("diff_numstat").decode(defenc) stats = Stats._list_from_string(self.rorepo, output) - self.assertEqual(2, stats.total["files"]) - self.assertEqual(52, stats.total["lines"]) - self.assertEqual(29, stats.total["insertions"]) + self.assertEqual(3, stats.total["files"]) + self.assertEqual(59, stats.total["lines"]) + self.assertEqual(36, stats.total["insertions"]) self.assertEqual(23, stats.total["deletions"]) self.assertEqual(29, stats.files["a.txt"]["insertions"]) self.assertEqual(18, stats.files["a.txt"]["deletions"]) + self.assertEqual("M", stats.files["a.txt"]["change_type"]) self.assertEqual(0, stats.files["b.txt"]["insertions"]) self.assertEqual(5, stats.files["b.txt"]["deletions"]) + self.assertEqual("M", stats.files["b.txt"]["change_type"]) + + self.assertEqual(7, stats.files["c.txt"]["insertions"]) + self.assertEqual(0, stats.files["c.txt"]["deletions"]) + self.assertEqual("A", stats.files["c.txt"]["change_type"]) diff --git a/test/test_submodule.py b/test/test_submodule.py index d88f9dab0..f44f086c2 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -753,6 +753,22 @@ def test_add_empty_repo(self, rwdir): ) # END for each checkout mode + @with_rw_directory + @_patch_git_config("protocol.file.allow", "always") + def test_update_submodule_with_relative_path(self, rwdir): + repo_path = osp.join(rwdir, "parent") + repo = git.Repo.init(repo_path) + module_repo_path = osp.join(rwdir, "module") + module_repo = git.Repo.init(module_repo_path) + module_repo.git.commit(m="test", allow_empty=True) + repo.git.submodule("add", "../module", "module") + repo.index.commit("add submodule") + + cloned_repo_path = osp.join(rwdir, "cloned_repo") + cloned_repo = git.Repo.clone_from(repo_path, cloned_repo_path) + + cloned_repo.submodule_update(init=True, recursive=True) + @with_rw_directory @_patch_git_config("protocol.file.allow", "always") def test_list_only_valid_submodules(self, rwdir): diff --git a/test/test_tree.py b/test/test_tree.py index 73158113d..7ba93bd36 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -126,12 +126,18 @@ def test_traverse(self): assert len(list(root)) == len(list(root.traverse(depth=1))) # Only choose trees. - trees_only = lambda i, d: i.type == "tree" + + def trees_only(i, _d): + return i.type == "tree" + trees = list(root.traverse(predicate=trees_only)) assert len(trees) == len([i for i in root.traverse() if trees_only(i, 0)]) # Test prune. - lib_folder = lambda t, d: t.path == "lib" + + def lib_folder(t, _d): + return t.path == "lib" + pruned_trees = list(root.traverse(predicate=trees_only, prune=lib_folder)) assert len(pruned_trees) < len(trees) diff --git a/test/test_util.py b/test/test_util.py index dad2f3dcd..000830f41 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -34,6 +34,7 @@ LockFile, cygpath, decygpath, + is_cygwin_git, get_user_id, remove_password_if_present, rmtree, @@ -349,6 +350,24 @@ def test_decygpath(self, wpath, cpath): assert wcpath == wpath.replace("/", "\\"), cpath +class TestIsCygwinGit: + """Tests for :func:`is_cygwin_git`""" + + def test_on_path_executable(self): + # Currently we assume tests run on Cygwin use Cygwin git. See #533 and #1455 for background. + if sys.platform == "cygwin": + assert is_cygwin_git("git") + else: + assert not is_cygwin_git("git") + + def test_none_executable(self): + assert not is_cygwin_git(None) + + def test_with_missing_uname(self): + """Test for handling when `uname` isn't in the same directory as `git`""" + assert not is_cygwin_git("/bogus_path/git") + + class _Member: """A member of an IterableList."""