From 6e4cee4fa708465cf714e36a9dc8b9b6b94acc0c Mon Sep 17 00:00:00 2001 From: Stefan Gmeiner Date: Thu, 21 Dec 2023 21:33:37 +0100 Subject: [PATCH 001/642] Fix Items of type PathLike --- git/index/base.py | 2 +- test/test_index.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/git/index/base.py b/git/index/base.py index 112ad3feb..cffb213a9 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -940,7 +940,7 @@ def _items_to_rela_paths( for item in items: if isinstance(item, (BaseIndexEntry, (Blob, Submodule))): paths.append(self._to_relative_path(item.path)) - elif isinstance(item, str): + elif isinstance(item, (str, os.PathLike)): paths.append(self._to_relative_path(item)) else: raise TypeError("Invalid item type: %r" % item) diff --git a/test/test_index.py b/test/test_index.py index 6b746b8b4..50d941e83 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -558,14 +558,16 @@ def test_index_mutation(self, rw_repo): def mixed_iterator(): count = 0 for entry in index.entries.values(): - type_id = count % 4 - if type_id == 0: # path + type_id = count % 5 + if type_id == 0: # path (str) yield entry.path - elif type_id == 1: # blob + elif type_id == 1: # path (PathLike) + yield Path(entry.path) + elif type_id == 2: # blob yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 2: # BaseIndexEntry + elif type_id == 3: # BaseIndexEntry yield BaseIndexEntry(entry[:4]) - elif type_id == 3: # IndexEntry + elif type_id == 4: # IndexEntry yield entry else: raise AssertionError("Invalid Type") From 53e73830f64e13a88b0e246fb825944a6fe2848e Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 00:57:14 -0500 Subject: [PATCH 002/642] Remove explicit PushInfo/FetchInfo inheritance from object I had missed these in f78587f (#1725). --- git/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/remote.py b/git/remote.py index 4055dba2e..98a421b3a 100644 --- a/git/remote.py +++ b/git/remote.py @@ -130,7 +130,7 @@ def to_progress_instance( return progress -class PushInfo(IterableObj, object): +class PushInfo(IterableObj): """ Carries information about the result of a push operation of a single head:: @@ -300,7 +300,7 @@ def raise_if_error(self) -> None: raise self.error -class FetchInfo(IterableObj, object): +class FetchInfo(IterableObj): """ Carries information about the results of a fetch operation of a single head:: From 96fc3547cf62c5ade1477c24566c8b34254a1507 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 01:55:30 -0500 Subject: [PATCH 003/642] Add tests for current Submodule.iter_items behavior Where the behavior is intended. In the case of an invalid hash (or IOError, which in Python 2 was a subclass of OSError but now is just another name for it), the behavior of just yielding no items may be unintuitive, since on most other errors an exception is raised. However, examining the code reveals this behavior is clearly intentional. Furthrmore, it may be reasonable for applications to rely on it, and it may be convenient in some situations. For backward compatibility, it probably can't be changed significantly. This adds tests that show both an error that does raise an error-representing exception -- a well-formed hash not present in the repository raising ValueError with a suitable message -- and an error that silently causes the iterator to yield zero items. --- test/test_submodule.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_submodule.py b/test/test_submodule.py index 4dc89f98f..5226d7a6e 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -688,6 +688,17 @@ def test_root_module(self, rwrepo): # gitdb: has either 1 or 2 submodules depending on the version. assert len(nsm.children()) >= 1 and nsmc.module_exists() + def test_iter_items_from_nonexistent_hash(self): + it = Submodule.iter_items(self.rorepo, "b4ecbfaa90c8be6ed6d9fb4e57cc824663ae15b4") + with self.assertRaisesRegex(ValueError, r"\bcould not be resolved\b"): + next(it) + + def test_iter_items_from_invalid_hash(self): + """Check legacy behavaior on BadName (also applies to IOError, i.e. OSError).""" + it = Submodule.iter_items(self.rorepo, "xyz") + with self.assertRaises(StopIteration): + next(it) + @with_rw_repo(k_no_subm_tag, bare=False) def test_first_submodule(self, rwrepo): assert len(list(rwrepo.iter_submodules())) == 0 From f5dc1c4713dfb937d45d10a595ac879d6e76481c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 02:16:28 -0500 Subject: [PATCH 004/642] Expand "invalid hash" test to assert normal StopIteration Returning an explicit value from a generator function causes that value to be bound to the `value` attribute of the StopIteration exception. This is available as the result of "yield from" when it is used as an expression; or by explicitly catching StopIteration, binding the StopIteration exception to a variable, and accessing the attribute. This feature of generators is rarely used. The `return iter([])` statement in Submodule.iter_items uses this feature, causing the resulting StopIteration exception object to have a `value` attribute that refers to a separate second iterator that also yields no values (#1779). From context, this behavior is clearly not the goal; a bare return statement should be used here (which has the same effect except for the `value` attribute of the StopIteration exception). The code had used a bare return prior to 82b131c (#1282), when `return` was changed to `return iter([])`. That was part of a change that added numerous type annotations. It looks like it was either a mistake, or possibly an attempt to work around an old bug in a static type checker. This commit extends the test_iter_items_from_invalid_hash test to assert that the `value` attribute of the StopIteration is its usual default value of None. This commit only extends the test; it does not fix the bug. --- test/test_submodule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_submodule.py b/test/test_submodule.py index 5226d7a6e..993f6b57e 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -696,8 +696,9 @@ def test_iter_items_from_nonexistent_hash(self): def test_iter_items_from_invalid_hash(self): """Check legacy behavaior on BadName (also applies to IOError, i.e. OSError).""" it = Submodule.iter_items(self.rorepo, "xyz") - with self.assertRaises(StopIteration): + with self.assertRaises(StopIteration) as ctx: next(it) + self.assertIsNone(ctx.exception.value) @with_rw_repo(k_no_subm_tag, bare=False) def test_first_submodule(self, rwrepo): From c3c008c4971f9d4189dc08e88334a207ce14298c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 02:44:56 -0500 Subject: [PATCH 005/642] In Submodule.iter_items, don't attach second empty iterator This fixes the minor bug where a separate empty iterator was bound to the StopIteration exception raised as a result of returning from the generator function (#1779). This change does not cause what exceptions are raised from GitPython code in any situations, nor how many items any iterators yield. --- git/objects/submodule/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 651d9535c..49dfedf9a 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1401,7 +1401,7 @@ def iter_items( pc = repo.commit(parent_commit) # Parent commit instance parser = cls._config_parser(repo, pc, read_only=True) except (IOError, BadName): - return iter([]) + return # END handle empty iterator for sms in parser.sections(): From dfee31f2100d7f4a653c69c4a5a505607fe328e1 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 03:48:57 -0500 Subject: [PATCH 006/642] Improve self-documentation of IterableObj and related classes - Fill in the missing part of the explanation of why to favor iter_items over list_items in IterableObj and Iterable (#1775). - Move the explanation of how subclasses must treat arguments from the list_items methods to the iter_items methods, because the iter_items methdos are the ones that are abstract and must be overridden by a well-behaved subclass, and also because, since the iter_items methods are preferred for use, they should be the place where less duplicated shared documentation resides. - Subtantially reword the existing documentation for clarity, especially regarding the signifance of extra args and kwargs. - Put the iter_items method before (i.e. above) the list_items method (in each of the IterableObj and Iterable classes), because that method is the one that should be used more often, and because it is also now the one with the more detailed docstring. - Remove and old comment on a return type that said exactly the exact same thing as the annotation. - In Iterable, note deprecation more consistently (and thus in more places). - Rewrite the IterableClassWatcher class docstring to explain exactly what that metaclass achieves. --- git/util.py | 84 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/git/util.py b/git/util.py index 0a5da7d71..5acc001f7 100644 --- a/git/util.py +++ b/git/util.py @@ -1183,7 +1183,8 @@ def __delitem__(self, index: Union[SupportsIndex, int, slice, str]) -> None: class IterableClassWatcher(type): - """Metaclass that watches.""" + """Metaclass that issues :class:`DeprecationWarning` when :class:`git.util.Iterable` + is subclassed.""" def __init__(cls, name: str, bases: Tuple, clsdict: Dict) -> None: for base in bases: @@ -1199,23 +1200,42 @@ def __init__(cls, name: str, bases: Tuple, clsdict: Dict) -> None: class Iterable(metaclass=IterableClassWatcher): - """Defines an interface for iterable items, so there is a uniform way to retrieve - and iterate items within the git repository.""" + """Deprecated, use :class:`IterableObj` instead. + + Defines an interface for iterable items, so there is a uniform way to retrieve + and iterate items within the git repository. + """ __slots__ = () _id_attribute_ = "attribute that most suitably identifies your instance" @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + # return typed to be compatible with subtypes e.g. Remote + """Deprecated, use :class:`IterableObj` instead. + + Find (all) items of this type. + + Subclasses can specify ``args`` and ``kwargs`` differently, and may use them for + filtering. However, when the method is called with no additional positional or + keyword arguments, subclasses are obliged to to yield all items. + + :return: Iterator yielding Items """ - Deprecated, use IterableObj instead. + raise NotImplementedError("To be implemented by Subclass") + + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + """Deprecated, use :class:`IterableObj` instead. + + Find (all) items of this type and collect them into a list. - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. + For more information about the arguments, see :meth:`list_items`. - :note: Favor the iter_items method as it will + :note: Favor the :meth:`iter_items` method as it will avoid eagerly collecting + all items. When there are many items, that can slow performance and increase + memory usage. :return: list(Item,...) list of item instances """ @@ -1223,15 +1243,6 @@ def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: out_list.extend(cls.iter_items(repo, *args, **kwargs)) return out_list - @classmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: - # return typed to be compatible with subtypes e.g. Remote - """For more information about the arguments, see list_items. - - :return: Iterator yielding Items - """ - raise NotImplementedError("To be implemented by Subclass") - @runtime_checkable class IterableObj(Protocol): @@ -1246,13 +1257,30 @@ class IterableObj(Protocol): _id_attribute_: str @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + @abstractmethod + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: + # Return-typed to be compatible with subtypes e.g. Remote. + """Find (all) items of this type. + + Subclasses can specify ``args`` and ``kwargs`` differently, and may use them for + filtering. However, when the method is called with no additional positional or + keyword arguments, subclasses are obliged to to yield all items. + + For more information about the arguments, see list_items. + + :return: Iterator yielding Items """ - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. + raise NotImplementedError("To be implemented by Subclass") + + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + """Find (all) items of this type and collect them into a list. + + For more information about the arguments, see :meth:`list_items`. - :note: Favor the iter_items method as it will + :note: Favor the :meth:`iter_items` method as it will avoid eagerly collecting + all items. When there are many items, that can slow performance and increase + memory usage. :return: list(Item,...) list of item instances """ @@ -1260,16 +1288,6 @@ def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_I out_list.extend(cls.iter_items(repo, *args, **kwargs)) return out_list - @classmethod - @abstractmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: # Iterator[T_IterableObj]: - # Return-typed to be compatible with subtypes e.g. Remote. - """For more information about the arguments, see list_items. - - :return: Iterator yielding Items - """ - raise NotImplementedError("To be implemented by Subclass") - # } END classes From 94a85d1159e6374e901df4e3bd284cf55b623e66 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 14:31:28 -0500 Subject: [PATCH 007/642] Convert constant and attribute comments to docstrings These are "non-reified docstrings" as described in #1734. They are not made part of the data model, but most editors will display them, including on symbols present in code of other projects that use the GitPython library. A number of these have already been added, but this adds what will probably be most of the remaining ones. For the most part, this doesn't create documentation where there wasn't any, but instead converts existing comments to "docstrings." In a handful of cases, they are expanded, reworded, or a docstring added. This also fixes some small style inconsistencies that were missed in #1725, and moves a comment that had become inadvertently displaced due to autoformatting to the item it was meant for. The major omission here is HIDE_WINDOWS_KNOWN_ERRORS and HIDE_WINDOWS_FREEZE_ERRORS. This doesn't convert the comments above them to "docstrings," for a few reasons. They are not specific to either of the symbols, they are oriented toward considerations that are not really relevant except when developing GitPython itself, and they are out of date. Also, because HIDE_WINDOWS_KNOWN_ERRORS is listed in __all__, increasing the level of documentation for it might be taken as a committment to preserve some aspect of its current behavior, which could interfere with progress on #790. So I've kept those comments as comments, and unchanged, for now. --- git/cmd.py | 25 ++++++++++++++++--------- git/config.py | 20 ++++++++++++-------- git/diff.py | 2 +- git/index/base.py | 6 ++++-- git/index/fun.py | 4 +++- git/objects/submodule/base.py | 11 ++++++----- git/repo/base.py | 31 ++++++++++++++++++++----------- git/util.py | 4 ++-- 8 files changed, 64 insertions(+), 39 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 27148d3d6..9518c2c8c 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -273,23 +273,30 @@ def __setstate__(self, d: Dict[str, Any]) -> None: # CONFIGURATION - git_exec_name = "git" # Default that should work on Linux and Windows. + git_exec_name = "git" + """Default git command that should work on Linux, Windows, and other systems.""" - # Enables debugging of GitPython's git commands. GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + """Enables debugging of GitPython's git commands.""" - # If True, a shell will be used when executing git commands. - # This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126 - # and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required. - # Override this value using `Git.USE_SHELL = True`. USE_SHELL = False + """If True, a shell will be used when executing git commands. + + This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126 + and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required. + + Override this value using ``Git.USE_SHELL = True``. + """ - # Provide the full path to the git executable. Otherwise it assumes git is in the path. _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" _refresh_env_var = "GIT_PYTHON_REFRESH" + GIT_PYTHON_GIT_EXECUTABLE = None - # Note that the git executable is actually found during the refresh step in - # the top level __init__. + """Provide the full path to the git executable. Otherwise it assumes git is in the path. + + Note that the git executable is actually found during the refresh step in + the top level ``__init__``. + """ @classmethod def refresh(cls, path: Union[None, PathLike] = None) -> bool: diff --git a/git/config.py b/git/config.py index 5708a7385..2730ddaf3 100644 --- a/git/config.py +++ b/git/config.py @@ -65,12 +65,14 @@ log.addHandler(logging.NullHandler()) -# The configuration level of a configuration file. CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") +"""The configuration level of a configuration file.""" -# Section pattern to detect conditional includes. -# https://git-scm.com/docs/git-config#_conditional_includes CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +"""Section pattern to detect conditional includes. + +See: https://git-scm.com/docs/git-config#_conditional_includes +""" class MetaParserBuilder(abc.ABCMeta): # noqa: B024 @@ -283,12 +285,14 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): """ # { Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile t_lock = LockFile - re_comment = re.compile(r"^\s*[#;]") + """The lock type determines the type of lock to use in new configuration readers. + They must be compatible to the LockFile interface. + A suitable alternative would be the :class:`~git.util.BlockingLockFile`. + """ + + re_comment = re.compile(r"^\s*[#;]") # } END configuration optvalueonly_source = r"\s*(?P