From 7528316877f2d0a9db3d36b556717eaa82da68b9 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 17 Feb 2024 14:56:51 +0100 Subject: [PATCH 1/6] upath.implementations.local: remove dependency on packaging (#187) Close #186 --- upath/implementations/local.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/upath/implementations/local.py b/upath/implementations/local.py index dd7dcce2..038872ad 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -11,9 +11,6 @@ from typing import MutableMapping from urllib.parse import SplitResult -from fsspec import __version__ as fsspec_version -from packaging.version import Version - from upath._flavour import FSSpecFlavour as _FSSpecFlavour from upath.core import UPath @@ -24,7 +21,21 @@ "WindowsUPath", ] -_LISTDIR_WORKS_ON_FILES = Version(fsspec_version) >= Version("2024.2.0") +_LISTDIR_WORKS_ON_FILES: bool | None = None + + +def _check_listdir_works_on_files() -> bool: + global _LISTDIR_WORKS_ON_FILES + from fsspec.implementations.local import LocalFileSystem + + fs = LocalFileSystem() + try: + fs.ls(__file__) + except NotADirectoryError: + _LISTDIR_WORKS_ON_FILES = w = False + else: + _LISTDIR_WORKS_ON_FILES = w = True + return w class LocalPath(UPath): @@ -49,6 +60,8 @@ class FilePath(LocalPath): __slots__ = () def iterdir(self): + if _LISTDIR_WORKS_ON_FILES is None: + _check_listdir_works_on_files() if _LISTDIR_WORKS_ON_FILES and self.is_file(): raise NotADirectoryError(f"{self}") return super().iterdir() From 3bab4c04ded80f9d3eeff6b2c261eb1bef97fc70 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sat, 17 Feb 2024 16:24:18 +0100 Subject: [PATCH 2/6] Fix UPath.__hash__ (#188) * tests: add test cases for hash * upath: fix __hash__ and __eq__ --- upath/core.py | 14 +++++++++----- upath/tests/cases.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/upath/core.py b/upath/core.py index be5b3abe..c541fb2a 
100644 --- a/upath/core.py +++ b/upath/core.py @@ -559,19 +559,23 @@ def is_reserved(self): return False def __eq__(self, other): + """UPaths are considered equal if their protocol, path and + storage_options are equal.""" if not isinstance(other, UPath): return NotImplemented return ( self.path == other.path + and self.protocol == other.protocol and self.storage_options == other.storage_options - and ( - get_filesystem_class(self.protocol) - == get_filesystem_class(other.protocol) - ) ) def __hash__(self): - return hash((self.path, self.storage_options, self.protocol)) + """The returned hash is based on the protocol and path only. + + Note: in the future, if hash collisions become an issue, we + can add `fsspec.utils.tokenize(storage_options)` + """ + return hash((self.protocol, self.path)) def relative_to(self, other, /, *_deprecated, walk_up=False): if isinstance(other, UPath) and self.storage_options != other.storage_options: diff --git a/upath/tests/cases.py b/upath/tests/cases.py index f08a52eb..5037cce0 100644 --- a/upath/tests/cases.py +++ b/upath/tests/cases.py @@ -498,3 +498,19 @@ def test_access_to_private_api(self): assert isinstance(p._root, str) p = UPath(str(self.path), **self.path.storage_options) assert isinstance(p._parts, (list, tuple)) + + def test_hashable(self): + assert hash(self.path) + + def test_storage_options_dont_affect_hash(self): + p0 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p1 = UPath(str(self.path), test_extra=2, **self.path.storage_options) + assert hash(p0) == hash(p1) + + def test_eq(self): + p0 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p1 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p2 = UPath(str(self.path), test_extra=2, **self.path.storage_options) + assert p0 == p1 + assert p0 != p2 + assert p1 != p2 From 1a117b3a9a1be96836991c954522f3f6be45bb60 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 18 Feb 2024 18:31:08 +0100 Subject: [PATCH 
3/6] Implement UPath.joinuri (#189) * tests: add tests for query passthrough and joinuri * upath._flavour: add upath_urijoin * upath: add UPath.joinuri method * upath: UPath().name returns last non-empty part --- upath/_flavour.py | 62 ++++++++++++++++++++++++ upath/core.py | 24 +++++++++ upath/tests/implementations/test_http.py | 42 ++++++++++++++++ 3 files changed, 128 insertions(+) diff --git a/upath/_flavour.py b/upath/_flavour.py index 3b64e0fb..aba592ed 100644 --- a/upath/_flavour.py +++ b/upath/_flavour.py @@ -27,6 +27,7 @@ __all__ = [ "FSSpecFlavour", + "upath_urijoin", ] @@ -299,3 +300,64 @@ def splitroot(p): return splitroot else: raise NotImplementedError(f"unsupported module: {mod!r}") + + +def upath_urijoin(base: str, uri: str) -> str: + """Join a base URI and a possibly relative URI to form an absolute + interpretation of the latter.""" + # see: + # https://github.com/python/cpython/blob/ae6c01d9d2/Lib/urllib/parse.py#L539-L605 + # modifications: + # - removed allow_fragments parameter + # - all schemes are considered to allow relative paths + # - all schemes are considered to allow netloc (revisit this) + # - no bytes support (removes encoding and decoding) + if not base: + return uri + if not uri: + return base + + bs = urlsplit(base, scheme="") + us = urlsplit(uri, scheme=bs.scheme) + + if us.scheme != bs.scheme: # or us.scheme not in uses_relative: + return uri + # if us.scheme in uses_netloc: + if us.netloc: + return us.geturl() + else: + us = us._replace(netloc=bs.netloc) + # end if + if not us.path and not us.fragment: + us = us._replace(path=bs.path, fragment=bs.fragment) + if not us.query: + us = us._replace(query=bs.query) + return us.geturl() + + base_parts = bs.path.split("/") + if base_parts[-1] != "": + del base_parts[-1] + + if us.path[:1] == "/": + segments = us.path.split("/") + else: + segments = base_parts + us.path.split("/") + segments[1:-1] = filter(None, segments[1:-1]) + + resolved_path = [] + + for seg in segments: + if seg == 
"..": + try: + resolved_path.pop() + except IndexError: + pass + elif seg == ".": + continue + else: + resolved_path.append(seg) + + if segments[-1] in (".", ".."): + resolved_path.append("") + + return us._replace(path="/".join(resolved_path) or "/").geturl() diff --git a/upath/core.py b/upath/core.py index c541fb2a..31343c1f 100644 --- a/upath/core.py +++ b/upath/core.py @@ -20,6 +20,7 @@ from upath._compat import str_remove_prefix from upath._compat import str_remove_suffix from upath._flavour import FSSpecFlavour +from upath._flavour import upath_urijoin from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class @@ -253,6 +254,18 @@ def fs(self) -> AbstractFileSystem: def path(self) -> str: return super().__str__() + def joinuri(self, uri: str | os.PathLike[str]) -> UPath: + """Join with urljoin behavior for UPath instances""" + # short circuit if the new uri uses a different protocol + other_protocol = get_upath_protocol(uri) + if other_protocol and other_protocol != self._protocol: + return UPath(uri) + return UPath( + upath_urijoin(str(self), str(uri)), + protocol=other_protocol or self._protocol, + **self.storage_options, + ) + # === upath.UPath CUSTOMIZABLE API ================================ @classmethod @@ -590,6 +603,17 @@ def is_relative_to(self, other, /, *_deprecated): return False return super().is_relative_to(other, *_deprecated) + @property + def name(self): + tail = self._tail + if not tail: + return "" + name = tail[-1] + if not name and len(tail) >= 2: + return tail[-2] + else: + return name + # === pathlib.Path ================================================ def stat(self, *, follow_symlinks=True) -> UPathStatResult: diff --git a/upath/tests/implementations/test_http.py b/upath/tests/implementations/test_http.py index 75417800..6effd5c4 100644 --- a/upath/tests/implementations/test_http.py +++ b/upath/tests/implementations/test_http.py @@ -143,3 +143,45 @@ def 
test_empty_parts(args, parts): pth = UPath(args) pth_parts = pth.parts assert pth_parts == parts + + +def test_query_parameters_passthrough(): + pth = UPath("http://example.com/?a=1&b=2") + assert pth.parts == ("http://example.com/", "?a=1&b=2") + + +@pytest.mark.parametrize( + "base,rel,expected", + [ + ( + "http://www.example.com/a/b/index.html", + "image.png?version=1", + "http://www.example.com/a/b/image.png?version=1", + ), + ( + "http://www.example.com/a/b/index.html", + "../image.png", + "http://www.example.com/a/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "/image.png", + "http://www.example.com/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "ftp://other.com/image.png", + "ftp://other.com/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "//other.com/image.png", + "http://other.com/image.png", + ), + ], +) +def test_joinuri_behavior(base, rel, expected): + p0 = UPath(base) + pr = p0.joinuri(rel) + pe = UPath(expected) + assert pr == pe From 72707084d02759550e93026f0578dd68cd767974 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 18 Feb 2024 18:43:00 +0100 Subject: [PATCH 4/6] tests: add query str passthrough test (#190) --- upath/tests/test_core.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 9baf6e6d..d9a42d65 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -375,3 +375,16 @@ def test_normalize(unnormalized, normalized): pass assert expected == result assert str(expected) == str(result) + + +@pytest.mark.parametrize( + "uri,query_str", + [ + ("s3://bucket/folder?versionId=1", "?versionId=1"), + ("http://example.com/abc?p=2", "?p=2"), + ], +) +def test_query_string(uri, query_str): + p = UPath(uri) + assert str(p).endswith(query_str) + assert p.path.endswith(query_str) From 4e2afca679b021b52acf0c67fa990b4585cc5fa7 Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 18 Feb 2024 
18:59:27 +0100 Subject: [PATCH 5/6] Add more s3 tests (#191) * tests: add s3 test with hash and space characters Close #164. * tests: ensure joinpath behavior on s3 consistent with pathlib Close #167. --- upath/tests/implementations/test_s3.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index 9b57f013..391bd4fb 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -113,3 +113,14 @@ def s3_with_plus_chr_name(s3_server): for dir, _, keys in s3.walk(bucket): for key in keys: s3.rm(f"{dir}/{key}") + + +def test_path_with_hash_and_space(): + assert "with#hash and space" in UPath("s3://bucket/with#hash and space/abc").parts + + +def test_pathlib_consistent_join(): + b0 = UPath("s3://mybucket/withkey/").joinpath("subfolder/myfile.txt") + b1 = UPath("s3://mybucket/withkey").joinpath("subfolder/myfile.txt") + assert b0 == b1 + assert "s3://mybucket/withkey/subfolder/myfile.txt" == str(b0) == str(b1) From 75e0a4da7fbfccf9d3e43d23d5cdbd141191a5df Mon Sep 17 00:00:00 2001 From: Andreas Poehlmann Date: Sun, 18 Feb 2024 19:12:32 +0100 Subject: [PATCH 6/6] Prepare release v0.2.1 (#192) * upath: add docstrings to UPath specific methods * upath: update classifiers in setup.py * upath: update changelog --- CHANGELOG.md | 12 +++++++++++- setup.cfg | 1 + upath/core.py | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 798a8f72..3d095b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ... 
+## [0.2.1] - 2024-02-18 +### Added +- upath: added `UPath.joinuri()` (#189) + +### Fixed +- fixed `UPath` instances not hashable (#188) +- fixed missing `packaging` dependency (#187) +- fixed pypi package classifiers + ## [0.2.0] - 2024-02-13 ### Added - upath: support Python 3.12 (#152) @@ -109,7 +118,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - started a changelog to keep track of significant changes -[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.2.1...HEAD +[0.2.1]: https://github.com/fsspec/universal_pathlib/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/fsspec/universal_pathlib/compare/v0.1.4...v0.2.0 [0.1.4]: https://github.com/fsspec/universal_pathlib/compare/v0.1.3...v0.1.4 [0.1.3]: https://github.com/fsspec/universal_pathlib/compare/v0.1.2...v0.1.3 diff --git a/setup.cfg b/setup.cfg index 8a6dd66a..bf310953 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,6 +16,7 @@ classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Development Status :: 4 - Beta [options] diff --git a/upath/core.py b/upath/core.py index 31343c1f..bea2eba6 100644 --- a/upath/core.py +++ b/upath/core.py @@ -234,14 +234,17 @@ def __init__( @property def protocol(self) -> str: + """The fsspec protocol for the path.""" return self._protocol @property def storage_options(self) -> Mapping[str, Any]: + """The fsspec storage options for the path.""" return MappingProxyType(self._storage_options) @property def fs(self) -> AbstractFileSystem: + """The cached fsspec filesystem instance for the path.""" try: return self._fs_cached except AttributeError: @@ -252,6 +255,7 @@ def fs(self) -> AbstractFileSystem: @property def path(self) -> str: + """The path that a fsspec filesystem can use.""" return super().__str__() def 
joinuri(self, uri: str | os.PathLike[str]) -> UPath: