diff --git a/CHANGELOG.md b/CHANGELOG.md index 798a8f72..3d095b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ... +## [0.2.1] - 2024-02-18 +### Added +- upath: added `UPath.joinuri()` (#189) + +### Fixed +- fixed `UPath` instances not hashable (#188) +- fixed missing `packaging` dependency (#187) +- fixed pypi package classifiers + ## [0.2.0] - 2024-02-13 ### Added - upath: support Python 3.12 (#152) @@ -109,7 +118,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - started a changelog to keep track of significant changes -[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.2.0...HEAD +[Unreleased]: https://github.com/fsspec/universal_pathlib/compare/v0.2.1...HEAD +[0.2.1]: https://github.com/fsspec/universal_pathlib/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/fsspec/universal_pathlib/compare/v0.1.4...v0.2.0 [0.1.4]: https://github.com/fsspec/universal_pathlib/compare/v0.1.3...v0.1.4 [0.1.3]: https://github.com/fsspec/universal_pathlib/compare/v0.1.2...v0.1.3 diff --git a/setup.cfg b/setup.cfg index 8a6dd66a..bf310953 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,6 +16,7 @@ classifiers = Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Development Status :: 4 - Beta [options] diff --git a/upath/_flavour.py b/upath/_flavour.py index 3b64e0fb..aba592ed 100644 --- a/upath/_flavour.py +++ b/upath/_flavour.py @@ -27,6 +27,7 @@ __all__ = [ "FSSpecFlavour", + "upath_urijoin", ] @@ -299,3 +300,64 @@ def splitroot(p): return splitroot else: raise NotImplementedError(f"unsupported module: {mod!r}") + + +def upath_urijoin(base: str, uri: str) -> str: + """Join a base URI and a possibly relative URI to form an absolute + interpretation of the latter.""" + # see: + # 
https://github.com/python/cpython/blob/ae6c01d9d2/Lib/urllib/parse.py#L539-L605 + # modifications: + # - removed allow_fragments parameter + # - all schemes are considered to allow relative paths + # - all schemes are considered to allow netloc (revisit this) + # - no bytes support (removes encoding and decoding) + if not base: + return uri + if not uri: + return base + + bs = urlsplit(base, scheme="") + us = urlsplit(uri, scheme=bs.scheme) + + if us.scheme != bs.scheme: # or us.scheme not in uses_relative: + return uri + # if us.scheme in uses_netloc: + if us.netloc: + return us.geturl() + else: + us = us._replace(netloc=bs.netloc) + # end if + if not us.path and not us.fragment: + us = us._replace(path=bs.path, fragment=bs.fragment) + if not us.query: + us = us._replace(query=bs.query) + return us.geturl() + + base_parts = bs.path.split("/") + if base_parts[-1] != "": + del base_parts[-1] + + if us.path[:1] == "/": + segments = us.path.split("/") + else: + segments = base_parts + us.path.split("/") + segments[1:-1] = filter(None, segments[1:-1]) + + resolved_path = [] + + for seg in segments: + if seg == "..": + try: + resolved_path.pop() + except IndexError: + pass + elif seg == ".": + continue + else: + resolved_path.append(seg) + + if segments[-1] in (".", ".."): + resolved_path.append("") + + return us._replace(path="/".join(resolved_path) or "/").geturl() diff --git a/upath/core.py b/upath/core.py index be5b3abe..bea2eba6 100644 --- a/upath/core.py +++ b/upath/core.py @@ -20,6 +20,7 @@ from upath._compat import str_remove_prefix from upath._compat import str_remove_suffix from upath._flavour import FSSpecFlavour +from upath._flavour import upath_urijoin from upath._protocol import get_upath_protocol from upath._stat import UPathStatResult from upath.registry import get_upath_class @@ -233,14 +234,17 @@ def __init__( @property def protocol(self) -> str: + """The fsspec protocol for the path.""" return self._protocol @property def storage_options(self) -> 
Mapping[str, Any]: + """The fsspec storage options for the path.""" return MappingProxyType(self._storage_options) @property def fs(self) -> AbstractFileSystem: + """The cached fsspec filesystem instance for the path.""" try: return self._fs_cached except AttributeError: @@ -251,8 +255,21 @@ def fs(self) -> AbstractFileSystem: @property def path(self) -> str: + """The path that a fsspec filesystem can use.""" return super().__str__() + def joinuri(self, uri: str | os.PathLike[str]) -> UPath: + """Join with urljoin behavior for UPath instances""" + # short circuit if the new uri uses a different protocol + other_protocol = get_upath_protocol(uri) + if other_protocol and other_protocol != self._protocol: + return UPath(uri) + return UPath( + upath_urijoin(str(self), str(uri)), + protocol=other_protocol or self._protocol, + **self.storage_options, + ) + # === upath.UPath CUSTOMIZABLE API ================================ @classmethod @@ -559,19 +576,23 @@ def is_reserved(self): return False def __eq__(self, other): + """UPaths are considered equal if their protocol, path and + storage_options are equal.""" if not isinstance(other, UPath): return NotImplemented return ( self.path == other.path + and self.protocol == other.protocol and self.storage_options == other.storage_options - and ( - get_filesystem_class(self.protocol) - == get_filesystem_class(other.protocol) - ) ) def __hash__(self): - return hash((self.path, self.storage_options, self.protocol)) + """The returned hash is based on the protocol and path only. 
+ + Note: in the future, if hash collisions become an issue, we + can add `fsspec.utils.tokenize(storage_options)` + """ + return hash((self.protocol, self.path)) def relative_to(self, other, /, *_deprecated, walk_up=False): if isinstance(other, UPath) and self.storage_options != other.storage_options: @@ -586,6 +607,17 @@ def is_relative_to(self, other, /, *_deprecated): return False return super().is_relative_to(other, *_deprecated) + @property + def name(self): + tail = self._tail + if not tail: + return "" + name = tail[-1] + if not name and len(tail) >= 2: + return tail[-2] + else: + return name + # === pathlib.Path ================================================ def stat(self, *, follow_symlinks=True) -> UPathStatResult: diff --git a/upath/implementations/local.py b/upath/implementations/local.py index dd7dcce2..038872ad 100644 --- a/upath/implementations/local.py +++ b/upath/implementations/local.py @@ -11,9 +11,6 @@ from typing import MutableMapping from urllib.parse import SplitResult -from fsspec import __version__ as fsspec_version -from packaging.version import Version - from upath._flavour import FSSpecFlavour as _FSSpecFlavour from upath.core import UPath @@ -24,7 +21,21 @@ "WindowsUPath", ] -_LISTDIR_WORKS_ON_FILES = Version(fsspec_version) >= Version("2024.2.0") +_LISTDIR_WORKS_ON_FILES: bool | None = None + + +def _check_listdir_works_on_files() -> bool: + global _LISTDIR_WORKS_ON_FILES + from fsspec.implementations.local import LocalFileSystem + + fs = LocalFileSystem() + try: + fs.ls(__file__) + except NotADirectoryError: + _LISTDIR_WORKS_ON_FILES = w = False + else: + _LISTDIR_WORKS_ON_FILES = w = True + return w class LocalPath(UPath): @@ -49,6 +60,8 @@ class FilePath(LocalPath): __slots__ = () def iterdir(self): + if _LISTDIR_WORKS_ON_FILES is None: + _check_listdir_works_on_files() if _LISTDIR_WORKS_ON_FILES and self.is_file(): raise NotADirectoryError(f"{self}") return super().iterdir() diff --git a/upath/tests/cases.py 
b/upath/tests/cases.py index f08a52eb..5037cce0 100644 --- a/upath/tests/cases.py +++ b/upath/tests/cases.py @@ -498,3 +498,19 @@ def test_access_to_private_api(self): assert isinstance(p._root, str) p = UPath(str(self.path), **self.path.storage_options) assert isinstance(p._parts, (list, tuple)) + + def test_hashable(self): + assert hash(self.path) + + def test_storage_options_dont_affect_hash(self): + p0 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p1 = UPath(str(self.path), test_extra=2, **self.path.storage_options) + assert hash(p0) == hash(p1) + + def test_eq(self): + p0 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p1 = UPath(str(self.path), test_extra=1, **self.path.storage_options) + p2 = UPath(str(self.path), test_extra=2, **self.path.storage_options) + assert p0 == p1 + assert p0 != p2 + assert p1 != p2 diff --git a/upath/tests/implementations/test_http.py b/upath/tests/implementations/test_http.py index 75417800..6effd5c4 100644 --- a/upath/tests/implementations/test_http.py +++ b/upath/tests/implementations/test_http.py @@ -143,3 +143,45 @@ def test_empty_parts(args, parts): pth = UPath(args) pth_parts = pth.parts assert pth_parts == parts + + +def test_query_parameters_passthrough(): + pth = UPath("http://example.com/?a=1&b=2") + assert pth.parts == ("http://example.com/", "?a=1&b=2") + + +@pytest.mark.parametrize( + "base,rel,expected", + [ + ( + "http://www.example.com/a/b/index.html", + "image.png?version=1", + "http://www.example.com/a/b/image.png?version=1", + ), + ( + "http://www.example.com/a/b/index.html", + "../image.png", + "http://www.example.com/a/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "/image.png", + "http://www.example.com/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "ftp://other.com/image.png", + "ftp://other.com/image.png", + ), + ( + "http://www.example.com/a/b/index.html", + "//other.com/image.png", + "http://other.com/image.png", + ), + ], 
+) +def test_joinuri_behavior(base, rel, expected): + p0 = UPath(base) + pr = p0.joinuri(rel) + pe = UPath(expected) + assert pr == pe diff --git a/upath/tests/implementations/test_s3.py b/upath/tests/implementations/test_s3.py index 9b57f013..391bd4fb 100644 --- a/upath/tests/implementations/test_s3.py +++ b/upath/tests/implementations/test_s3.py @@ -113,3 +113,14 @@ def s3_with_plus_chr_name(s3_server): for dir, _, keys in s3.walk(bucket): for key in keys: s3.rm(f"{dir}/{key}") + + +def test_path_with_hash_and_space(): + assert "with#hash and space" in UPath("s3://bucket/with#hash and space/abc").parts + + +def test_pathlib_consistent_join(): + b0 = UPath("s3://mybucket/withkey/").joinpath("subfolder/myfile.txt") + b1 = UPath("s3://mybucket/withkey").joinpath("subfolder/myfile.txt") + assert b0 == b1 + assert "s3://mybucket/withkey/subfolder/myfile.txt" == str(b0) == str(b1) diff --git a/upath/tests/test_core.py b/upath/tests/test_core.py index 9baf6e6d..d9a42d65 100644 --- a/upath/tests/test_core.py +++ b/upath/tests/test_core.py @@ -375,3 +375,16 @@ def test_normalize(unnormalized, normalized): pass assert expected == result assert str(expected) == str(result) + + +@pytest.mark.parametrize( + "uri,query_str", + [ + ("s3://bucket/folder?versionId=1", "?versionId=1"), + ("http://example.com/abc?p=2", "?p=2"), + ], +) +def test_query_string(uri, query_str): + p = UPath(uri) + assert str(p).endswith(query_str) + assert p.path.endswith(query_str)