-
Notifications
You must be signed in to change notification settings - Fork 146
Added check for anon on File read methods #1783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
60dc069
35bd1cf
ff89538
de598c5
d9ff11a
2179a70
de253dd
7e3364f
5eda853
f6f1b5f
572e755
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,13 +63,45 @@ class BucketStatus(NamedTuple): | |
| error: str | None = None | ||
|
|
||
|
|
||
def _anon_fallback(method):
    """Retry a Client method once with anonymous access on PermissionError.

    Only marks the bucket as anon-needed if the retry actually succeeds, so
    genuinely inaccessible buckets keep raising clean errors instead of
    being silently cached as anon.

    If the anonymous retry fails for any reason (not only PermissionError),
    the filesystem that was in place before the retry is restored, so the
    client is never left on an anonymous filesystem for a bucket that was
    not marked as anon.

    Args:
        method: The method to wrap. It is invoked as
            ``method(self, *args, **kwargs)``.

    Returns:
        A wrapper with the same call signature as ``method``.

    Raises:
        PermissionError: Re-raised from the first attempt when the client is
            already anonymous (``fs_kwargs["anon"]`` is truthy or the bucket
            is already marked), or from the retry when it is denied as well.
    """

    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except PermissionError:
            # Already anonymous (explicitly or via the bucket cache): there is
            # nothing else to try, so surface the original error.
            if self.fs_kwargs.get("anon") or self._bucket_needs_anon(self.name):
                raise
            saved_fs = self._fs
            self._fs = type(self).create_fs(**{**self.fs_kwargs, "anon": True})
            try:
                result = method(self, *args, **kwargs)
            except BaseException:
                # Roll back on *any* failure: the bucket is only marked after
                # a successful retry, so the anon filesystem must not stay
                # installed when the retry did not succeed.
                self._fs = saved_fs
                raise
            self._mark_bucket_anon(self.name)
            return result

    return wrapper
|
shcheklein marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| class Client(ABC): | ||
| MAX_THREADS = multiprocessing.cpu_count() | ||
| FS_CLASS: ClassVar[type["AbstractFileSystem"]] | ||
| PREFIX: ClassVar[str] | ||
| protocol: ClassVar[str] | ||
| # client_config keys this backend treats as credentials. | ||
| CREDENTIAL_KEYS: ClassVar[frozenset[str]] = frozenset() | ||
| # Process-local cache of (protocol, bucket) pairs that have been | ||
| # resolved as needing anonymous access. Populated only after an anon | ||
| # retry actually succeeds. | ||
| _ANON_BUCKETS: ClassVar[set[tuple[str, str]]] = set() | ||
|
shcheklein marked this conversation as resolved.
Outdated
|
||
|
|
||
| @classmethod | ||
| def has_explicit_credentials(cls, client_config: dict | None) -> bool: | ||
|
|
@@ -78,6 +110,14 @@ def has_explicit_credentials(cls, client_config: dict | None) -> bool: | |
| return False | ||
| return any(k in client_config for k in cls.CREDENTIAL_KEYS) | ||
|
|
||
| @classmethod | ||
| def _bucket_needs_anon(cls, name: str) -> bool: | ||
| return (cls.protocol, name) in cls._ANON_BUCKETS | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can there be a distinction by prefix — one prefix inside a bucket allows anonymous access while another prefix doesn't?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. True, this can be an edge case.
My suggestion is to simply not use the cache when credentials are explicitly set, and that's it. |
||
|
|
||
| @classmethod | ||
| def _mark_bucket_anon(cls, name: str) -> None: | ||
| cls._ANON_BUCKETS.add((cls.protocol, name)) | ||
|
|
||
| def __init__(self, name: str, fs_kwargs: dict[str, Any], cache: Cache) -> None: | ||
| self.name = name | ||
| self.fs_kwargs = fs_kwargs | ||
|
|
@@ -232,7 +272,10 @@ def split_url(cls, url: str) -> tuple[str, str]: | |
| @property | ||
| def fs(self) -> "AbstractFileSystem": | ||
| if not self._fs: | ||
| self._fs = self.create_fs(**self.fs_kwargs) | ||
| kwargs = dict(self.fs_kwargs) | ||
| if self._bucket_needs_anon(self.name): | ||
| kwargs["anon"] = True | ||
| self._fs = self.create_fs(**kwargs) | ||
| return self._fs | ||
|
|
||
| def url( | ||
|
|
@@ -251,6 +294,7 @@ async def get_current_etag(self, file: "File") -> str: | |
| info = await self.fs._info(full_path, **self._file_info_kwargs(file.version)) | ||
| return self.info_to_file(info, file.path).etag | ||
|
|
||
| @_anon_fallback | ||
|
shcheklein marked this conversation as resolved.
Outdated
|
||
| def get_file_info(self, path: str, version_id: str | None = None) -> "File": | ||
| self.validate_file_path(path) | ||
| full_path = self.get_uri(path) | ||
|
|
@@ -435,6 +479,7 @@ def do_instantiate_object(self, file: "File", dst: str) -> None: | |
| # Default to copy if reflinks are not supported | ||
| shutil.copy2(src, dst) | ||
|
|
||
| @_anon_fallback | ||
| def open_object( | ||
| self, file: "File", use_cache: bool = True, cb: Callback = DEFAULT_CALLBACK | ||
| ) -> BinaryIO: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.