diff --git a/README.md b/README.md index 2e0f15e..fd00aae 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ local_file_path = my_file.copy(force_overwrite=False) # Returns the path of the my_dir = AnyPath("https://account_name.blob.core.windows.net/container_name/path/to/dir") my_dir.exists() # True if my_path exists, otherwise False parent, name, stem = my_dir.parent, my_dir.name, my_dir.stem -files_in_dir: List[AnyPath] = my_dir.listdir() # List of AnyPath instances for files in the directory +files_in_dir: List[AnyPath] = my_dir.rglob('*') # List of AnyPath instances for files in the directory my_file = AnyPath("s3://bucket/path/to/file.txt") my_file.is_file() # True if my_path exists, otherwise False diff --git a/anypathlib/anypath.py b/anypathlib/anypath.py index 78b5ce5..10098a1 100644 --- a/anypathlib/anypath.py +++ b/anypathlib/anypath.py @@ -96,9 +96,6 @@ def is_file(self) -> bool: def exists(self) -> bool: return self.path_handler.exists(self.base_path) - def listdir(self) -> List['AnyPath']: - return [AnyPath(p) for p in self.path_handler.listdir(self.base_path)] - def remove(self): self.path_handler.remove(self.base_path) @@ -114,6 +111,15 @@ def stem(self) -> str: def name(self) -> str: return self.path_handler.name(self.base_path) + def iterdir(self) -> List['AnyPath']: + return [AnyPath(p) for p in self.path_handler.iterdir(self.base_path)] + + def glob(self, pattern: str) -> List['AnyPath']: + return [AnyPath(p) for p in self.path_handler.glob(self.base_path, pattern)] + + def rglob(self, pattern: str) -> List['AnyPath']: + return [AnyPath(p) for p in self.path_handler.rglob(self.base_path, pattern)] + def __get_local_path(self, target_path: Optional[Path] = None, force_overwrite: bool = False, verbose: bool = False) -> Optional[Path]: if target_path is None: diff --git a/anypathlib/cli.py b/anypathlib/cli.py index 84a1037..848b6bd 100644 --- a/anypathlib/cli.py +++ b/anypathlib/cli.py @@ -28,9 +28,9 @@ def exists(path): @click.command() 
@click.option('-p', 'path', required=True, type=click.STRING, help='Path to list') -def listdir(path): +def iterdir(path): """List the directory. """ - click.echo(AnyPath(path).listdir()) + click.echo(AnyPath(path).iterdir()) @click.command() @@ -42,7 +42,7 @@ def remove(path): cli.add_command(copy) cli.add_command(exists) -cli.add_command(listdir) +cli.add_command(iterdir) cli.add_command(remove) if __name__ == '__main__': diff --git a/anypathlib/path_handlers/azure_handler.py b/anypathlib/path_handlers/azure_handler.py index 87574c5..3892146 100644 --- a/anypathlib/path_handlers/azure_handler.py +++ b/anypathlib/path_handlers/azure_handler.py @@ -1,7 +1,7 @@ +import fnmatch import os -import shutil from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Optional, List, Tuple from urllib.parse import urlparse @@ -11,7 +11,7 @@ from azure.identity import DefaultAzureCredential from azure.mgmt.storage import StorageManagementClient -from azure.storage.blob import BlobServiceClient +from azure.storage.blob import BlobServiceClient, ContainerClient from loguru import logger @@ -24,20 +24,38 @@ class AzureStoragePath: container_name: str blob_name: str connection_string: Optional[str] = None + _blob_service_client: Optional[BlobServiceClient] = field(init=False, default=None) + _container_client: Optional[ContainerClient] = field(init=False, default=None) def __post_init__(self): if self.connection_string is None: self.connection_string = AzureHandler.get_connection_string(self.storage_account) + self._container_client = None + self._blob_service_client = None @property def http_url(self) -> str: - return f'https://{self.storage_account}.blob.core.windows.net/{self.container_name}/{self.blob_name}' + return f'https://{self.storage_account}.{AzureHandler.AZURE_URL_SUFFIX}/{self.container_name}/{self.blob_name}' + + @property + def blob_service_client(self) 
-> BlobServiceClient: + if self._blob_service_client is None: + self._blob_service_client = BlobServiceClient.from_connection_string(self.connection_string) + return self._blob_service_client + + @property + def container_client(self) -> ContainerClient: + if self._container_client is None: + self._container_client = self.blob_service_client.get_container_client(self.container_name) + + return self._container_client class AzureHandler(BasePathHandler): DEFAULT_SUBSCRIPTION_ID = os.environ.get('AZURE_SUBSCRIPTION_ID', None) DEFAULT_GROUP_NAME = os.environ.get('AZURE_RESOURCE_GROUP_NAME', None) + AZURE_URL_SUFFIX = r'blob.core.windows.net' @classmethod def refresh_credentials(cls): @@ -58,9 +76,7 @@ def is_dir(cls, url: str) -> bool: @classmethod def is_file(cls, url: str) -> bool: storage_path = cls.http_to_storage_params(url) - blob_service_client = BlobServiceClient( - account_url=f"https://{storage_path.storage_account}.blob.core.windows.net") - container_client = blob_service_client.get_container_client(storage_path.container_name) + container_client = storage_path.container_client blob_client = container_client.get_blob_client(storage_path.blob_name) try: @@ -73,9 +89,7 @@ def is_file(cls, url: str) -> bool: @classmethod def exists(cls, url: str) -> bool: storage_path = cls.http_to_storage_params(url) - - blob_service_client = BlobServiceClient.from_connection_string(storage_path.connection_string) - container_client = blob_service_client.get_container_client(container=storage_path.container_name) + container_client = storage_path.container_client return len([p for p in container_client.list_blobs(name_starts_with=storage_path.blob_name)]) > 0 @classmethod @@ -165,7 +179,7 @@ def download_file(cls, url: str, target_path: Path, force_overwrite: bool = True azure_storage_path = cls.http_to_storage_params(url) # Construct the Blob Service Client blob_service_client = BlobServiceClient( - account_url=f"https://{azure_storage_path.storage_account}.blob.core.windows.net") 
+ account_url=f"https://{azure_storage_path.storage_account}.{cls.AZURE_URL_SUFFIX}") # Get a client to interact with the specified container and blob blob_client = blob_service_client.get_blob_client(container=azure_storage_path.container_name, @@ -182,12 +196,11 @@ def download_file(cls, url: str, target_path: Path, force_overwrite: bool = True @classmethod def upload_file(cls, local_path: str, target_url: str): - azure_storage_path = cls.http_to_storage_params(target_url) """Upload a single file to Azure Blob Storage.""" - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) - + azure_storage_path = cls.http_to_storage_params(target_url) + blob_service_client = azure_storage_path.blob_service_client + container_client = azure_storage_path.container_client # Check if the container exists and create if it does not - container_client = blob_service_client.get_container_client(azure_storage_path.container_name) try: container_client.get_container_properties() except Exception as e: @@ -200,28 +213,11 @@ def upload_file(cls, local_path: str, target_url: str): with open(local_path, "rb") as data: blob_client.upload_blob(data, overwrite=True) - @classmethod - def listdir(cls, url: str, with_prefix: bool = True) -> List[str]: - """List a directory (all blobs with the same prefix) from Azure Blob Storage.""" - azure_storage_path = cls.http_to_storage_params(url) - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) - container_client = blob_service_client.get_container_client(container=azure_storage_path.container_name) - blob_names = [] - for blob in container_client.list_blobs(name_starts_with=azure_storage_path.blob_name): - blob_name = blob.name if with_prefix else blob.name.replace(azure_storage_path.blob_name, '') - blob_azure_path = AzureStoragePath(storage_account=azure_storage_path.storage_account, - container_name=azure_storage_path.container_name, 
blob_name=blob_name, - connection_string=azure_storage_path.connection_string) - blob_names.append(blob_azure_path.http_url) - items = [item for item in blob_names if item != url] - return items - @classmethod def remove_directory(cls, url: str): """Remove a directory (all blobs with the same prefix) from Azure Blob Storage.""" azure_storage_path = cls.http_to_storage_params(url) - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) - container_client = blob_service_client.get_container_client(container=azure_storage_path.container_name) + container_client = azure_storage_path.container_client for blob in container_client.list_blobs(name_starts_with=azure_storage_path.blob_name): container_client.delete_blob(blob.name) @@ -232,8 +228,7 @@ def remove(cls, url: str, allow_missing: bool = False): cls.remove_directory(url) else: azure_storage_path = cls.http_to_storage_params(url) - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) - container_client = blob_service_client.get_container_client(container=azure_storage_path.container_name) + container_client = azure_storage_path.container_client try: container_client.delete_blob(azure_storage_path.blob_name) except ResourceNotFoundError as e: @@ -247,8 +242,7 @@ def download_directory(cls, url: str, force_overwrite: bool, target_dir: Path, v assert target_dir.is_dir() azure_storage_path = cls.http_to_storage_params(url) - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) - container_client = blob_service_client.get_container_client(container=azure_storage_path.container_name) + container_client = azure_storage_path.container_client local_paths = [] if verbose: @@ -274,9 +268,8 @@ def download_directory(cls, url: str, force_overwrite: bool, target_dir: Path, v def upload_directory(cls, local_dir: Path, target_url: str, verbose: bool): """Upload a directory to Azure Blob 
Storage.""" azure_storage_path = cls.http_to_storage_params(target_url) - blob_service_client = BlobServiceClient.from_connection_string(azure_storage_path.connection_string) # Check if the container exists and create if it does not - container_client = blob_service_client.get_container_client(azure_storage_path.container_name) + container_client = azure_storage_path.container_client try: container_client.get_container_properties() except Exception as e: @@ -284,12 +277,11 @@ def upload_directory(cls, local_dir: Path, target_url: str, verbose: bool): container_client.create_container() def upload_file_wrapper(local_path: str, blob_name: str): - azure_url = rf'azure://{azure_storage_path.storage_account}/{azure_storage_path.container_name}/{blob_name}' + azure_url = rf'https://{azure_storage_path.storage_account}.{cls.AZURE_URL_SUFFIX}/{azure_storage_path.container_name}/{blob_name}' cls.upload_file(local_path=local_path, target_url=azure_url) # Collect all files to upload files_to_upload = [] - # for file_path in local_dir.iterdir(): for file_path in local_dir.rglob('*'): if not file_path.is_file(): continue @@ -314,13 +306,8 @@ def copy(cls, source_url: str, target_url: str): source_storage_path = cls.http_to_storage_params(source_url) target_storage_path = cls.http_to_storage_params(target_url) - source_blob_service_client = BlobServiceClient.from_connection_string( - source_storage_path.connection_string) - target_blob_service_client = BlobServiceClient.from_connection_string( - target_storage_path.connection_string) - - source_container_client = source_blob_service_client.get_container_client( - source_storage_path.container_name) + target_blob_service_client = target_storage_path.blob_service_client + source_container_client = source_storage_path.container_client blobs_to_rename = source_container_client.list_blobs(name_starts_with=source_storage_path.blob_name) @@ -349,7 +336,7 @@ def parent(cls, url: str) -> str: if blob_path_parts[-1] == "": blob_path_parts 
= blob_path_parts[:-1] blob_path = '/'.join(blob_path_parts[:-1]) - parent_url = f'https://{account_name}.blob.core.windows.net/{container_name}/{blob_path}/' + parent_url = f'https://{account_name}.{cls.AZURE_URL_SUFFIX}/{container_name}/{blob_path}/' return parent_url @classmethod @@ -369,3 +356,32 @@ def stem(cls, url: str) -> str: blob_path_parts = blob_path_parts[:-1] blob_name = blob_path_parts[-1] return Path(blob_name).stem + + @classmethod + def iterdir(cls, url: str) -> List[str]: + return cls.glob(url, pattern='*') + + @classmethod + def glob(cls, url: str, pattern: str) -> List[str]: + storage_path = cls.http_to_storage_params(url) + container_client = storage_path.container_client + blob_names = [blob.name for blob in + container_client.walk_blobs(name_starts_with=storage_path.blob_name, delimiter='/')] + all_blobs = [ + f"https://{storage_path.storage_account}.{cls.AZURE_URL_SUFFIX}/{storage_path.container_name}/{blob}" for + blob in blob_names] + matched_blobs = [blob for blob in all_blobs if fnmatch.fnmatch(blob, pattern)] + return matched_blobs + + @classmethod + def rglob(cls, url: str, pattern: str) -> List[str]: + storage_path = cls.http_to_storage_params(url) + container_client = storage_path.container_client + blobs = [blob for blob in container_client.list_blob_names(name_starts_with=storage_path.blob_name)] + all_blobs = [ + f"https://{storage_path.storage_account}.{cls.AZURE_URL_SUFFIX}/{storage_path.container_name}/{blob}" for + blob in blobs] + matched_blobs = [blob for blob in all_blobs if fnmatch.fnmatch(blob, pattern)] + all_dirs = list(set([cls.parent(url) for url in matched_blobs])) + dirs_under_url = [dir.rstrip('/') for dir in all_dirs if dir.startswith(url) and dir != url] + return matched_blobs + dirs_under_url diff --git a/anypathlib/path_handlers/base_path_handler.py b/anypathlib/path_handlers/base_path_handler.py index 004d3e6..a27147b 100644 --- a/anypathlib/path_handlers/base_path_handler.py +++ 
b/anypathlib/path_handlers/base_path_handler.py @@ -9,10 +9,6 @@ class BasePathHandler(ABC): def download_file(cls, url: str, target_path: Path, force_overwrite: bool = True) -> Path: pass - @classmethod - @abstractmethod - def listdir(cls, url: str) -> List[str]: - pass @classmethod @abstractmethod @@ -74,3 +70,27 @@ def name(cls, url: str) -> str: @abstractmethod def stem(cls, url: str) -> str: pass + + @classmethod + @abstractmethod + def iterdir(cls, url: str) -> List[str]: + """ + Lists all files and directories directly under the given directory + """ + pass + + @classmethod + @abstractmethod + def glob(cls, url: str, pattern: str) -> List[str]: + """ + Finds all the paths matching a specific pattern, which can include wildcards, but does not search recursively + """ + pass + + @classmethod + @abstractmethod + def rglob(cls, url: str, pattern: str) -> List[str]: + """ + Finds all the paths matching a specific pattern, including wildcards, and searches recursively in all subdirectories + """ + pass diff --git a/anypathlib/path_handlers/local_handler.py b/anypathlib/path_handlers/local_handler.py index 104a2d1..51489c4 100644 --- a/anypathlib/path_handlers/local_handler.py +++ b/anypathlib/path_handlers/local_handler.py @@ -61,10 +61,6 @@ def download_directory(cls, url: str, force_overwrite: bool, target_dir: Path, v def download_file(cls, url: str, target_path: Path, force_overwrite: bool = True) -> Path: return cls.copy_path(url=url, target_path=target_path, force_overwrite=force_overwrite) - @classmethod - def listdir(cls, url: str) -> List[str]: - return [str(p) for p in Path(url).rglob('*')] - @classmethod def relative_path(cls, url: str) -> str: return Path(url).relative_to(Path(url).anchor).as_posix() @@ -80,3 +76,15 @@ def stem(cls, url: str) -> str: @classmethod def name(cls, url: str) -> str: return Path(url).name + + @classmethod + def iterdir(cls, url: str) -> List[str]: + return [str(p) for p in Path(url).iterdir()] + + @classmethod + def 
glob(cls, url: str, pattern: str) -> List[str]: + return [str(p) for p in Path(url).glob(pattern)] + + @classmethod + def rglob(cls, url: str, pattern: str) -> List[str]: + return [str(p) for p in Path(url).rglob(pattern)] diff --git a/anypathlib/path_handlers/s3_handler.py b/anypathlib/path_handlers/s3_handler.py index f47314c..0a1bb8b 100644 --- a/anypathlib/path_handlers/s3_handler.py +++ b/anypathlib/path_handlers/s3_handler.py @@ -1,3 +1,4 @@ +import fnmatch import os from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path @@ -95,15 +96,6 @@ def download_file(cls, url: str, target_path: Path, force_overwrite: bool = True cls.s3_client.download_file(Bucket=bucket, Key=key, Filename=local_file_path.absolute().as_posix()) return local_file_path - @classmethod - def listdir(cls, url: str) -> List[str]: - bucket, key = cls.get_bucket_and_key_from_uri(url) - s3_resource = boto3.resource('s3') - bucket = s3_resource.Bucket(bucket) - items = [cls.get_full_path(bucket=bucket.name, key=obj.key) for obj in bucket.objects.filter(Prefix=key)] - items = [item for item in items if item != url] - return items - @classmethod def remove(cls, url: str): bucket, key = cls.get_bucket_and_key_from_uri(url) @@ -182,8 +174,8 @@ def upload_directory(cls, local_dir: Path, target_url: str, verbose: bool = Fals for root, dirs, files in progress_bar: for file in files: - local_path = os.path.join(root, file) - s3_key = f'{key}/{os.path.relpath(local_path, local_dir)}' + local_path = Path(root) / file + s3_key = f'{key.rstrip("/")}/{local_path.relative_to(local_dir).as_posix()}' cls.s3_client.upload_file(local_path, bucket, s3_key) if verbose: @@ -216,3 +208,40 @@ def copy_and_delete(obj): future.result() # If needed, handle result or exceptions here except Exception as exc: print(f'Operation generated an exception: {exc}') + + @classmethod + def _get_bucket_objects(cls, url: str) -> List[str]: + bucket, key = cls.get_bucket_and_key_from_uri(url) + 
s3_resource = boto3.resource('s3') + bucket_obj = s3_resource.Bucket(bucket) + return [cls.get_full_path(bucket=bucket, key=obj.key) for obj in bucket_obj.objects.filter(Prefix=key)] + + @classmethod + def iterdir(cls, url: str) -> List[str]: + return cls.glob(url, pattern='*') + + @classmethod + def _get_dirs_under_url(cls, base_url: str, url_list: List[str]) -> List[str]: + all_dirs = list(set([cls.parent(url) for url in url_list])) + dirs_under_url = [dir.rstrip('/') for dir in all_dirs if dir.startswith(base_url) and dir != base_url] + return dirs_under_url + + @classmethod + def glob(cls, url: str, pattern: str) -> List[str]: + objects = cls._get_bucket_objects(url) + matched_objects = [obj for obj in objects if fnmatch.fnmatch(obj, pattern)] + # return only top level matched objects + top_level_objects = [obj for obj in matched_objects if obj.count('/') == url.rstrip('/').count('/') + 1] + all_subdirs = cls._get_dirs_under_url(base_url=url, url_list=matched_objects) + subdirs_in_top_level = [dir for dir in all_subdirs if dir.count('/') == url.rstrip('/').count('/') + 1] + return top_level_objects + subdirs_in_top_level + + @classmethod + def rglob(cls, url: str, pattern: str) -> List[str]: + """ + Finds all the paths matching a specific pattern, including wildcards, and searches recursively in all subdirectories + """ + objects = cls._get_bucket_objects(url) + matched_objects = [obj for obj in objects if fnmatch.fnmatch(obj, pattern)] + dirs = cls._get_dirs_under_url(base_url=url, url_list=matched_objects) + return matched_objects + dirs diff --git a/tests/fixtures_anypath.py b/tests/fixtures_anypath.py index fad7442..3067b81 100644 --- a/tests/fixtures_anypath.py +++ b/tests/fixtures_anypath.py @@ -31,6 +31,16 @@ def temp_dir_with_files(): yield tmpdir, list(tmpdir.iterdir()) +@pytest.fixture +def temp_nested_dir(): + with tempfile.TemporaryDirectory() as tmpdirname: + tmpdir = Path(tmpdirname) + nested = tempfile.TemporaryDirectory(dir=tmpdirname) + 
create_files_in_directory(tmpdir) + create_files_in_directory(Path(nested.name)) + yield tmpdir, list(tmpdir.iterdir()), list(Path(nested.name).iterdir()) + + @pytest.fixture def temp_local_dir(): with tempfile.TemporaryDirectory() as tmpdirname: diff --git a/tests/test_anypath_flows.py b/tests/test_anypath_flows.py index ad0b3af..06167f0 100644 --- a/tests/test_anypath_flows.py +++ b/tests/test_anypath_flows.py @@ -7,16 +7,16 @@ @pytest.mark.usefixtures("temp_dir_with_files", "clean_remote_dir") @pytest.mark.parametrize("path_type", [PathType.azure, PathType.s3, PathType.local]) -def test_exists_copy_exists_listdir_remove_exists(path_type: PathType, temp_dir_with_files, clean_remote_dir): +def test_exists_copy_exists_rglob_remove_exists(path_type: PathType, temp_dir_with_files, clean_remote_dir): remote_base_dir = clean_remote_dir local_dir_path, local_dir_files = temp_dir_with_files - remote_dir = remote_base_dir + 'test_exists_copy_exists_listdir_remove_exists/' + remote_dir = remote_base_dir + 'test_exists_copy_exists_rglob_remove_exists/' local_any_path = AnyPath(local_dir_path) target_any_path = AnyPath(remote_dir) assert not target_any_path.exists() local_any_path.copy(target=target_any_path, force_overwrite=True) assert target_any_path.exists() - target_dir_files = target_any_path.listdir() + target_dir_files = target_any_path.rglob('*') assert sorted([remote_file.name for remote_file in target_dir_files]) == sorted( [local_dir_file.name for local_dir_file in local_dir_files]) target_any_path.remove() diff --git a/tests/test_cli.py b/tests/test_cli.py index e8847de..6184292 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -51,9 +51,9 @@ def test_exists_command_false(temp_dir_with_files, cli_runner): @pytest.mark.usefixtures("temp_dir_with_files", 'cli_runner') -def test_listdir_command_with_files(temp_dir_with_files, cli_runner): +def test_iterdir_command_with_files(temp_dir_with_files, cli_runner): local_dir_path, local_dir_files = 
temp_dir_with_files - result = cli_runner.invoke(cli, ['listdir', '-p', local_dir_path]) + result = cli_runner.invoke(cli, ['iterdir', '-p', local_dir_path]) assert result.exit_code == 0 for file in local_dir_files: @@ -61,8 +61,8 @@ def test_listdir_command_with_files(temp_dir_with_files, cli_runner): @pytest.mark.usefixtures("temp_local_dir", 'cli_runner') -def test_listdir_command_empty(temp_local_dir, cli_runner): - result = cli_runner.invoke(cli, ['listdir', '-p', temp_local_dir]) +def test_iterdir_command_empty(temp_local_dir, cli_runner): + result = cli_runner.invoke(cli, ['iterdir', '-p', temp_local_dir]) assert result.exit_code == 0 assert result.output.strip() == '[]' diff --git a/tests/test_download_from_cloud.py b/tests/test_download_from_cloud.py index f991740..b00d285 100644 --- a/tests/test_download_from_cloud.py +++ b/tests/test_download_from_cloud.py @@ -14,6 +14,6 @@ def test_copy_to_local_from_cloud(path_type: PathType, temp_dir_with_files, temp remote_dir = clean_remote_dir cloud_handler.upload_directory(local_dir=local_dir_path, target_url=remote_dir, verbose=False) local_download_dir = AnyPath(remote_dir).copy(target=AnyPath(temp_local_dir), force_overwrite=True) - remote_files = AnyPath(remote_dir).listdir() + remote_files = AnyPath(remote_dir).rglob('*') assert sorted([fn.name for fn in remote_files]) == sorted( - [fn.name for fn in local_download_dir.listdir()]) + [fn.name for fn in local_download_dir.rglob('*')]) diff --git a/tests/test_iterdir_glob_rglob.py b/tests/test_iterdir_glob_rglob.py new file mode 100644 index 0000000..a06efa1 --- /dev/null +++ b/tests/test_iterdir_glob_rglob.py @@ -0,0 +1,21 @@ +import pytest +from anypathlib import PathType, AnyPath +from tests.tests_urls import PATH_TYPE_TO_HANDLER +from fixtures_anypath import temp_dir_with_files, clean_remote_dir, temp_nested_dir + + +@pytest.mark.usefixtures("temp_nested_dir", "clean_remote_dir") +@pytest.mark.parametrize("path_type", [PathType.local, PathType.azure, 
PathType.s3]) +def test_rglob_glob_iterdir(path_type: PathType, temp_nested_dir, clean_remote_dir): + cloud_handler = PATH_TYPE_TO_HANDLER[path_type] + local_dir_path, local_files_top_level, local_nested_files = temp_nested_dir + all_local_files = local_files_top_level + local_nested_files + remote_dir = clean_remote_dir + cloud_handler.upload_directory(local_dir=local_dir_path, target_url=remote_dir, verbose=False) + remote_all_files = AnyPath(remote_dir).rglob(pattern='*') + assert sorted([fn.name for fn in remote_all_files]) == sorted([fn.name for fn in all_local_files]) + remote_files_top_level_glob = AnyPath(remote_dir).glob(pattern='*') + assert sorted([fn.name for fn in remote_files_top_level_glob]) == sorted([fn.name for fn in local_files_top_level]) + remote_files_top_level_iterdir = AnyPath(remote_dir).iterdir() + assert sorted([fn.name for fn in remote_files_top_level_iterdir]) == sorted( + [fn.name for fn in local_files_top_level]) diff --git a/tests/test_str_path_interoperability.py b/tests/test_str_path_interoperability.py index 9f1b803..64e299d 100644 --- a/tests/test_str_path_interoperability.py +++ b/tests/test_str_path_interoperability.py @@ -26,5 +26,5 @@ def test_copy_targets(path_type: PathType, target_type, temp_dir_with_files, tem remote_dir = clean_remote_dir cloud_handler.upload_directory(local_dir=local_dir_path, target_url=remote_dir, verbose=False) local_download_dir = AnyPath(remote_dir).copy(target=temp_local_dir, force_overwrite=True) - remote_files = AnyPath(remote_dir).listdir() - assert sorted([fn.name for fn in remote_files]) == sorted([fn.name for fn in local_download_dir.listdir()]) + remote_files = AnyPath(remote_dir).rglob('*') + assert sorted([fn.name for fn in remote_files]) == sorted([fn.name for fn in local_download_dir.rglob('*')]) diff --git a/tests/test_upload_to_cloud.py b/tests/test_upload_to_cloud.py index 5889503..9add7a0 100644 --- a/tests/test_upload_to_cloud.py +++ b/tests/test_upload_to_cloud.py @@ -13,7 +13,7 
@@ def test_copy_from_local_to_cloud(path_type: PathType, temp_dir_with_files, temp remote_dir = clean_remote_dir local_anypath = AnyPath(local_dir_path) local_anypath.copy(target=AnyPath(remote_dir)) - remote_dir_files = cloud_handler.listdir(remote_dir) + remote_dir_files = cloud_handler.rglob(remote_dir, pattern='*') cloud_handler.remove(remote_dir) assert sorted([remote_file.split('/')[-1] for remote_file in remote_dir_files]) == sorted( [local_dir_file.name for local_dir_file in local_dir_files])