From dc4b51b75d39f8f96aa59e0dbbe59f9cfe2ad7f2 Mon Sep 17 00:00:00 2001 From: "Jeremy A. Prescott" Date: Tue, 13 Sep 2022 11:07:42 +0200 Subject: [PATCH] Jeremy lig 1745 paginate export endpoints and use (#922) closes lig-1745 - new filename2readUrls mapping function `export_filenames_and_read_urls_by_tag_id` - paginate and correctly type all export endpoints --- lightly/api/api_workflow_datasets.py | 7 +- lightly/api/api_workflow_download_dataset.py | 86 ++- lightly/api/utils.py | 30 + .../swagger_client/__init__.py | 2 + .../swagger_client/api/tags_api.py | 159 ++++- .../swagger_client/models/__init__.py | 2 + .../models/filename_and_read_url.py | 151 ++++ .../models/filename_and_read_urls.py | 95 +++ .../mocked_api_workflow_client.py | 645 +++++++++++++----- .../test_api_workflow_download_dataset.py | 11 +- 10 files changed, 982 insertions(+), 206 deletions(-) create mode 100644 lightly/openapi_generated/swagger_client/models/filename_and_read_url.py create mode 100644 lightly/openapi_generated/swagger_client/models/filename_and_read_urls.py diff --git a/lightly/api/api_workflow_datasets.py b/lightly/api/api_workflow_datasets.py index 8e64f91d5..cdde07fb3 100644 --- a/lightly/api/api_workflow_datasets.py +++ b/lightly/api/api_workflow_datasets.py @@ -1,4 +1,5 @@ from typing import List +from xmlrpc.client import Boolean from lightly.openapi_generated.swagger_client.models.create_entity_response import CreateEntityResponse from lightly.openapi_generated.swagger_client.models.dataset_create_request import DatasetCreateRequest @@ -22,7 +23,7 @@ def _get_current_dataset(self) -> DatasetData: """ return self.get_dataset_by_id(self.dataset_id) - def dataset_exists(self, dataset_id: str): + def dataset_exists(self, dataset_id: str) -> bool: """Returns True if a dataset with dataset_id exists. """ try: self.get_dataset_by_id(dataset_id) @@ -30,9 +31,9 @@ def dataset_exists(self, dataset_id: str): except ApiException: return False - def get_dataset_by_id(self, dataset_id: str): + def get_dataset_by_id(self, dataset_id: str) -> DatasetData: """Returns the dataset for the given dataset id. """ - dataset = self._datasets_api.get_dataset_by_id(dataset_id) + dataset: DatasetData = self._datasets_api.get_dataset_by_id(dataset_id) return dataset def get_datasets(self, shared: bool = False) -> List[DatasetData]: diff --git a/lightly/api/api_workflow_download_dataset.py b/lightly/api/api_workflow_download_dataset.py index f9c5f7ff5..066e70dd0 100644 --- a/lightly/api/api_workflow_download_dataset.py +++ b/lightly/api/api_workflow_download_dataset.py @@ -6,12 +6,16 @@ from urllib.request import Request, urlopen from PIL import Image +from lightly.api.utils import paginate_endpoint, retry from torch.utils.hipify.hipify_python import bcolors from concurrent.futures.thread import ThreadPoolExecutor from lightly.api.bitmask import BitMask from lightly.openapi_generated.swagger_client.models.image_type import ImageType +from lightly.openapi_generated.swagger_client.models.filename_and_read_url import FilenameAndReadUrl +from lightly.openapi_generated.swagger_client.models.label_box_data_row import LabelBoxDataRow +from lightly.openapi_generated.swagger_client.models.label_studio_task import LabelStudioTask @@ -153,7 +157,7 @@ def lambda_(i): def export_label_studio_tasks_by_tag_id( self, tag_id: str, - ) -> List[Dict]: + ) -> List[LabelStudioTask]: """Exports samples in a format compatible with Label Studio. 
        The format is documented here:
@@ -167,16 +171,18 @@ def export_label_studio_tasks_by_tag_id(
             A list of dictionaries in a format compatible with
             Label Studio.
 
         """
-        label_studio_tasks = self._tags_api.export_tag_to_label_studio_tasks(
-            self.dataset_id,
-            tag_id
+        label_studio_tasks = paginate_endpoint(
+            self._tags_api.export_tag_to_label_studio_tasks,
+            page_size=20000,
+            dataset_id=self.dataset_id,
+            tag_id=tag_id
         )
         return label_studio_tasks
 
     def export_label_studio_tasks_by_tag_name(
         self,
         tag_name: str,
-    ) -> List[Dict]:
+    ) -> List[LabelStudioTask]:
         """Exports samples in a format compatible with Label Studio.
 
         The format is documented here:
@@ -205,7 +211,7 @@ def export_label_box_data_rows_by_tag_id(
     def export_label_box_data_rows_by_tag_id(
         self,
         tag_id: str,
-    ) -> List[Dict]:
+    ) -> List[LabelBoxDataRow]:
         """Exports samples in a format compatible with Labelbox.
 
         The format is documented here:
@@ -219,16 +225,18 @@ def export_label_box_data_rows_by_tag_id(
             A list of dictionaries in a format compatible with
             Labelbox.
 
         """
-        label_box_data_rows = self._tags_api.export_tag_to_label_box_data_rows(
-            self.dataset_id,
-            tag_id,
+        label_box_data_rows = paginate_endpoint(
+            self._tags_api.export_tag_to_label_box_data_rows,
+            page_size=20000,
+            dataset_id=self.dataset_id,
+            tag_id=tag_id
         )
         return label_box_data_rows
 
     def export_label_box_data_rows_by_tag_name(
         self,
         tag_name: str,
-    ) -> List[Dict]:
+    ) -> List[LabelBoxDataRow]:
         """Exports samples in a format compatible with Labelbox.
 
         The format is documented here:
@@ -269,9 +277,10 @@ def export_filenames_by_tag_id(
             A list of the samples filenames within a certain tag.
 
         """
-        filenames = self._tags_api.export_tag_to_basic_filenames(
-            self.dataset_id,
-            tag_id,
+        filenames = retry(
+            self._tags_api.export_tag_to_basic_filenames,
+            dataset_id=self.dataset_id,
+            tag_id=tag_id,
         )
         return filenames
 
@@ -299,4 +308,53 @@ def export_filenames_by_tag_name(
         """
         tag = self.get_tag_by_name(tag_name)
-        return self.export_filenames_by_tag_id(tag.id)
\ No newline at end of file
+        return self.export_filenames_by_tag_id(tag.id)
+
+
+    def export_filenames_and_read_urls_by_tag_id(
+        self,
+        tag_id: str,
+    ) -> List[FilenameAndReadUrl]:
+        """Exports the samples' filenames together with their read URLs.
+
+        Args:
+            tag_id:
+                Id of the tag which should be exported.
+
+        Returns:
+            A list of mappings between the samples' filenames and their read URLs within a certain tag.
+
+        """
+        mappings = paginate_endpoint(
+            self._tags_api.export_tag_to_basic_filenames_and_read_urls,
+            page_size=20000,
+            dataset_id=self.dataset_id,
+            tag_id=tag_id
+        )
+        return mappings
+
+    def export_filenames_and_read_urls_by_tag_name(
+        self,
+        tag_name: str,
+    ) -> List[FilenameAndReadUrl]:
+        """Exports the samples' filenames together with their read URLs.
+
+        Args:
+            tag_name:
+                Name of the tag which should be exported.
+
+        Returns:
+            A list of mappings between the samples' filenames and their read URLs within a certain tag.
+
+        Examples:
+            >>> # write a json file which can be used to access the actual file contents.
+            >>> mappings = client.export_filenames_and_read_urls_by_tag_name(
+            >>>     'initial-tag'
+            >>> )
+            >>>
+            >>> with open('my-readURL-mappings.json', 'w') as f:
+            >>>     json.dump(mappings, f)
+
+        """
+        tag = self.get_tag_by_name(tag_name)
+        return self.export_filenames_and_read_urls_by_tag_id(tag.id)
\ No newline at end of file
diff --git a/lightly/api/utils.py b/lightly/api/utils.py
index 7c0220a2b..23da69288 100644
--- a/lightly/api/utils.py
+++ b/lightly/api/utils.py
@@ -8,6 +8,7 @@
 import time
 import random
 from enum import Enum
+from typing import List
 
 import numpy as np
 from PIL import Image, ImageFilter
@@ -64,6 +65,35 @@ def retry(func, *args, **kwargs):
             f'Maximum retries exceeded! Original exception: {type(e)}: {str(e)}') from e
 
 
+
+def paginate_endpoint(fn, page_size=5000, *args, **kwargs) -> List:
+    """Paginates an API endpoint and returns the accumulated entries of all pages.
+
+    Args:
+        fn:
+            The paginated endpoint, which is called repeatedly until it returns no more data.
+        page_size:
+            The number of entries to fetch per page.
+    """
+    entries: List = []
+    offset = 0
+    has_more = True
+    while has_more:
+        chunk = retry(
+            fn, page_offset=offset * page_size, page_size=page_size, *args, **kwargs
+        )
+        # if no more data is found, stop the pagination, otherwise fetch the next chunk
+        if len(chunk) == 0:
+            has_more = False
+        else:
+            entries.extend(chunk)
+            offset += 1
+
+    return entries
+
+
+
+
 def getenv(key: str, default: str):
     """Return the value of the environment variable key if it exists,
     or default if it doesn’t.
diff --git a/lightly/openapi_generated/swagger_client/__init__.py b/lightly/openapi_generated/swagger_client/__init__.py
index 2dd91d024..6f195683f 100644
--- a/lightly/openapi_generated/swagger_client/__init__.py
+++ b/lightly/openapi_generated/swagger_client/__init__.py
@@ -108,6 +108,8 @@
 from lightly.openapi_generated.swagger_client.models.embedding_id_is_processed_body import EmbeddingIdIsProcessedBody
 from lightly.openapi_generated.swagger_client.models.file_name_format import FileNameFormat
 from lightly.openapi_generated.swagger_client.models.file_output_format import FileOutputFormat
+from lightly.openapi_generated.swagger_client.models.filename_and_read_url import FilenameAndReadUrl
+from lightly.openapi_generated.swagger_client.models.filename_and_read_urls import FilenameAndReadUrls
 from lightly.openapi_generated.swagger_client.models.general_job_result import GeneralJobResult
 from lightly.openapi_generated.swagger_client.models.image_type import ImageType
 from lightly.openapi_generated.swagger_client.models.initial_tag_create_request import InitialTagCreateRequest
diff --git a/lightly/openapi_generated/swagger_client/api/tags_api.py b/lightly/openapi_generated/swagger_client/api/tags_api.py
index d8f4f9238..081b278db 100644
--- a/lightly/openapi_generated/swagger_client/api/tags_api.py
+++ b/lightly/openapi_generated/swagger_client/api/tags_api.py
@@ -471,6 +471,8 @@ def export_tag_to_basic_filenames(self, dataset_id, tag_id, **kwargs):  # noqa:
         :param bool include_meta_data: if true, will also include metadata
         :param FileOutputFormat format:
         :param bool preview_example: if true, will generate a preview example of how the structure will look
+        :param float page_size: pagination size/limit of the number of samples to return
+        :param float page_offset: pagination offset
         :return: str
                  If the method is called asynchronously,
                  returns the request thread.
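Aside: the pagination contract that the new `paginate_endpoint` helper from `lightly/api/utils.py` relies on can be illustrated with a minimal, self-contained sketch. `fake_endpoint` and `ENTRIES` below are hypothetical stand-ins for a real endpoint such as `export_tag_to_basic_filenames_and_read_urls`; only the `page_size`/`page_offset` keyword contract matters:

```python
from lightly.api.utils import paginate_endpoint

# Hypothetical in-memory "endpoint" serving 45 entries. Real endpoints also
# take dataset_id/tag_id, which paginate_endpoint forwards via **kwargs.
ENTRIES = [f"img_{i}.jpg" for i in range(45)]

def fake_endpoint(page_size=None, page_offset=None):
    # page_offset arrives as an absolute sample offset (offset * page_size).
    return ENTRIES[page_offset : page_offset + page_size]

# Four calls are made (offsets 0, 20, 40, 60); the last chunk is empty,
# which ends the pagination loop.
assert paginate_endpoint(fake_endpoint, page_size=20) == ENTRIES
```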
@@ -500,12 +502,14 @@ def export_tag_to_basic_filenames_with_http_info(self, dataset_id, tag_id, **kwa :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: str If the method is called asynchronously, returns the request thread. """ - all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example'] # noqa: E501 + all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example', 'page_size', 'page_offset'] # noqa: E501 all_params.append('async_req') all_params.append('_return_http_data_only') all_params.append('_preload_content') @@ -550,6 +554,10 @@ def export_tag_to_basic_filenames_with_http_info(self, dataset_id, tag_id, **kwa query_params.append(('format', params['format'])) # noqa: E501 if 'preview_example' in params: query_params.append(('previewExample', params['preview_example'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'page_offset' in params: + query_params.append(('pageOffset', params['page_offset'])) # noqa: E501 header_params = {} @@ -580,6 +588,125 @@ def export_tag_to_basic_filenames_with_http_info(self, dataset_id, tag_id, **kwa _request_timeout=params.get('_request_timeout'), collection_formats=collection_formats) + def export_tag_to_basic_filenames_and_read_urls(self, dataset_id, tag_id, **kwargs): # noqa: E501 + """export_tag_to_basic_filenames_and_read_urls # noqa: E501 + + Export the samples filenames to map with their readURL. # noqa: E501 + This method makes a synchronous HTTP request by default. To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.export_tag_to_basic_filenames_and_read_urls(dataset_id, tag_id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param MongoObjectID dataset_id: ObjectId of the dataset (required) + :param MongoObjectID tag_id: ObjectId of the tag (required) + :param FileOutputFormat format: + :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset + :return: FilenameAndReadUrls + If the method is called asynchronously, + returns the request thread. + """ + kwargs['_return_http_data_only'] = True + if kwargs.get('async_req'): + return self.export_tag_to_basic_filenames_and_read_urls_with_http_info(dataset_id, tag_id, **kwargs) # noqa: E501 + else: + (data) = self.export_tag_to_basic_filenames_and_read_urls_with_http_info(dataset_id, tag_id, **kwargs) # noqa: E501 + return data + + def export_tag_to_basic_filenames_and_read_urls_with_http_info(self, dataset_id, tag_id, **kwargs): # noqa: E501 + """export_tag_to_basic_filenames_and_read_urls # noqa: E501 + + Export the samples filenames to map with their readURL. # noqa: E501 + This method makes a synchronous HTTP request by default. 
To make an + asynchronous HTTP request, please pass async_req=True + >>> thread = api.export_tag_to_basic_filenames_and_read_urls_with_http_info(dataset_id, tag_id, async_req=True) + >>> result = thread.get() + + :param async_req bool + :param MongoObjectID dataset_id: ObjectId of the dataset (required) + :param MongoObjectID tag_id: ObjectId of the tag (required) + :param FileOutputFormat format: + :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset + :return: FilenameAndReadUrls + If the method is called asynchronously, + returns the request thread. + """ + + all_params = ['dataset_id', 'tag_id', 'format', 'preview_example', 'page_size', 'page_offset'] # noqa: E501 + all_params.append('async_req') + all_params.append('_return_http_data_only') + all_params.append('_preload_content') + all_params.append('_request_timeout') + + params = locals() + for key, val in six.iteritems(params['kwargs']): + if key not in all_params: + raise TypeError( + "Got an unexpected keyword argument '%s'" + " to method export_tag_to_basic_filenames_and_read_urls" % key + ) + params[key] = val + del params['kwargs'] + # verify the required parameter 'dataset_id' is set + if self.api_client.client_side_validation and ('dataset_id' not in params or + params['dataset_id'] is None): # noqa: E501 + raise ValueError("Missing the required parameter `dataset_id` when calling `export_tag_to_basic_filenames_and_read_urls`") # noqa: E501 + # verify the required parameter 'tag_id' is set + if self.api_client.client_side_validation and ('tag_id' not in params or + params['tag_id'] is None): # noqa: E501 + raise ValueError("Missing the required parameter `tag_id` when calling `export_tag_to_basic_filenames_and_read_urls`") # noqa: E501 + + collection_formats = {} + + path_params = {} + if 'dataset_id' in params: + path_params['datasetId'] = params['dataset_id'] # noqa: E501 + if 'tag_id' in params: + path_params['tagId'] = params['tag_id'] # noqa: E501 + + query_params = [] + if 'format' in params: + query_params.append(('format', params['format'])) # noqa: E501 + if 'preview_example' in params: + query_params.append(('previewExample', params['preview_example'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'page_offset' in params: + query_params.append(('pageOffset', params['page_offset'])) # noqa: E501 + + header_params = {} + + form_params = [] + local_var_files = {} + + body_params = None + # HTTP header `Accept` + header_params['Accept'] = self.api_client.select_header_accept( + ['application/json']) # noqa: E501 + + # Authentication setting + auth_settings = ['ApiKeyAuth', 'auth0Bearer'] # noqa: E501 + + return self.api_client.call_api( + '/v1/datasets/{datasetId}/tags/{tagId}/export/basic/filenamesAndReadUrls', 'GET', + path_params, + query_params, + header_params, + body=body_params, + post_params=form_params, + files=local_var_files, + response_type='FilenameAndReadUrls', # noqa: E501 + auth_settings=auth_settings, + async_req=params.get('async_req'), + _return_http_data_only=params.get('_return_http_data_only'), + _preload_content=params.get('_preload_content', True), + _request_timeout=params.get('_request_timeout'), + collection_formats=collection_formats) + def export_tag_to_label_box_data_rows(self, dataset_id, tag_id, **kwargs): # noqa: E501 
"""export_tag_to_label_box_data_rows # noqa: E501 @@ -598,6 +725,8 @@ def export_tag_to_label_box_data_rows(self, dataset_id, tag_id, **kwargs): # no :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: LabelBoxDataRows If the method is called asynchronously, returns the request thread. @@ -627,12 +756,14 @@ def export_tag_to_label_box_data_rows_with_http_info(self, dataset_id, tag_id, * :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: LabelBoxDataRows If the method is called asynchronously, returns the request thread. """ - all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example'] # noqa: E501 + all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example', 'page_size', 'page_offset'] # noqa: E501 all_params.append('async_req') all_params.append('_return_http_data_only') all_params.append('_preload_content') @@ -677,6 +808,10 @@ def export_tag_to_label_box_data_rows_with_http_info(self, dataset_id, tag_id, * query_params.append(('format', params['format'])) # noqa: E501 if 'preview_example' in params: query_params.append(('previewExample', params['preview_example'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'page_offset' in params: + query_params.append(('pageOffset', params['page_offset'])) # noqa: E501 header_params = {} @@ -725,6 +860,8 @@ def export_tag_to_label_studio_tasks(self, dataset_id, tag_id, **kwargs): # noq :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: LabelStudioTasks If the method is called asynchronously, returns the request thread. @@ -754,12 +891,14 @@ def export_tag_to_label_studio_tasks_with_http_info(self, dataset_id, tag_id, ** :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: LabelStudioTasks If the method is called asynchronously, returns the request thread. 
""" - all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example'] # noqa: E501 + all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example', 'page_size', 'page_offset'] # noqa: E501 all_params.append('async_req') all_params.append('_return_http_data_only') all_params.append('_preload_content') @@ -804,6 +943,10 @@ def export_tag_to_label_studio_tasks_with_http_info(self, dataset_id, tag_id, ** query_params.append(('format', params['format'])) # noqa: E501 if 'preview_example' in params: query_params.append(('previewExample', params['preview_example'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'page_offset' in params: + query_params.append(('pageOffset', params['page_offset'])) # noqa: E501 header_params = {} @@ -852,6 +995,8 @@ def export_tag_to_sama_tasks(self, dataset_id, tag_id, **kwargs): # noqa: E501 :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: SamaTasks If the method is called asynchronously, returns the request thread. @@ -881,12 +1026,14 @@ def export_tag_to_sama_tasks_with_http_info(self, dataset_id, tag_id, **kwargs): :param bool include_meta_data: if true, will also include metadata :param FileOutputFormat format: :param bool preview_example: if true, will generate a preview example of how the structure will look + :param float page_size: pagination size/limit of the number of samples to return + :param float page_offset: pagination offset :return: SamaTasks If the method is called asynchronously, returns the request thread. 
""" - all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example'] # noqa: E501 + all_params = ['dataset_id', 'tag_id', 'expires_in', 'access_control', 'file_name_format', 'include_meta_data', 'format', 'preview_example', 'page_size', 'page_offset'] # noqa: E501 all_params.append('async_req') all_params.append('_return_http_data_only') all_params.append('_preload_content') @@ -931,6 +1078,10 @@ def export_tag_to_sama_tasks_with_http_info(self, dataset_id, tag_id, **kwargs): query_params.append(('format', params['format'])) # noqa: E501 if 'preview_example' in params: query_params.append(('previewExample', params['preview_example'])) # noqa: E501 + if 'page_size' in params: + query_params.append(('pageSize', params['page_size'])) # noqa: E501 + if 'page_offset' in params: + query_params.append(('pageOffset', params['page_offset'])) # noqa: E501 header_params = {} diff --git a/lightly/openapi_generated/swagger_client/models/__init__.py b/lightly/openapi_generated/swagger_client/models/__init__.py index f93b5f5b6..4633bfe20 100644 --- a/lightly/openapi_generated/swagger_client/models/__init__.py +++ b/lightly/openapi_generated/swagger_client/models/__init__.py @@ -86,6 +86,8 @@ from lightly.openapi_generated.swagger_client.models.embedding_id_is_processed_body import EmbeddingIdIsProcessedBody from lightly.openapi_generated.swagger_client.models.file_name_format import FileNameFormat from lightly.openapi_generated.swagger_client.models.file_output_format import FileOutputFormat +from lightly.openapi_generated.swagger_client.models.filename_and_read_url import FilenameAndReadUrl +from lightly.openapi_generated.swagger_client.models.filename_and_read_urls import FilenameAndReadUrls from lightly.openapi_generated.swagger_client.models.general_job_result import GeneralJobResult from lightly.openapi_generated.swagger_client.models.image_type import ImageType from lightly.openapi_generated.swagger_client.models.initial_tag_create_request import InitialTagCreateRequest diff --git a/lightly/openapi_generated/swagger_client/models/filename_and_read_url.py b/lightly/openapi_generated/swagger_client/models/filename_and_read_url.py new file mode 100644 index 000000000..34c778860 --- /dev/null +++ b/lightly/openapi_generated/swagger_client/models/filename_and_read_url.py @@ -0,0 +1,151 @@ +# coding: utf-8 + +""" + Lightly API + + Lightly.ai enables you to do self-supervised learning in an easy and intuitive way. The lightly.ai OpenAPI spec defines how one can interact with our REST API to unleash the full potential of lightly.ai # noqa: E501 + + OpenAPI spec version: 1.0.0 + Contact: support@lightly.ai + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from lightly.openapi_generated.swagger_client.configuration import Configuration + + +class FilenameAndReadUrl(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. 
+ """ + swagger_types = { + 'file_name': 'str', + 'read_url': 'ReadUrl' + } + + attribute_map = { + 'file_name': 'fileName', + 'read_url': 'readUrl' + } + + def __init__(self, file_name=None, read_url=None, _configuration=None): # noqa: E501 + """FilenameAndReadUrl - a model defined in Swagger""" # noqa: E501 + if _configuration is None: + _configuration = Configuration() + self._configuration = _configuration + + self._file_name = None + self._read_url = None + self.discriminator = None + + self.file_name = file_name + self.read_url = read_url + + @property + def file_name(self): + """Gets the file_name of this FilenameAndReadUrl. # noqa: E501 + + + :return: The file_name of this FilenameAndReadUrl. # noqa: E501 + :rtype: str + """ + return self._file_name + + @file_name.setter + def file_name(self, file_name): + """Sets the file_name of this FilenameAndReadUrl. + + + :param file_name: The file_name of this FilenameAndReadUrl. # noqa: E501 + :type: str + """ + if self._configuration.client_side_validation and file_name is None: + raise ValueError("Invalid value for `file_name`, must not be `None`") # noqa: E501 + + self._file_name = file_name + + @property + def read_url(self): + """Gets the read_url of this FilenameAndReadUrl. # noqa: E501 + + + :return: The read_url of this FilenameAndReadUrl. # noqa: E501 + :rtype: ReadUrl + """ + return self._read_url + + @read_url.setter + def read_url(self, read_url): + """Sets the read_url of this FilenameAndReadUrl. + + + :param read_url: The read_url of this FilenameAndReadUrl. # noqa: E501 + :type: ReadUrl + """ + if self._configuration.client_side_validation and read_url is None: + raise ValueError("Invalid value for `read_url`, must not be `None`") # noqa: E501 + + self._read_url = read_url + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(FilenameAndReadUrl, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, FilenameAndReadUrl): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, FilenameAndReadUrl): + return True + + return self.to_dict() != other.to_dict() diff --git a/lightly/openapi_generated/swagger_client/models/filename_and_read_urls.py b/lightly/openapi_generated/swagger_client/models/filename_and_read_urls.py new file mode 100644 index 000000000..4d6a376a2 --- /dev/null +++ b/lightly/openapi_generated/swagger_client/models/filename_and_read_urls.py @@ -0,0 +1,95 @@ +# coding: utf-8 + +""" + Lightly API + + Lightly.ai enables you to do self-supervised learning in an easy and intuitive way. 
The lightly.ai OpenAPI spec defines how one can interact with our REST API to unleash the full potential of lightly.ai # noqa: E501 + + OpenAPI spec version: 1.0.0 + Contact: support@lightly.ai + Generated by: https://github.com/swagger-api/swagger-codegen.git +""" + + +import pprint +import re # noqa: F401 + +import six + +from lightly.openapi_generated.swagger_client.configuration import Configuration + + +class FilenameAndReadUrls(object): + """NOTE: This class is auto generated by the swagger code generator program. + + Do not edit the class manually. + """ + + """ + Attributes: + swagger_types (dict): The key is attribute name + and the value is attribute type. + attribute_map (dict): The key is attribute name + and the value is json key in definition. + """ + swagger_types = { + } + + attribute_map = { + } + + def __init__(self, _configuration=None): # noqa: E501 + """FilenameAndReadUrls - a model defined in Swagger""" # noqa: E501 + if _configuration is None: + _configuration = Configuration() + self._configuration = _configuration + self.discriminator = None + + def to_dict(self): + """Returns the model properties as a dict""" + result = {} + + for attr, _ in six.iteritems(self.swagger_types): + value = getattr(self, attr) + if isinstance(value, list): + result[attr] = list(map( + lambda x: x.to_dict() if hasattr(x, "to_dict") else x, + value + )) + elif hasattr(value, "to_dict"): + result[attr] = value.to_dict() + elif isinstance(value, dict): + result[attr] = dict(map( + lambda item: (item[0], item[1].to_dict()) + if hasattr(item[1], "to_dict") else item, + value.items() + )) + else: + result[attr] = value + if issubclass(FilenameAndReadUrls, dict): + for key, value in self.items(): + result[key] = value + + return result + + def to_str(self): + """Returns the string representation of the model""" + return pprint.pformat(self.to_dict()) + + def __repr__(self): + """For `print` and `pprint`""" + return self.to_str() + + def __eq__(self, other): + """Returns true if both objects are equal""" + if not isinstance(other, FilenameAndReadUrls): + return False + + return self.to_dict() == other.to_dict() + + def __ne__(self, other): + """Returns true if both objects are not equal""" + if not isinstance(other, FilenameAndReadUrls): + return True + + return self.to_dict() != other.to_dict() diff --git a/tests/api_workflow/mocked_api_workflow_client.py b/tests/api_workflow/mocked_api_workflow_client.py index ccb8c3029..d2ec8c6ae 100644 --- a/tests/api_workflow/mocked_api_workflow_client.py +++ b/tests/api_workflow/mocked_api_workflow_client.py @@ -11,23 +11,64 @@ import requests from requests import Response from lightly.openapi_generated.swagger_client.api.docker_api import DockerApi -from lightly.openapi_generated.swagger_client.models.create_docker_worker_registry_entry_request import CreateDockerWorkerRegistryEntryRequest -from lightly.openapi_generated.swagger_client.models.datasource_processed_until_timestamp_response import DatasourceProcessedUntilTimestampResponse -from lightly.openapi_generated.swagger_client.models.docker_run_data import DockerRunData -from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_create_request import DockerRunScheduledCreateRequest -from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_data import DockerRunScheduledData -from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_priority import DockerRunScheduledPriority -from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_state 
import DockerRunScheduledState -from lightly.openapi_generated.swagger_client.models.docker_run_state import DockerRunState -from lightly.openapi_generated.swagger_client.models.docker_worker_config_create_request import DockerWorkerConfigCreateRequest -from lightly.openapi_generated.swagger_client.models.docker_worker_registry_entry_data import DockerWorkerRegistryEntryData -from lightly.openapi_generated.swagger_client.models.docker_worker_state import DockerWorkerState -from lightly.openapi_generated.swagger_client.models.docker_worker_type import DockerWorkerType +from lightly.openapi_generated.swagger_client.models.create_docker_worker_registry_entry_request import ( + CreateDockerWorkerRegistryEntryRequest, +) +from lightly.openapi_generated.swagger_client.models.datasource_processed_until_timestamp_response import ( + DatasourceProcessedUntilTimestampResponse, +) +from lightly.openapi_generated.swagger_client.models.docker_run_data import ( + DockerRunData, +) +from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_create_request import ( + DockerRunScheduledCreateRequest, +) +from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_data import ( + DockerRunScheduledData, +) +from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_priority import ( + DockerRunScheduledPriority, +) +from lightly.openapi_generated.swagger_client.models.docker_run_scheduled_state import ( + DockerRunScheduledState, +) +from lightly.openapi_generated.swagger_client.models.docker_run_state import ( + DockerRunState, +) +from lightly.openapi_generated.swagger_client.models.docker_worker_config_create_request import ( + DockerWorkerConfigCreateRequest, +) +from lightly.openapi_generated.swagger_client.models.docker_worker_registry_entry_data import ( + DockerWorkerRegistryEntryData, +) +from lightly.openapi_generated.swagger_client.models.docker_worker_state import ( + DockerWorkerState, +) +from lightly.openapi_generated.swagger_client.models.docker_worker_type import ( + DockerWorkerType, +) +from lightly.openapi_generated.swagger_client.models.filename_and_read_url import ( + FilenameAndReadUrl, +) +from lightly.openapi_generated.swagger_client.models.label_box_data_row import ( + LabelBoxDataRow, +) +from lightly.openapi_generated.swagger_client.models.label_studio_task import ( + LabelStudioTask, +) +from lightly.openapi_generated.swagger_client.models.label_studio_task_data import ( + LabelStudioTaskData, +) + from lightly.openapi_generated.swagger_client.models.tag_creator import TagCreator -from lightly.openapi_generated.swagger_client.models.dataset_create_request import DatasetCreateRequest +from lightly.openapi_generated.swagger_client.models.dataset_create_request import ( + DatasetCreateRequest, +) from lightly.openapi_generated.swagger_client.models.dataset_data import DatasetData -from lightly.openapi_generated.swagger_client.models.sample_partial_mode import SamplePartialMode +from lightly.openapi_generated.swagger_client.models.sample_partial_mode import ( + SamplePartialMode, +) from lightly.openapi_generated.swagger_client.api.datasets_api import DatasetsApi from lightly.openapi_generated.swagger_client.api.datasources_api import DatasourcesApi from lightly.openapi_generated.swagger_client.models.timestamp import Timestamp @@ -39,34 +80,84 @@ from typing import * -from lightly.openapi_generated.swagger_client import ScoresApi, \ - CreateEntityResponse, SamplesApi, SampleCreateRequest, \ - InitialTagCreateRequest, ApiClient, 
VersioningApi, QuotaApi, \ - TagArithmeticsRequest, TagBitMaskResponse, SampleWriteUrls, SampleData, SampleDataModes, DatasourceRawSamplesMetadataData, Trigger2dEmbeddingJobRequest, SampleUpdateRequest +from lightly.openapi_generated.swagger_client import ( + ScoresApi, + CreateEntityResponse, + SamplesApi, + SampleCreateRequest, + InitialTagCreateRequest, + ApiClient, + VersioningApi, + QuotaApi, + TagArithmeticsRequest, + TagBitMaskResponse, + SampleWriteUrls, + SampleData, + SampleMetaData, + SampleDataModes, + DatasourceRawSamplesMetadataData, + Trigger2dEmbeddingJobRequest, + SampleUpdateRequest, +) from lightly.openapi_generated.swagger_client.api.embeddings_api import EmbeddingsApi -from lightly.openapi_generated.swagger_client.api.collaboration_api import CollaborationApi +from lightly.openapi_generated.swagger_client.api.collaboration_api import ( + CollaborationApi, +) from lightly.openapi_generated.swagger_client.api.jobs_api import JobsApi from lightly.openapi_generated.swagger_client.api.mappings_api import MappingsApi from lightly.openapi_generated.swagger_client.api.samplings_api import SamplingsApi from lightly.openapi_generated.swagger_client.api.tags_api import TagsApi -from lightly.openapi_generated.swagger_client.models.async_task_data import AsyncTaskData -from lightly.openapi_generated.swagger_client.models.dataset_embedding_data import DatasetEmbeddingData -from lightly.openapi_generated.swagger_client.models.job_result_type import JobResultType +from lightly.openapi_generated.swagger_client.models.async_task_data import ( + AsyncTaskData, +) +from lightly.openapi_generated.swagger_client.models.dataset_embedding_data import ( + DatasetEmbeddingData, +) +from lightly.openapi_generated.swagger_client.models.job_result_type import ( + JobResultType, +) from lightly.openapi_generated.swagger_client.models.job_state import JobState -from lightly.openapi_generated.swagger_client.models.job_status_data import JobStatusData -from lightly.openapi_generated.swagger_client.models.job_status_data_result import JobStatusDataResult -from lightly.openapi_generated.swagger_client.models.sampling_create_request import SamplingCreateRequest +from lightly.openapi_generated.swagger_client.models.job_status_data import ( + JobStatusData, +) +from lightly.openapi_generated.swagger_client.models.job_status_data_result import ( + JobStatusDataResult, +) +from lightly.openapi_generated.swagger_client.models.sampling_create_request import ( + SamplingCreateRequest, +) from lightly.openapi_generated.swagger_client.models.tag_data import TagData -from lightly.openapi_generated.swagger_client.models.write_csv_url_data import WriteCSVUrlData -from lightly.openapi_generated.swagger_client.models.datasource_config import DatasourceConfig -from lightly.openapi_generated.swagger_client.models.datasource_config_base import DatasourceConfigBase -from lightly.openapi_generated.swagger_client.models.datasource_processed_until_timestamp_request import DatasourceProcessedUntilTimestampRequest -from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_data import DatasourceRawSamplesData -from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_data_row import DatasourceRawSamplesDataRow -from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_predictions_data import DatasourceRawSamplesPredictionsData -from lightly.openapi_generated.swagger_client.models.shared_access_config_create_request import SharedAccessConfigCreateRequest -from 
lightly.openapi_generated.swagger_client.models.shared_access_config_data import SharedAccessConfigData -from lightly.openapi_generated.swagger_client.models.shared_access_type import SharedAccessType +from lightly.openapi_generated.swagger_client.models.write_csv_url_data import ( + WriteCSVUrlData, +) +from lightly.openapi_generated.swagger_client.models.datasource_config import ( + DatasourceConfig, +) +from lightly.openapi_generated.swagger_client.models.datasource_config_base import ( + DatasourceConfigBase, +) +from lightly.openapi_generated.swagger_client.models.datasource_processed_until_timestamp_request import ( + DatasourceProcessedUntilTimestampRequest, +) +from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_data import ( + DatasourceRawSamplesData, +) +from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_data_row import ( + DatasourceRawSamplesDataRow, +) +from lightly.openapi_generated.swagger_client.models.datasource_raw_samples_predictions_data import ( + DatasourceRawSamplesPredictionsData, +) +from lightly.openapi_generated.swagger_client.models.shared_access_config_create_request import ( + SharedAccessConfigCreateRequest, +) +from lightly.openapi_generated.swagger_client.models.shared_access_config_data import ( + SharedAccessConfigData, +) +from lightly.openapi_generated.swagger_client.models.shared_access_type import ( + SharedAccessType, +) + def _check_dataset_id(dataset_id: str): assert isinstance(dataset_id, str) @@ -81,27 +172,30 @@ def __init__(self, api_client): EmbeddingsApi.__init__(self, api_client=api_client) self.embeddings = [ DatasetEmbeddingData( - id='embedding_id_xyz', - name='embedding_name_xxyyzz', + id="embedding_id_xyz", + name="embedding_name_xxyyzz", is_processed=True, created_at=0, ), DatasetEmbeddingData( - id='embedding_id_xyz_2', - name='default', + id="embedding_id_xyz_2", + name="default", is_processed=True, created_at=0, - ) - + ), ] def get_embeddings_csv_write_url_by_id(self, dataset_id: str, **kwargs): _check_dataset_id(dataset_id) assert isinstance(dataset_id, str) - response_ = WriteCSVUrlData(signed_write_url="signed_write_url_valid", embedding_id="embedding_id_xyz") + response_ = WriteCSVUrlData( + signed_write_url="signed_write_url_valid", embedding_id="embedding_id_xyz" + ) return response_ - def get_embeddings_by_dataset_id(self, dataset_id, **kwargs) -> List[DatasetEmbeddingData]: + def get_embeddings_by_dataset_id( + self, dataset_id, **kwargs + ) -> List[DatasetEmbeddingData]: _check_dataset_id(dataset_id) assert isinstance(dataset_id, str) return self.embeddings @@ -112,11 +206,13 @@ def trigger2d_embeddings_job(self, body, dataset_id, embedding_id, **kwargs): def get_embeddings_csv_read_url_by_id(self, dataset_id, embedding_id, **kwargs): _check_dataset_id(dataset_id) - return 'https://my-embedding-read-url.com' + return "https://my-embedding-read-url.com" class MockedSamplingsApi(SamplingsApi): - def trigger_sampling_by_id(self, body: SamplingCreateRequest, dataset_id, embedding_id, **kwargs): + def trigger_sampling_by_id( + self, body: SamplingCreateRequest, dataset_id, embedding_id, **kwargs + ): _check_dataset_id(dataset_id) assert isinstance(body, SamplingCreateRequest) assert isinstance(dataset_id, str) @@ -134,13 +230,26 @@ def get_job_status_by_id(self, job_id, **kwargs): assert isinstance(job_id, str) self.no_calls += 1 if self.no_calls > 3: - result = JobStatusDataResult(type=JobResultType.SAMPLING, data="selection_tag_id_xyz") - response_ = JobStatusData(id="id_", 
status=JobState.FINISHED, wait_time_till_next_poll=0, - created_at=1234, finished_at=1357, result=result) + result = JobStatusDataResult( + type=JobResultType.SAMPLING, data="selection_tag_id_xyz" + ) + response_ = JobStatusData( + id="id_", + status=JobState.FINISHED, + wait_time_till_next_poll=0, + created_at=1234, + finished_at=1357, + result=result, + ) else: result = None - response_ = JobStatusData(id="id_", status=JobState.RUNNING, wait_time_till_next_poll=0.001, - created_at=1234, result=result) + response_ = JobStatusData( + id="id_", + status=JobState.RUNNING, + wait_time_till_next_poll=0.001, + created_at=1234, + result=result, + ) return response_ @@ -156,42 +265,89 @@ def get_tag_by_tag_id(self, dataset_id, tag_id, **kwargs): _check_dataset_id(dataset_id) assert isinstance(dataset_id, str) assert isinstance(tag_id, str) - response_ = TagData(id=tag_id, dataset_id=dataset_id, prev_tag_id="initial-tag", bit_mask_data="0x80bda23e9", - name='second-tag', tot_size=15, created_at=1577836800, changes=dict()) + response_ = TagData( + id=tag_id, + dataset_id=dataset_id, + prev_tag_id="initial-tag", + bit_mask_data="0x80bda23e9", + name="second-tag", + tot_size=15, + created_at=1577836800, + changes=dict(), + ) return response_ def get_tags_by_dataset_id(self, dataset_id, **kwargs): _check_dataset_id(dataset_id) - if dataset_id == 'xyz-no-tags': + if dataset_id == "xyz-no-tags": return [] - tag_1 = TagData(id='inital_tag_id', dataset_id=dataset_id, prev_tag_id=None, - bit_mask_data="0xF", name='initial-tag', tot_size=4, - created_at=1577836800, changes=dict()) - tag_2 = TagData(id='query_tag_id_xyz', dataset_id=dataset_id, prev_tag_id="initial-tag", - bit_mask_data="0xF", name='query_tag_name_xyz', tot_size=4, - created_at=1577836800, changes=dict()) - tag_3 = TagData(id='preselected_tag_id_xyz', dataset_id=dataset_id, prev_tag_id="initial-tag", - bit_mask_data="0x1", name='preselected_tag_name_xyz', tot_size=4, - created_at=1577836800, changes=dict()) - tag_4 = TagData(id='selected_tag_xyz', dataset_id=dataset_id, prev_tag_id="preselected_tag_id_xyz", - bit_mask_data="0x3", name='selected_tag_xyz', tot_size=4, - created_at=1577836800, changes=dict()) - tag_5 = TagData(id='tag_with_integer_name', dataset_id=dataset_id, prev_tag_id=None, - bit_mask_data='0x1', name='1000', tot_size=4, - created_at=1577836800, changes=dict()) + tag_1 = TagData( + id="inital_tag_id", + dataset_id=dataset_id, + prev_tag_id=None, + bit_mask_data="0xF", + name="initial-tag", + tot_size=4, + created_at=1577836800, + changes=dict(), + ) + tag_2 = TagData( + id="query_tag_id_xyz", + dataset_id=dataset_id, + prev_tag_id="initial-tag", + bit_mask_data="0xF", + name="query_tag_name_xyz", + tot_size=4, + created_at=1577836800, + changes=dict(), + ) + tag_3 = TagData( + id="preselected_tag_id_xyz", + dataset_id=dataset_id, + prev_tag_id="initial-tag", + bit_mask_data="0x1", + name="preselected_tag_name_xyz", + tot_size=4, + created_at=1577836800, + changes=dict(), + ) + tag_4 = TagData( + id="selected_tag_xyz", + dataset_id=dataset_id, + prev_tag_id="preselected_tag_id_xyz", + bit_mask_data="0x3", + name="selected_tag_xyz", + tot_size=4, + created_at=1577836800, + changes=dict(), + ) + tag_5 = TagData( + id="tag_with_integer_name", + dataset_id=dataset_id, + prev_tag_id=None, + bit_mask_data="0x1", + name="1000", + tot_size=4, + created_at=1577836800, + changes=dict(), + ) tags = [tag_1, tag_2, tag_3, tag_4, tag_5] no_tags_to_return = getattr(self, "no_tags", 5) tags = tags[:no_tags_to_return] return tags - def 
perform_tag_arithmetics(self, body: TagArithmeticsRequest, dataset_id, **kwargs): + def perform_tag_arithmetics( + self, body: TagArithmeticsRequest, dataset_id, **kwargs + ): _check_dataset_id(dataset_id) - if (body.new_tag_name is None) or (body.new_tag_name == ''): + if (body.new_tag_name is None) or (body.new_tag_name == ""): return TagBitMaskResponse(bit_mask_data="0x2") else: return CreateEntityResponse(id="tag-arithmetic-created") - def perform_tag_arithmetics_bitmask(self, body: TagArithmeticsRequest, dataset_id, **kwargs): + def perform_tag_arithmetics_bitmask( + self, body: TagArithmeticsRequest, dataset_id, **kwargs + ): _check_dataset_id(dataset_id) return TagBitMaskResponse(bit_mask_data="0x2") @@ -199,11 +355,18 @@ def upsize_tags_by_dataset_id(self, body, dataset_id, **kwargs): _check_dataset_id(dataset_id) assert body.upsize_tag_creator == TagCreator.USER_PIP - def create_tag_by_dataset_id(self, body, dataset_id, **kwargs): + def create_tag_by_dataset_id(self, body, dataset_id, **kwargs) -> TagData: _check_dataset_id(dataset_id) - tag = TagData(id='inital_tag_id', dataset_id=dataset_id, prev_tag_id=body['prev_tag_id'], - bit_mask_data=body['bit_mask_data'], name=body['name'], tot_size=10, - created_at=1577836800, changes=dict()) + tag = TagData( + id="inital_tag_id", + dataset_id=dataset_id, + prev_tag_id=body["prev_tag_id"], + bit_mask_data=body["bit_mask_data"], + name=body["name"], + tot_size=10, + created_at=1577836800, + changes=dict(), + ) return tag def delete_tag_by_tag_id(self, dataset_id, tag_id, **kwargs): @@ -214,14 +377,82 @@ def delete_tag_by_tag_id(self, dataset_id, tag_id, **kwargs): # assert that tag is a leaf assert all([tag.prev_tag_id != tag_id for tag in tags]) - def export_tag_to_label_studio_tasks(self, dataset_id: str, tag_id: str): - return [{'id': 0, 'data': {'image': 'https://api.lightly.ai/v1/datasets/62383ab8f9cb290cd83ab5f9/samples/62383cb7e6a0f29e3f31e213/readurlRedirect?type=full&CENSORED', 'lightlyFileName': '2008_006249_jpg.rf.fdd64460945ca901aa3c7e48ffceea83.jpg', 'lightlyMetaInfo': {'type': 'IMAGE', 'datasetId': '62383ab8f9cb290cd83ab5f9', 'fileName': '2008_006249_jpg.rf.fdd64460945ca901aa3c7e48ffceea83.jpg', 'exif': {}, 'index': 0, 'createdAt': 1647852727873, 'lastModifiedAt': 1647852727873, 'metaData': {'sharpness': 27.31265790443818, 'sizeInBytes': 48224, 'snr': 2.1969673926211217, 'mean': [0.24441662557257224, 0.4460417517905863, 0.6960984853824035], 'shape': [167, 500, 3], 'std': [0.12448681278605961, 0.09509570033043004, 0.0763725998175394], 'sumOfSquares': [6282.243860049413, 17367.702452895475, 40947.22059208768], 'sumOfValues': [20408.78823530978, 37244.486274513954, 58124.22352943069]}}}}] + def export_tag_to_label_studio_tasks( + self, dataset_id: str, tag_id: str, **kwargs + ) -> List[LabelStudioTask]: + if kwargs["page_offset"] and kwargs["page_offset"] > 0: + return [] + return [ + LabelStudioTask( + id = 0, + data = LabelStudioTaskData( + image = "https://api.lightly.ai/v1/datasets/62383ab8f9cb290cd83ab5f9/samples/62383cb7e6a0f29e3f31e213/readurlRedirect?type=full&CENSORED", + lightly_file_name = "2008_006249_jpg.rf.fdd64460945ca901aa3c7e48ffceea83.jpg", + lightly_meta_info = SampleData( + id = "sample_id_0", + type = "IMAGE", + dataset_id = dataset_id, + file_name = "2008_006249_jpg.rf.fdd64460945ca901aa3c7e48ffceea83.jpg", + exif = {}, + index = 0, + created_at = 1647852727873, + last_modified_at = 1647852727873, + meta_data = SampleMetaData( + sharpness = 27.31265790443818, + size_in_bytes = 48224, + snr = 
2.1969673926211217, + mean = [ + 0.24441662557257224, + 0.4460417517905863, + 0.6960984853824035, + ], + shape = [167, 500, 3], + std = [ + 0.12448681278605961, + 0.09509570033043004, + 0.0763725998175394, + ], + sum_of_squares = [ + 6282.243860049413, + 17367.702452895475, + 40947.22059208768, + ], + sum_of_values = [ + 20408.78823530978, + 37244.486274513954, + 58124.22352943069, + ], + ), + ), + ) + ) + ] + + def export_tag_to_label_box_data_rows( + self, dataset_id: str, tag_id: str, **kwargs + ) -> List[LabelBoxDataRow]: + if kwargs["page_offset"] and kwargs["page_offset"] > 0: + return [] + return [ + LabelBoxDataRow( + external_id = "2008_007291_jpg.rf.2fca436925b52ea33cf897125a34a2fb.jpg", + image_url = "https://api.lightly.ai/v1/datasets/62383ab8f9cb290cd83ab5f9/samples/62383cb7e6a0f29e3f31e233/readurlRedirect?type=CENSORED", + ) + ] + def export_tag_to_basic_filenames_and_read_urls( + self, dataset_id: str, tag_id: str, **kwargs + ) -> List[FilenameAndReadUrl]: + if kwargs["page_offset"] and kwargs["page_offset"] > 0: + return [] + return [ + FilenameAndReadUrl( + file_name = "export-basic-test-sample-0.png", + read_url = "https://storage.googleapis.com/somwhere/export-basic-test-sample-0.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=CENSORED", + ) + ] - def export_tag_to_label_box_data_rows(self, dataset_id: str, tag_id: str): - return [{'externalId': '2008_007291_jpg.rf.2fca436925b52ea33cf897125a34a2fb.jpg', 'imageUrl': 'https://api.lightly.ai/v1/datasets/62383ab8f9cb290cd83ab5f9/samples/62383cb7e6a0f29e3f31e233/readurlRedirect?type=CENSORED'}] - - def export_tag_to_basic_filenames(self, dataset_id: str, tag_id: str): + def export_tag_to_basic_filenames(self, dataset_id: str, tag_id: str) -> str: return """ IMG_2276_jpeg_jpg.rf.7411b1902c81bad8cdefd2cc4eb3a97b.jpg IMG_2285_jpeg_jpg.rf.4a93d99b9f0b6cccfb27bf2f4a13b99e.jpg @@ -230,8 +461,9 @@ def export_tag_to_basic_filenames(self, dataset_id: str, tag_id: str): class MockedScoresApi(ScoresApi): - def create_or_update_active_learning_score_by_tag_id(self, body, dataset_id, tag_id, **kwargs) -> \ - CreateEntityResponse: + def create_or_update_active_learning_score_by_tag_id( + self, body, dataset_id, tag_id, **kwargs + ) -> CreateEntityResponse: _check_dataset_id(dataset_id) if len(body.scores) > 0 and not isinstance(body.scores[0], float): raise AttributeError @@ -245,15 +477,14 @@ def __init__(self, samples_api, *args, **kwargs): MappingsApi.__init__(self, *args, **kwargs) self.n_samples = N_FILES_ON_SERVER - sample_names = [f'img_{i}.jpg' for i in range(self.n_samples)] + sample_names = [f"img_{i}.jpg" for i in range(self.n_samples)] sample_names.reverse() self.sample_names = sample_names - def get_sample_mappings_by_dataset_id(self, dataset_id, field, **kwargs): - if dataset_id == 'xyz-no-tags': + if dataset_id == "xyz-no-tags": return [] - return self.sample_names[:self.n_samples] + return self.sample_names[: self.n_samples] class MockedSamplesApi(SamplesApi): @@ -261,43 +492,39 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.sample_create_requests: List[SampleCreateRequest] = [] - def get_samples_by_dataset_id( - self, dataset_id, **kwargs - ) -> List[SampleData]: + def get_samples_by_dataset_id(self, dataset_id, **kwargs) -> List[SampleData]: samples = [] for i, body in enumerate(self.sample_create_requests): sample = SampleData( - id=f'{i}_xyz', - dataset_id='dataset_id_xyz', + id=f"{i}_xyz", + dataset_id="dataset_id_xyz", file_name=body.file_name, - type='Images', + type="Images", 
) samples.append(sample) return samples - + def get_samples_partial_by_dataset_id( self, - dataset_id = 'dataset_id_xyz', + dataset_id="dataset_id_xyz", mode: SamplePartialMode = SamplePartialMode.FULL, - **kwargs + **kwargs, ) -> List[SampleData]: samples = [] for i, body in enumerate(self.sample_create_requests): - if mode==SamplePartialMode.IDS: - sample = SampleDataModes( - id=f'{i}_xyz' - ) - elif mode==SamplePartialMode.FILENAMES: + if mode == SamplePartialMode.IDS: + sample = SampleDataModes(id=f"{i}_xyz") + elif mode == SamplePartialMode.FILENAMES: sample = SampleDataModes( - id=f'{i}_xyz', + id=f"{i}_xyz", file_name=body.file_name, ) else: sample = SampleDataModes( - id=f'{i}_xyz', - dataset_id=dataset_id, + id=f"{i}_xyz", + dataset_id=dataset_id, file_name=body.file_name, - type='Images', + type="Images", ) samples.append(sample) return samples @@ -309,7 +536,9 @@ def create_sample_by_dataset_id(self, body, dataset_id, **kwargs): self.sample_create_requests.append(body) return response_ - def get_sample_image_write_url_by_id(self, dataset_id, sample_id, is_thumbnail, **kwargs): + def get_sample_image_write_url_by_id( + self, dataset_id, sample_id, is_thumbnail, **kwargs + ): _check_dataset_id(dataset_id) url = f"{sample_id}_write_url" return url @@ -319,7 +548,9 @@ def get_sample_image_read_url_by_id(self, dataset_id, sample_id, type, **kwargs) url = f"{sample_id}_write_url" return url - def get_sample_image_write_urls_by_id(self, dataset_id, sample_id, **kwargs) -> SampleWriteUrls: + def get_sample_image_write_urls_by_id( + self, dataset_id, sample_id, **kwargs + ) -> SampleWriteUrls: _check_dataset_id(dataset_id) thumb_url = f"{sample_id}_thumb_write_url" full_url = f"{sample_id}_full_write_url" @@ -336,14 +567,15 @@ def __init__(self, api_client): no_datasets = 3 self.default_datasets = [ DatasetData( - name=f"dataset_{i}", - id=f"dataset_{i}_id", + name=f"dataset_{i}", + id=f"dataset_{i}_id", last_modified_at=i, - type="", img_type="full", - size_in_bytes=-1, - n_samples=-1, + type="", + img_type="full", + size_in_bytes=-1, + n_samples=-1, created_at=-1, - user_id='user_0', + user_id="user_0", ) for i in range(no_datasets) ] @@ -364,26 +596,28 @@ def dataset_exists(self, dataset_id: str): def create_dataset(self, body: DatasetCreateRequest, **kwargs): assert isinstance(body, DatasetCreateRequest) id = body.name + "_id" - if body.name == 'xyz-no-tags': - id = 'xyz-no-tags' + if body.name == "xyz-no-tags": + id = "xyz-no-tags" dataset = DatasetData( - id=id, - name=body.name, + id=id, + name=body.name, last_modified_at=len(self.datasets) + 1, - type="Images", - size_in_bytes=-1, - n_samples=-1, + type="Images", + size_in_bytes=-1, + n_samples=-1, created_at=-1, - user_id='user_0', + user_id="user_0", ) self.datasets += [dataset] response_ = CreateEntityResponse(id=id) return response_ - def get_dataset_by_id(self, dataset_id): _check_dataset_id(dataset_id) - dataset = next((dataset for dataset in self.default_datasets if dataset_id == dataset.id), None) + dataset = next( + (dataset for dataset in self.default_datasets if dataset_id == dataset.id), + None, + ) if dataset is None: raise ApiException() return dataset @@ -394,7 +628,9 @@ def register_dataset_upload_by_id(self, body, dataset_id): def delete_dataset_by_id(self, dataset_id, **kwargs): _check_dataset_id(dataset_id) - datasets_without_that_id = [dataset for dataset in self.datasets if dataset.id != dataset_id] + datasets_without_that_id = [ + dataset for dataset in self.datasets if dataset.id != dataset_id + ] assert 
len(datasets_without_that_id) == len(self.datasets) - 1 self.datasets = datasets_without_that_id @@ -410,8 +646,12 @@ def __init__(self, api_client=None): def reset(self): - local_datasource = DatasourceConfigBase(type='LOCAL', full_path='', purpose='INPUT_OUTPUT').to_dict() - azure_datasource = DatasourceConfigBase(type='AZURE', full_path='', purpose='INPUT_OUTPUT').to_dict() + local_datasource = DatasourceConfigBase( + type="LOCAL", full_path="", purpose="INPUT_OUTPUT" + ).to_dict() + azure_datasource = DatasourceConfigBase( + type="AZURE", full_path="", purpose="INPUT_OUTPUT" + ).to_dict() self._datasources = { "dataset_id_xyz": local_datasource, @@ -422,9 +662,7 @@ def reset(self): def _default_samples(self): return [ - DatasourceRawSamplesDataRow( - file_name=f"file_{i}", read_url=f"url_{i}" - ) + DatasourceRawSamplesDataRow(file_name=f"file_{i}", read_url=f"url_{i}") for i in range(self._num_samples) ] @@ -442,17 +680,20 @@ def get_datasource_processed_until_timestamp_by_dataset_id( return DatasourceProcessedUntilTimestampResponse(timestamp) def get_list_of_raw_samples_from_datasource_by_dataset_id( - self, - dataset_id, - cursor: str = None, - _from: int = None, - to: int = None, - relevant_filenames_file_name: str = -1, - use_redirected_read_url: bool = False, + self, + dataset_id, + cursor: str = None, + _from: int = None, + to: int = None, + relevant_filenames_file_name: str = -1, + use_redirected_read_url: bool = False, ) -> DatasourceRawSamplesData: if relevant_filenames_file_name == -1: samples = self._samples[dataset_id] - elif isinstance(relevant_filenames_file_name, str) and len(relevant_filenames_file_name) > 0: + elif ( + isinstance(relevant_filenames_file_name, str) + and len(relevant_filenames_file_name) > 0 + ): samples = self._samples[dataset_id][::2] else: raise RuntimeError("DATASET_DATASOURCE_RELEVANT_FILENAMES_INVALID") @@ -482,7 +723,8 @@ def get_list_of_raw_samples_from_datasource_by_dataset_id( ) def get_list_of_raw_samples_predictions_from_datasource_by_dataset_id( - self, dataset_id: str, + self, + dataset_id: str, task_name: str, cursor: str = None, _from: int = None, @@ -547,24 +789,24 @@ def get_list_of_raw_samples_metadata_from_datasource_by_dataset_id( data=samples, ) - - def get_prediction_file_read_url_from_datasource_by_dataset_id(self, *args, **kwargs): - return 'https://my-read-url.com' - + def get_prediction_file_read_url_from_datasource_by_dataset_id( + self, *args, **kwargs + ): + return "https://my-read-url.com" def update_datasource_by_dataset_id( self, body: DatasourceConfig, dataset_id: str, **kwargs ) -> None: - # TODO: Enable assert once we switch/update to new api code generator. - # assert isinstance(body, DatasourceConfig) - self._datasources[dataset_id] = body # type: ignore + # TODO: Enable assert once we switch/update to new api code generator. 
+ # assert isinstance(body, DatasourceConfig) + self._datasources[dataset_id] = body # type: ignore def update_datasource_processed_until_timestamp_by_dataset_id( self, body, dataset_id, **kwargs ) -> None: assert isinstance(body, DatasourceProcessedUntilTimestampRequest) to = body.processed_until_timestamp - self._processed_until_timestamp[dataset_id] = to # type: ignore + self._processed_until_timestamp[dataset_id] = to # type: ignore class MockedComputeWorkerApi(DockerApi): @@ -608,22 +850,22 @@ def __init__(self, api_client=None): def register_docker_worker(self, body, **kwargs): assert isinstance(body, CreateDockerWorkerRegistryEntryRequest) - return CreateEntityResponse(id='worker-id-123') + return CreateEntityResponse(id="worker-id-123") def delete_docker_worker_registry_entry_by_id(self, worker_id, **kwargs): - assert worker_id == 'worker-id-123' + assert worker_id == "worker-id-123" def get_docker_worker_registry_entries(self, **kwargs): return self._registered_workers def create_docker_worker_config(self, body, **kwargs): assert isinstance(body, DockerWorkerConfigCreateRequest) - return CreateEntityResponse(id='worker-config-id-123') + return CreateEntityResponse(id="worker-config-id-123") def create_docker_run_scheduled_by_dataset_id(self, body, dataset_id, **kwargs): assert isinstance(body, DockerRunScheduledCreateRequest) _check_dataset_id(dataset_id) - return CreateEntityResponse(id=f'scheduled-run-id-123-dataset-{dataset_id}') + return CreateEntityResponse(id=f"scheduled-run-id-123-dataset-{dataset_id}") def get_docker_runs(self, **kwargs): return self._compute_worker_runs @@ -641,54 +883,84 @@ def get_latest_pip_version(self, **kwargs): def get_minimum_compatible_pip_version(self, **kwargs): return "1.0.0" + class MockedQuotaApi(QuotaApi): def get_quota_maximum_dataset_size(self, **kwargs): return "60000" + def mocked_request_put(dst_url: str, data=IOBase) -> Response: assert isinstance(dst_url, str) content_bytes: bytes = data.read() - content_str: str = content_bytes.decode('utf-8') - assert content_str.startswith('filenames') + content_str: str = content_bytes.decode("utf-8") + assert content_str.startswith("filenames") response_ = Response() response_.status_code = 200 return response_ class MockedApiClient(ApiClient): - def request(self, method, url, query_params=None, headers=None, - post_params=None, body=None, _preload_content=True, - _request_timeout=None): - raise ValueError("ERROR: calling ApiClient.request(), but this should be mocked.") + def request( + self, + method, + url, + query_params=None, + headers=None, + post_params=None, + body=None, + _preload_content=True, + _request_timeout=None, + ): + raise ValueError( + "ERROR: calling ApiClient.request(), but this should be mocked." 
+ ) - def call_api(self, resource_path, method, - path_params=None, query_params=None, header_params=None, - body=None, post_params=None, files=None, - response_type=None, auth_settings=None, async_req=None, - _return_http_data_only=None, collection_formats=None, - _preload_content=True, _request_timeout=None): - raise ValueError("ERROR: calling ApiClient.call_api(), but this should be mocked.") + def call_api( + self, + resource_path, + method, + path_params=None, + query_params=None, + header_params=None, + body=None, + post_params=None, + files=None, + response_type=None, + auth_settings=None, + async_req=None, + _return_http_data_only=None, + collection_formats=None, + _preload_content=True, + _request_timeout=None, + ): + raise ValueError( + "ERROR: calling ApiClient.call_api(), but this should be mocked." + ) class MockedAPICollaboration(CollaborationApi): - def create_or_update_shared_access_config_by_dataset_id(self, body, dataset_id, **kwargs): + def create_or_update_shared_access_config_by_dataset_id( + self, body, dataset_id, **kwargs + ): assert isinstance(body, SharedAccessConfigCreateRequest) - return CreateEntityResponse(id='access-share-config') + return CreateEntityResponse(id="access-share-config") def get_shared_access_configs_by_dataset_id(self, dataset_id, **kwargs): write_config = SharedAccessConfigData( - id='some-id', - owner='owner-id', - users=["user1@gmail.com", "user2@something.com"], - organizations=['some-id'], + id="some-id", + owner="owner-id", + users=["user1@gmail.com", "user2@something.com"], + organizations=["some-id"], created_at=Timestamp(0), last_modified_at=Timestamp(0), - access_type=SharedAccessType.WRITE) + access_type=SharedAccessType.WRITE, + ) return [write_config] + class MockedApiWorkflowClient(ApiWorkflowClient): - embeddings_filename_base = 'img' + embeddings_filename_base = "img" n_embedding_rows_on_server = N_FILES_ON_SERVER def __init__(self, *args, **kwargs): @@ -701,8 +973,9 @@ def __init__(self, *args, **kwargs): self._tags_api = MockedTagsApi(api_client=self.api_client) self._embeddings_api = MockedEmbeddingsApi(api_client=self.api_client) self._samples_api = MockedSamplesApi(api_client=self.api_client) - self._mappings_api = MockedMappingsApi(api_client=self.api_client, - samples_api=self._samples_api) + self._mappings_api = MockedMappingsApi( + api_client=self.api_client, samples_api=self._samples_api + ) self._scores_api = MockedScoresApi(api_client=self.api_client) self._datasets_api = MockedDatasetsApi(api_client=self.api_client) self._datasources_api = MockedDatasourcesApi(api_client=self.api_client) @@ -715,9 +988,13 @@ def __init__(self, *args, **kwargs): self.wait_time_till_next_poll = 0.001 # for api_workflow_selection def upload_file_with_signed_url( - self, file: IOBase, signed_write_url: str, - max_backoff: int = 32, max_retries: int = 5, headers: Dict = None, - session: Optional[requests.Session] = None, + self, + file: IOBase, + signed_write_url: str, + max_backoff: int = 32, + max_retries: int = 5, + headers: Dict = None, + session: Optional[requests.Session] = None, ) -> Response: res = Response() return res @@ -726,9 +1003,11 @@ def _get_csv_reader_from_read_url(self, read_url: str): n_rows: int = self.n_embedding_rows_on_server n_dims: int = self.n_dims_embeddings_on_server - rows_csv = [['filenames'] + [f'embedding_{i}' for i in range(n_dims)] + ['labels']] + rows_csv = [ + ["filenames"] + [f"embedding_{i}" for i in range(n_dims)] + ["labels"] + ] for i in range(n_rows): - row = 
[f'{self.embeddings_filename_base}_{i}.jpg']
+            row = [f"{self.embeddings_filename_base}_{i}.jpg"]
             for _ in range(n_dims):
                 row.append(np.random.uniform(0, 1))
             row.append(i)
@@ -747,9 +1026,9 @@ def _get_csv_reader_from_read_url(self, read_url: str):
 class MockedApiWorkflowSetup(unittest.TestCase):
-    EMBEDDINGS_FILENAME_BASE: str = 'sample'
-
-    def setUp(self, token="token_xyz", dataset_id="dataset_id_xyz") -> None:
-        self.api_workflow_client = MockedApiWorkflowClient(token=token, dataset_id=dataset_id)
-
+    EMBEDDINGS_FILENAME_BASE: str = "sample"
+
+    def setUp(self, token="token_xyz", dataset_id="dataset_id_xyz") -> None:
+        self.api_workflow_client = MockedApiWorkflowClient(
+            token=token, dataset_id=dataset_id
+        )
diff --git a/tests/api_workflow/test_api_workflow_download_dataset.py b/tests/api_workflow/test_api_workflow_download_dataset.py
index 47aebfa34..37265a60a 100644
--- a/tests/api_workflow/test_api_workflow_download_dataset.py
+++ b/tests/api_workflow/test_api_workflow_download_dataset.py
@@ -5,6 +5,9 @@
 import PIL
 import numpy as np
+from lightly.openapi_generated.swagger_client.models.filename_and_read_url import FilenameAndReadUrl
+from lightly.openapi_generated.swagger_client.models.label_box_data_row import LabelBoxDataRow
+from lightly.openapi_generated.swagger_client.models.label_studio_task import LabelStudioTask
 import torchvision
@@ -44,14 +47,18 @@ def my_func(read_url):
     def test_export_label_box_data_rows_by_tag_name(self):
         rows = self.api_workflow_client.export_label_box_data_rows_by_tag_name('initial-tag')
         self.assertIsNotNone(rows)
-        self.assertTrue(all(isinstance(row, dict) for row in rows))
+        self.assertTrue(all(isinstance(row, LabelBoxDataRow) for row in rows))
 
     def test_export_label_studio_tasks_by_tag_name(self):
         tasks = self.api_workflow_client.export_label_studio_tasks_by_tag_name('initial-tag')
         self.assertIsNotNone(tasks)
-        self.assertTrue(all(isinstance(task, dict) for task in tasks))
+        self.assertTrue(all(isinstance(task, LabelStudioTask) for task in tasks))
 
+    def test_export_filenames_and_read_urls_by_tag_name(self):
+        mappings = self.api_workflow_client.export_filenames_and_read_urls_by_tag_name('initial-tag')
+        self.assertIsNotNone(mappings)
+        self.assertTrue(
+            all(isinstance(mapping, FilenameAndReadUrl) for mapping in mappings)
+        )
+
     def test_export_filenames_by_tag_name(self):
         filenames = self.api_workflow_client.export_filenames_by_tag_name('initial-tag')
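
Reviewer note on the pagination contract: the `paginate_endpoint` helper itself is added in lightly/api/utils.py and does not appear in the hunks above; the mocked endpoints only encode the contract it relies on. Each paginated endpoint accepts `page_size` and `page_offset` keyword arguments and returns one page per call, with a short or empty page marking the end. A minimal sketch of such a helper, under the assumption that the offset advances by the page size (the real implementation in utils.py may differ), looks like this:

    from typing import Any, Callable, List

    def paginate_endpoint_sketch(
        fn: Callable[..., List[Any]], page_size: int = 20000, **kwargs
    ) -> List[Any]:
        # Collect all entries by calling `fn` page by page until a page
        # comes back shorter than `page_size`.
        entries: List[Any] = []
        page_offset = 0
        while True:
            page = fn(page_size=page_size, page_offset=page_offset, **kwargs)
            entries.extend(page)
            if len(page) < page_size:
                return entries
            page_offset += page_size

This also explains the shape of the mocked endpoints: they return a single non-empty page for `page_offset == 0` and an empty list for any later offset, so both a "stop on short page" and a "stop on empty page" loop terminate after at most two calls.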
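
For completeness, a hypothetical consumer of the new typed export (the token, dataset id, and tag name below are placeholders; `file_name` and `read_url` are the attributes shown in the FilenameAndReadUrl mock above):

    from lightly.api.api_workflow_client import ApiWorkflowClient

    client = ApiWorkflowClient(token="MY_TOKEN", dataset_id="MY_DATASET_ID")
    rows = client.export_filenames_and_read_urls_by_tag_name("initial-tag")
    # Build the filename -> readURL mapping this PR is about.
    filename_to_read_url = {row.file_name: row.read_url for row in rows}

Because the endpoint is now paginated server-side, this also works for tags with far more than 20000 samples: the client fetches the export in pages instead of one oversized response.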