From 0a37127021f4dd950d664bf3a3203b72cc446bbb Mon Sep 17 00:00:00 2001 From: Chris Schopp <56572144+chrisschopp@users.noreply.github.com> Date: Sun, 1 Dec 2024 04:07:30 +0000 Subject: [PATCH 1/7] Move `kedro-catalog` JSON schema to `kedro-datasets` #4258 Signed-off-by: Chris Schopp <56572144+chrisschopp@users.noreply.github.com> --- static/jsonschema/kedro-catalog-0.15.9.json | 1202 --------------- static/jsonschema/kedro-catalog-0.16.json | 763 ---------- static/jsonschema/kedro-catalog-0.17.json | 950 ------------ static/jsonschema/kedro-catalog-0.18.json | 1423 ------------------ static/jsonschema/kedro-catalog-0.19.json | 1470 ------------------- 5 files changed, 5808 deletions(-) delete mode 100644 static/jsonschema/kedro-catalog-0.15.9.json delete mode 100644 static/jsonschema/kedro-catalog-0.16.json delete mode 100644 static/jsonschema/kedro-catalog-0.17.json delete mode 100644 static/jsonschema/kedro-catalog-0.18.json delete mode 100644 static/jsonschema/kedro-catalog-0.19.json diff --git a/static/jsonschema/kedro-catalog-0.15.9.json b/static/jsonschema/kedro-catalog-0.15.9.json deleted file mode 100644 index a5e755569d..0000000000 --- a/static/jsonschema/kedro-catalog-0.15.9.json +++ /dev/null @@ -1,1202 +0,0 @@ -{ - "type": "object", - "patternProperties": { - "^[a-z0-9-_]+$": { - "required": ["type"], - "properties": { - "type": { - "type": "string", - "enum": [ - "networkx.NetworkXDataSet", - "dask.ParquetDataSet", - "biosequence.BioSequenceDataSet", - "matplotlib.MatplotlibWriter", - "yaml.YAMLDataSet", - "pickle.PickleDataSet", - "text.TextDataSet", - "spark.SparkJDBCDataSet", - "spark.SparkHiveDataSet", - "spark.SparkDataSet", - "pandas.JSONBlobDataSet", - "pandas.JSONDataSet", - "pandas.SQLTableDataSet", - "pandas.SQLQueryDataSet", - "pandas.ParquetDataSet", - "pandas.FeatherDataSet", - "pandas.CSVBlobDataSet", - "pandas.HDFDataSet", - "pandas.CSVDataSet", - "pandas.ExcelDataSet", - "pandas.GBQTableDataSet", - "PickleLocalDataSet", - "JSONLocalDataSet", - "HDFLocalDataSet", - "PartitionedDataSet", - "CachedDataSet", - "JSONDataSet", - "CSVHTTPDataSet", - "MemoryDataSet", - "CSVLocalDataSet", - "ExcelLocalDataSet", - "LambdaDataSet", - "HDFS3DataSet", - "PickleS3DataSet", - "SQLTableDataSet", - "SQLQueryDataSet", - "CSVS3DataSet", - "ParquetLocalDataSet", - "TextLocalDataSet" - ] - } - }, - "allOf": [ - { - "if": { - "properties": { "type": { "const": "networkx.NetworkXDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "The path to the NetworkX graph JSON file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_graph``.\nSee the details in\nhttps://networkx.org/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_data``.\nSee the details in\nhttps://networkx.org/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. 
for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "dask.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a parquet file\nparquet collection or the directory of a multipart parquet." - }, - "load_args": { - "type": "object", - "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { - "type": { "const": "biosequence.BioSequenceDataSet" } - } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to sequence file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "load_args": { - "type": "object", - "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." - }, - "save_args": { - "type": "object", - "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "matplotlib.MatplotlibWriter" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Key path to a matplot object file(s) prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. 
for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'client_kwargs': {'aws_access_key_id': '', 'aws_secret_access_key': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "yaml.YAMLDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "PyYAML options for saving YAML files (arguments passed\ninto ```yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pickle.PickleDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Pickle file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.loads\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.dumps\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." 
- }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "text.TextDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Load arguments should be specified in accordance with\nthe open function of the underlying filesystem. E.g. for local file\nhttps://docs.python.org/3/library/functions.html#open" - }, - "save_args": { - "type": "object", - "description": "Save arguments should be specified in accordance with\nthe open function of the underlying filesystem. E.g. for local file\nhttps://docs.python.org/3/library/functions.html#open" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkJDBCDataSet" } } - }, - "then": { - "required": ["url", "table"], - "properties": { - "url": { - "type": "string", - "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." - }, - "table": { - "type": "string", - "description": "The name of the table to load or save data to." - }, - "credentials": { - "type": "object", - "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." - }, - "load_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkHiveDataSet" } } - }, - "then": { - "required": ["database", "table", "write_mode"], - "properties": { - "database": { - "type": "string", - "description": "The name of the hive database." 
- }, - "table": { - "type": "string", - "description": "The name of the table within the database." - }, - "write_mode": { - "type": "string", - "description": "``insert``, ``upsert`` or ``overwrite`` are supported." - }, - "table_pk": { - "type": "array", - "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. Is required for ``write_mode=\"upsert\"``." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "spark.SparkDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataSet``s\nstarting with ``/dbfs/mnt``." - }, - "file_format": { - "type": "string", - "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext include parquet, csv. For a list of supported\nformats please refer to Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" - }, - "load_args": { - "type": "object", - "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "save_args": { - "type": "object", - "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``aws_access_key_id``, ``aws_secret_access_key``, if ``filepath``\nprefix is ``s3a://`` or ``s3n://``. Optional keyword arguments passed to\n``hdfs.client.InsecureClient`` if ``filepath`` prefix is ``hdfs://``.\nIgnored otherwise." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.JSONBlobDataSet" } } - }, - "then": { - "required": ["filepath", "container_name", "credentials"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to an Azure Blob of a JSON file." - }, - "container_name": { - "type": "string", - "description": "Azure container name." - }, - "credentials": { - "type": "object", - "description": "Credentials (``account_name`` and\n``account_key`` or ``sas_token``) to access the Azure Blob Storage." - }, - "encoding": { - "type": "string", - "description": "Default utf-8. Defines encoding of JSON files downloaded as binary streams." 
- }, - "blob_from_bytes_args": { - "type": "object", - "description": "Any additional arguments to pass to Azure's\n``create_blob_from_bytes`` method:\nhttps://docs.microsoft.com/en-us/python/api/azure.storage.blob.blockblobservice.blockblobservice?view=azure-python#create-blob-from-bytes" - }, - "blob_to_bytes_args": { - "type": "object", - "description": "Any additional arguments to pass to Azure's\n``get_blob_to_bytes`` method:\nhttps://docs.microsoft.com/en-us/python/api/azure.storage.blob.baseblobservice.baseblobservice?view=azure-python#get-blob-to-bytes" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.JSONDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{project: 'my-project', ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLTableDataSet" } } - }, - "then": { - "required": ["table_name", "credentials"], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. 
It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLQueryDataSet" } } - }, - "then": { - "required": ["sql", "credentials"], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Parquet file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Parquet files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_parquet.html\nAll defaults are preserved." 
- }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow.parquet.write_table`.\nHere you can find all available arguments:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.FeatherDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a feather file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.CSVBlobDataSet" } } - }, - "then": { - "required": ["filepath", "container_name", "credentials"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to an Azure Blob of a CSV file." - }, - "container_name": { - "type": "string", - "description": "Azure container name." - }, - "credentials": { - "type": "object", - "description": "Credentials (``account_name`` and\n``account_key`` or ``sas_token``) to access the Azure Blob Storage." 
- }, - "blob_to_text_args": { - "type": "object", - "description": "Any additional arguments to pass to Azure's\n``get_blob_to_text`` method:\nhttps://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.baseblobservice.baseblobservice?view=azure-python#get-blob-to-text" - }, - "blob_from_text_args": { - "type": "object", - "description": "Any additional arguments to pass to Azure's\n``create_blob_from_text`` method:\nhttps://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.baseblobservice.baseblobservice?view=azure-python#get-blob-to-text" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.HDFDataSet" } } }, - "then": { - "required": ["filepath", "key"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`" - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.CSVDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
- }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ExcelDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Excel file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{\"project\": \"my-project\", ...}`." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.GBQTableDataSet" } } - }, - "then": { - "required": ["dataset", "table_name"], - "properties": { - "dataset": { - "type": "string", - "description": "Google BigQuery dataset." - }, - "table_name": { - "type": "string", - "description": "Google BigQuery table name." 
- }, - "project": { - "type": "string", - "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" - }, - "credentials": { - "pattern": ".*", - "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." - }, - "layer": { - "type": "string", - "description": "The data layer according to the data engineering convention:\nhttps://kedro.readthedocs.io/en/0.15.9/06_resources/01_faq.html#what-is-data-engineering-convention" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "PickleLocalDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to a pkl file." - }, - "backend": { - "type": "string", - "description": "backend to use, must be one of ['pickle', 'joblib']." - }, - "load_args": { - "type": "object", - "description": "Options for loading pickle files. Refer to the help\nfile of ``pickle.load`` or ``joblib.load`` for options." - }, - "save_args": { - "type": "object", - "description": "Options for saving pickle files. Refer to the help\nfile of ``pickle.dump`` or ``joblib.dump`` for options." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "JSONLocalDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to a local json file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ```json.load``.\nSee https://docs.python.org/3/library/json.html for details.\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ```json.dump``.\nSee https://docs.python.org/3/library/json.html\nfor details. All defaults are preserved." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "HDFLocalDataSet" } } }, - "then": { - "required": ["filepath", "key"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to an hdf file." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading hdf files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_hdf.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving hdf files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_hdf.html\nAll defaults are preserved." 
- } - } - } - }, - { - "if": { "properties": { "type": { "const": "PartitionedDataSet" } } }, - "then": { - "required": ["path", "dataset"], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataSet``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataSet``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": "object", - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\n**Note:** ``dataset_credentials`` key has now been deprecated\nand should not be specified.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.15.9/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "CachedDataSet" } } }, - "then": { - "required": ["dataset"], - "properties": { - "dataset": { - "pattern": ".*", - "description": "A Kedro DataSet object or a dictionary to cache." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "JSONDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." 
- }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class.\nE.g. for ``GCSFileSystem`` class: `{project: 'my-project', ...}`" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "CSVHTTPDataSet" } } }, - "then": { - "required": ["fileurl"], - "properties": { - "fileurl": { - "type": "string", - "description": "A URL to fetch the CSV file." - }, - "auth": { - "pattern": ".*", - "description": "Anything ``requests.get`` accepts. Normally it's either\n``('login', 'password')``, or ``AuthBase`` instance for more complex cases." - }, - "load_args": { - "pattern": ".*", - "description": "Pandas options for loading csv files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "MemoryDataSet" } } }, - "then": { - "required": [], - "properties": { - "data": { - "pattern": ".*", - "description": "Python object containing the data." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "CSVLocalDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to a csv file." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading csv files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving csv files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "ExcelLocalDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to an Excel file." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nThe default_load_arg engine is 'xlrd', all others preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files. 
Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "LambdaDataSet" } } }, - "then": { - "required": ["load", "save"], - "properties": { - "load": { - "pattern": ".*", - "description": "Method to load data from a data set." - }, - "save": { - "pattern": ".*", - "description": "Method to save data to a data set." - }, - "exists": { - "pattern": ".*", - "description": "Method to check whether output data already exists." - }, - "release": { - "pattern": ".*", - "description": "Method to release any cached information." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "HDFS3DataSet" } } }, - "then": { - "required": ["filepath", "key"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to an hdf file. May contain the full path in S3\nincluding bucket and protocol, e.g. `s3://bucket-name/path/to/file.hdf`." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "bucket_name": { - "type": "string", - "description": "S3 bucket name. Must be specified **only** if not\npresent in ``filepath``." - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``aws_access_key_id``, ``aws_secret_access_key``." - }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "s3fs_args": { - "type": "object", - "description": "S3FileSystem options. You can find all available arguments at:\nhttps://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "PickleS3DataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to a pkl file. May contain the full path in S3\nincluding bucket and protocol, e.g. `s3://bucket-name/path/to/file.pkl`." - }, - "bucket_name": { - "type": "string", - "description": "S3 bucket name. Must be specified **only** if not\npresent in ``filepath``." - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``aws_access_key_id``, ``aws_secret_access_key``." - }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nYou can find all available arguments at:\nhttps://docs.python.org/3/library/pickle.html#pickle.loads\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nYou can see all available arguments at:\nhttps://docs.python.org/3/library/pickle.html#pickle.dumps\nAll defaults are preserved." 
- }, - "s3fs_args": { - "type": "object", - "description": "S3FileSystem options. You can see all available arguments at:\nhttps://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "SQLTableDataSet" } } }, - "then": { - "required": ["table_name", "credentials"], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "SQLQueryDataSet" } } }, - "then": { - "required": ["sql", "credentials"], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "CSVS3DataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a csv file. May contain the full path in S3\nincluding bucket and protocol, e.g. `s3://bucket-name/path/to/file.csv`." - }, - "bucket_name": { - "type": "string", - "description": "S3 bucket name. Must be specified **only** if not\npresent in ``filepath``." - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``aws_access_key_id``, ``aws_secret_access_key``." 
- }, - "load_args": { - "type": "object", - "description": "Pandas options for loading csv files.\nYou can find all available arguments at:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving csv files.\nYou can find all available arguments at:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "s3fs_args": { - "type": "object", - "description": "S3FileSystem options. You can see all available arguments at:\nhttps://s3fs.readthedocs.io/en/latest/api.html#s3fs.core.S3FileSystem" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "ParquetLocalDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a parquet file or a metadata file of a multipart\nparquet collection or the directory of a multipart parquet." - }, - "engine": { - "type": "string", - "description": "The engine to use, one of: `auto`, `fastparquet`,\n`pyarrow`. If `auto`, then the default behavior is to try\n`pyarrow`, falling back to `fastparquet` if `pyarrow` is\nunavailable." - }, - "load_args": { - "type": "object", - "description": "Additional loading options `pyarrow`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.read_table.html\nor `fastparquet`:\nhttps://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.ParquetFile.to_pandas" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas\nor `fastparquet`:\nhttps://fastparquet.readthedocs.io/en/latest/api.html#fastparquet.write" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "TextLocalDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to a text file." - }, - "load_args": { - "type": "object", - "description": "Load arguments should be specified in accordance with\nthe built in open function. This can be found at\nhttps://docs.python.org/3/library/functions.html#open" - }, - "save_args": { - "type": "object", - "description": "Save arguments should be specified in accordance with\nthe built in open function. 
This can be found at\nhttps://docs.python.org/3/library/functions.html#open" - } - } - } - } - ] - } - } -} diff --git a/static/jsonschema/kedro-catalog-0.16.json b/static/jsonschema/kedro-catalog-0.16.json deleted file mode 100644 index 32b23591bd..0000000000 --- a/static/jsonschema/kedro-catalog-0.16.json +++ /dev/null @@ -1,763 +0,0 @@ -{ - "type": "object", - "patternProperties": { - "^[a-z0-9-_]+$": { - "required": ["type"], - "properties": { - "type": { - "type": "string", - "enum": [ - "PartitionedDataSet", - "CachedDataSet", - "MemoryDataSet", - "LambdaDataSet", - "networkx.NetworkXDataSet", - "dask.ParquetDataSet", - "geopandas.GeoJSONDataSet", - "pillow.ImageDataSet", - "biosequence.BioSequenceDataSet", - "api.APIDataSet", - "matplotlib.MatplotlibWriter", - "yaml.YAMLDataSet", - "pickle.PickleDataSet", - "text.TextDataSet", - "spark.SparkJDBCDataSet", - "spark.SparkHiveDataSet", - "spark.SparkDataSet", - "pandas.JSONDataSet", - "pandas.SQLTableDataSet", - "pandas.SQLQueryDataSet", - "pandas.ParquetDataSet", - "pandas.FeatherDataSet", - "pandas.HDFDataSet", - "pandas.CSVDataSet", - "pandas.ExcelDataSet", - "pandas.GBQTableDataSet" - ] - } - }, - "allOf": [ - { - "if": { "properties": { "type": { "const": "PartitionedDataSet" } } }, - "then": { - "required": ["path", "dataset"], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataSet``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataSet``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": "object", - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\n**Note:** ``dataset_credentials`` key has now been deprecated\nand should not be specified.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.16.0/04_user_guide/08_advanced_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." 
- } - } - } - }, - { - "if": { "properties": { "type": { "const": "CachedDataSet" } } }, - "then": { - "required": ["dataset"], - "properties": { - "dataset": { - "pattern": ".*", - "description": "A Kedro DataSet object or a dictionary to cache." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "MemoryDataSet" } } }, - "then": { - "required": [], - "properties": { - "data": { - "pattern": ".*", - "description": "Python object containing the data." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "LambdaDataSet" } } }, - "then": { - "required": ["load", "save"], - "properties": { - "load": { - "pattern": ".*", - "description": "Method to load data from a data set." - }, - "save": { - "pattern": ".*", - "description": "Method to save data to a data set." - }, - "exists": { - "pattern": ".*", - "description": "Method to check whether output data already exists." - }, - "release": { - "pattern": ".*", - "description": "Method to release any cached information." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "networkx.NetworkXDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "The path to the NetworkX graph JSON file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_graph``.\nSee the details in\nhttps://networkx.org/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_data``.\nSee the details in\nhttps://networkx.org/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "dask.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a parquet file\nparquet collection or the directory of a multipart parquet." 
- }, - "load_args": { - "type": "object", - "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "geopandas.GeoJSONDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a GeoJSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "GeoPandas options for loading GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference/geopandas.read_file.html" - }, - "save_args": { - "type": "object", - "description": "GeoPandas options for saving geojson files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file\nThe default_save_arg driver is 'GeoJSON', all others preserved." - }, - "credentials": { - "type": "object", - "description": "credentials required to access the underlying filesystem.\nEg. for ``GCFileSystem`` it would look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pillow.ImageDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to an image file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "Pillow options for saving image files.\nHere you can find all available arguments:\nhttps://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { "const": "biosequence.BioSequenceDataSet" } - } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "path to sequence file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "load_args": { - "type": "object", - "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." - }, - "save_args": { - "type": "object", - "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\n to pass to the filesystem's `open` method through nested keys\n `open_args_load` and `open_args_save`.\n Here you can find all available arguments for `open`:\n https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\n All defaults are preserved, except `mode`, which is set to `r` when loading\n and to `w` when saving.\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "api.APIDataSet" } } }, - "then": { - "required": ["url"], - "properties": { - "url": { - "type": "string", - "description": "The API URL endpoint." - }, - "method": { - "type": "string", - "description": "The Method of the request, GET, POST, PUT, DELETE, HEAD, etc..." - }, - "data": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "params": { - "type": "object", - "description": "The url parameters of the API.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#passing-parameters-in-urls" - }, - "headers": { - "type": "object", - "description": "The HTTP headers.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#custom-headers" - }, - "auth": { - "pattern": ".*", - "description": "Anything ``requests`` accepts. Normally it's either ``('login', 'password')``,\nor ``AuthBase``, ``HTTPBasicAuth`` instance for more complex cases." 
- }, - "timeout": { - "type": "integer", - "description": "The wait time in seconds for a response, defaults to 1 minute.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#timeouts" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "matplotlib.MatplotlibWriter" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Key path to a matplot object file(s) prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'client_kwargs': {'aws_access_key_id': '', 'aws_secret_access_key': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "yaml.YAMLDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "PyYAML options for saving YAML files (arguments passed\ninto ```yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pickle.PickleDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Pickle file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
- }, - "backend": { - "type": "string", - "description": "Backend to use, must be one of ['pickle', 'joblib']. Defaults to 'pickle'." - }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments for different backends:\npickle.load: https://docs.python.org/3/library/pickle.html#pickle.load\njoblib.load: https://joblib.readthedocs.io/en/latest/generated/joblib.load.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments for different backends:\npickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump\njoblib.dump: https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "text.TextDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkJDBCDataSet" } } - }, - "then": { - "required": ["url", "table"], - "properties": { - "url": { - "type": "string", - "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." - }, - "table": { - "type": "string", - "description": "The name of the table to load or save data to." - }, - "credentials": { - "type": "object", - "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." 
- }, - "load_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkHiveDataSet" } } - }, - "then": { - "required": ["database", "table", "write_mode"], - "properties": { - "database": { - "type": "string", - "description": "The name of the hive database." - }, - "table": { - "type": "string", - "description": "The name of the table within the database." - }, - "write_mode": { - "type": "string", - "description": "``insert``, ``upsert`` or ``overwrite`` are supported." - }, - "table_pk": { - "type": "array", - "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. Is required for ``write_mode=\"upsert\"``." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "spark.SparkDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Path to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataSet``s\nstarting with ``/dbfs/mnt``." - }, - "file_format": { - "type": "string", - "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext include parquet, csv. For a list of supported\nformats please refer to Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" - }, - "load_args": { - "type": "object", - "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "save_args": { - "type": "object", - "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``aws_access_key_id``, ``aws_secret_access_key``, if ``filepath``\nprefix is ``s3a://`` or ``s3n://``. Optional keyword arguments passed to\n``hdfs.client.InsecureClient`` if ``filepath`` prefix is ``hdfs://``.\nIgnored otherwise." 
- } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.JSONDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLTableDataSet" } } - }, - "then": { - "required": ["table_name", "credentials"], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." 
- } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLQueryDataSet" } } - }, - "then": { - "required": ["sql", "credentials"], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Parquet file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nIt can also be a path to a directory. If the directory is\nprovided then it can be used for reading partitioned parquet files.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Additional options for loading Parquet file(s).\nHere you can find all available arguments when reading single file:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html\nHere you can find all available arguments when reading partitioned datasets:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset.read\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow.parquet.write_table`.\nHere you can find all available arguments:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved." 
- } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.FeatherDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a feather file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.HDFDataSet" } } }, - "then": { - "required": ["filepath", "key"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set `wb` when saving." 
- } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.CSVDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ExcelDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath to a Excel file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.GBQTableDataSet" } } - }, - "then": { - "required": ["dataset", "table_name"], - "properties": { - "dataset": { - "type": "string", - "description": "Google BigQuery dataset." - }, - "table_name": { - "type": "string", - "description": "Google BigQuery table name." - }, - "project": { - "type": "string", - "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" - }, - "credentials": { - "pattern": ".*", - "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." 
- } - } - } - } - ] - } - } -} diff --git a/static/jsonschema/kedro-catalog-0.17.json b/static/jsonschema/kedro-catalog-0.17.json deleted file mode 100644 index 197e59dcb2..0000000000 --- a/static/jsonschema/kedro-catalog-0.17.json +++ /dev/null @@ -1,950 +0,0 @@ -{ - "type": "object", - "patternProperties": { - "^[a-z0-9-_]+$": { - "required": ["type"], - "properties": { - "type": { - "type": "string", - "enum": [ - "PartitionedDataSet", - "CachedDataSet", - "MemoryDataSet", - "LambdaDataSet", - "networkx.NetworkXDataSet", - "dask.ParquetDataSet", - "geopandas.GeoJSONDataSet", - "pillow.ImageDataSet", - "json.JSONDataSet", - "biosequence.BioSequenceDataSet", - "tensorflow.TensorFlowModelDataset", - "api.APIDataSet", - "matplotlib.MatplotlibWriter", - "yaml.YAMLDataSet", - "pickle.PickleDataSet", - "text.TextDataSet", - "holoviews.HoloviewsWriter", - "email.EmailMessageDataSet", - "spark.SparkJDBCDataSet", - "spark.SparkHiveDataSet", - "spark.SparkDataSet", - "pandas.AppendableExcelDataSet", - "pandas.JSONDataSet", - "pandas.SQLTableDataSet", - "pandas.SQLQueryDataSet", - "pandas.ParquetDataSet", - "pandas.FeatherDataSet", - "pandas.HDFDataSet", - "pandas.CSVDataSet", - "pandas.ExcelDataSet", - "pandas.GBQTableDataSet", - "pandas.GBQQueryDataSet", - "pandas.GenericDataSet" - ] - } - }, - "allOf": [ - { - "if": { "properties": { "type": { "const": "PartitionedDataSet" } } }, - "then": { - "required": ["path", "dataset"], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataSet``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataSet``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": "object", - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.17.0/05_data/02_kedro_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." 
- }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "CachedDataSet" } } }, - "then": { - "required": ["dataset"], - "properties": { - "dataset": { - "pattern": ".*", - "description": "A Kedro DataSet object or a dictionary to cache." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "MemoryDataSet" } } }, - "then": { - "required": [], - "properties": { - "data": { - "pattern": ".*", - "description": "Python object containing the data." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "LambdaDataSet" } } }, - "then": { - "required": ["load", "save"], - "properties": { - "load": { - "pattern": ".*", - "description": "Method to load data from a data set." - }, - "save": { - "pattern": ".*", - "description": "Method to save data to a data set." - }, - "exists": { - "pattern": ".*", - "description": "Method to check whether output data already exists." - }, - "release": { - "pattern": ".*", - "description": "Method to release any cached information." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "networkx.NetworkXDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to the NetworkX graph JSON file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_graph``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_data``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "dask.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a parquet file\nparquet collection or the directory of a multipart parquet." 
- }, - "load_args": { - "type": "object", - "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "geopandas.GeoJSONDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a GeoJSON file prefixed with a protocol like\n`s3://`. If prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "GeoPandas options for loading GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference/geopandas.read_file.html" - }, - "save_args": { - "type": "object", - "description": "GeoPandas options for saving geojson files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file\nThe default_save_arg driver is 'GeoJSON', all others preserved." - }, - "credentials": { - "type": "object", - "description": "credentials required to access the underlying filesystem.\nEg. for ``GCFileSystem`` it would look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pillow.ImageDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to an image file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "Pillow options for saving image files.\nHere you can find all available arguments:\nhttps://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "json.JSONDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { "const": "biosequence.BioSequenceDataSet" } - } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to sequence file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "load_args": { - "type": "object", - "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." - }, - "save_args": { - "type": "object", - "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\n to pass to the filesystem's `open` method through nested keys\n `open_args_load` and `open_args_save`.\n Here you can find all available arguments for `open`:\n https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\n All defaults are preserved, except `mode`, which is set to `r` when loading\n and to `w` when saving.\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" - } - } - } - }, - { - "if": { - "properties": { - "type": { "const": "tensorflow.TensorFlowModelDataset" } - } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a TensorFlow model directory prefixed with a\nprotocol like `s3://`. If prefix is not provided `file` protocol (local filesystem)\nwill be used. The prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "TensorFlow options for loading models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/load_model\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "TensorFlow options for saving models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/save_model\nAll defaults are preserved, except for \"save_format\", which is set to \"tf\"." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "api.APIDataSet" } } }, - "then": { - "required": ["url"], - "properties": { - "url": { - "type": "string", - "description": "The API URL endpoint." - }, - "method": { - "type": "string", - "description": "The Method of the request, GET, POST, PUT, DELETE, HEAD, etc..." - }, - "data": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "params": { - "type": "object", - "description": "The url parameters of the API.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#passing-parameters-in-urls" - }, - "headers": { - "type": "object", - "description": "The HTTP headers.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#custom-headers" - }, - "auth": { - "pattern": ".*", - "description": "Anything ``requests`` accepts. Normally it's either ``('login', 'password')``,\nor ``AuthBase``, ``HTTPBasicAuth`` instance for more complex cases." - }, - "json": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests, passed in\nto the json kwarg in the requests object.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "timeout": { - "type": "integer", - "description": "The wait time in seconds for a response, defaults to 1 minute.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#timeouts" - }, - "credentials": { - "pattern": ".*", - "description": "Same as ``auth``. 
Allows specifying ``auth`` secrets in \ncredentials.yml." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "matplotlib.MatplotlibWriter" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a matplot object file(s) prefixed with a protocol\nlike `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be\nused. The prefix should be any protocol supported by ``fsspec``." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" - } - } - } - }, - { - "if": { "properties": { "type": { "const": "yaml.YAMLDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "PyYAML options for saving YAML files (arguments passed\ninto ```yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pickle.PickleDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Pickle file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "backend": { - "type": "string", - "description": "Backend to use, must be one of ['pickle', 'joblib']. Defaults to 'pickle'." 
- }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments for different backends:\npickle.load: https://docs.python.org/3/library/pickle.html#pickle.load\njoblib.load: https://joblib.readthedocs.io/en/latest/generated/joblib.load.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments for different backends:\npickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump\njoblib.dump: https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "text.TextDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "holoviews.HoloviewsWriter" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Extra save args passed to `holoviews.save()`. See\nhttp://holoviews.org/reference_manual/holoviews.util.html#holoviews.util.save" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "email.EmailMessageDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "``email`` options for parsing email messages (arguments passed\ninto ``email.parser.Parser.parse``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser.parse\nIf you would like to specify options for the `Parser`,\nyou can include them under the \"parser\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser\nAll defaults are preserved, but \"policy\", which is set to ``email.policy.default``." - }, - "save_args": { - "type": "object", - "description": "``email`` options for generating MIME documents (arguments passed into\n``email.generator.Generator.flatten``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator.flatten\nIf you would like to specify options for the `Generator`,\nyou can include them under the \"generator\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkJDBCDataSet" } } - }, - "then": { - "required": ["url", "table"], - "properties": { - "url": { - "type": "string", - "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." - }, - "table": { - "type": "string", - "description": "The name of the table to load or save data to." 
- }, - "credentials": { - "type": "object", - "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." - }, - "load_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "spark.SparkHiveDataSet" } } - }, - "then": { - "required": ["database", "table", "write_mode"], - "properties": { - "database": { - "type": "string", - "description": "The name of the hive database." - }, - "table": { - "type": "string", - "description": "The name of the table within the database." - }, - "write_mode": { - "type": "string", - "description": "``insert``, ``upsert`` or ``overwrite`` are supported." - }, - "table_pk": { - "type": "array", - "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. Is required for ``write_mode=\"upsert\"``." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "spark.SparkDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataSet``s\nstarting with ``/dbfs/mnt``." - }, - "file_format": { - "type": "string", - "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext include parquet, csv. For a list of supported\nformats please refer to Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" - }, - "load_args": { - "type": "object", - "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "save_args": { - "type": "object", - "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "credentials": { - "type": "object", - "description": "Credentials to access the S3 bucket, such as\n``key``, ``secret``, if ``filepath`` prefix is ``s3a://`` or ``s3n://``.\nOptional keyword arguments passed to ``hdfs.client.InsecureClient``\nif ``filepath`` prefix is ``hdfs://``. Ignored otherwise." 
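A minimal sketch of a ``spark.SparkDataSet`` entry using the options described above; the filepath and partition column are hypothetical, while ``mode`` and ``partitionBy`` are the ``save_args`` the description explicitly names:

    flights:
      type: spark.SparkDataSet
      filepath: s3a://my-bucket/flights
      file_format: parquet
      save_args:
        mode: overwrite
        partitionBy: [year]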
- } - } - } - }, - { - "if": { - "properties": { - "type": { "const": "pandas.AppendableExcelDataSet" } - } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to an existing local Excel file." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"openpyxl\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html\nNote: `mode` option of `ExcelWriter` is set to `a` and it can not be overridden." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.JSONDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLTableDataSet" } } - }, - "then": { - "required": ["table_name", "credentials"], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. 
It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.SQLQueryDataSet" } } - }, - "then": { - "required": ["sql", "credentials"], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": "object", - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ParquetDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Parquet file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nIt can also be a path to a directory. If the directory is\nprovided then it can be used for reading partitioned parquet files.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Additional options for loading Parquet file(s).\nHere you can find all available arguments when reading single file:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html\nHere you can find all available arguments when reading partitioned datasets:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset.read\nAll defaults are preserved." 
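A minimal sketch of the ``pandas.SQLQueryDataSet`` entry defined above, passing the connection string as `con` through ``credentials`` as the description requires; the query and connection URL are hypothetical:

    shuttle_ids:
      type: pandas.SQLQueryDataSet
      sql: SELECT id FROM shuttles
      credentials:
        con: postgresql://user:password@localhost/shuttles_db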
- }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow.parquet.write_table` and\n`pyarrow.Table.from_pandas`.\nHere you can find all available arguments for `write_table()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table\nThe arguments for `from_pandas()` should be passed through a nested\nkey: `from_pandas`. E.g.: `save_args = {\"from_pandas\": {\"preserve_index\": False}}`\nHere you can find all available arguments for `from_pandas()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.FeatherDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a feather file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.HDFDataSet" } } }, - "then": { - "required": ["filepath", "key"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." 
- }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set `wb` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.CSVDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { "properties": { "type": { "const": "pandas.GenericDataSet" } } }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
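For the ``pandas.CSVDataSet`` definition above, a sketch of a validating entry; the name and filepath are hypothetical, and `sep` is just one of the `pandas.read_csv` arguments that ``load_args`` forwards:

    reviews:
      type: pandas.CSVDataSet
      filepath: data/01_raw/reviews.csv
      load_args:
        sep: ","
      save_args:
        index: False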
- }, - "file_format" : { - "type": "string", - "description": "The read/write methods to retrieve from pandas (`pandas.read_{file_format}` or `pd.DataFrame.to_{file_format}`) on a best effort basis." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.ExcelDataSet" } } - }, - "then": { - "required": ["filepath"], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Excel file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under the \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { "type": { "const": "pandas.GBQTableDataSet" } } - }, - "then": { - "required": ["dataset", "table_name"], - "properties": { - "dataset": { - "type": "string", - "description": "Google BigQuery dataset." - }, - "table_name": { - "type": "string", - "description": "Google BigQuery table name." - }, - "project": { - "type": "string", - "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" - }, - "credentials": { - "pattern": ".*", - "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." 
- } - } - } - } - ] - } - } -} diff --git a/static/jsonschema/kedro-catalog-0.18.json b/static/jsonschema/kedro-catalog-0.18.json deleted file mode 100644 index 13c010e5ce..0000000000 --- a/static/jsonschema/kedro-catalog-0.18.json +++ /dev/null @@ -1,1423 +0,0 @@ -{ - "type": "object", - "patternProperties": { - "^[a-z0-9-_]+$": { - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "CachedDataSet", - "IncrementalDataSet", - "MemoryDataSet", - "LambdaDataSet", - "PartitionedDataSet", - "api.APIDataSet", - "biosequence.BioSequenceDataSet", - "dask.ParquetDataSet", - "email.EmailMessageDataSet", - "geopandas.GeoJSONDataSet", - "holoviews.HoloviewsWriter", - "json.JSONDataSet", - "matplotlib.MatplotlibWriter", - "networkx.NetworkXDataSet", - "pandas.CSVDataSet", - "pandas.ExcelDataSet", - "pandas.FeatherDataSet", - "pandas.GBQTableDataSet", - "pandas.HDFDataSet", - "pandas.JSONDataSet", - "pandas.ParquetDataSet", - "pandas.SQLTableDataSet", - "pandas.SQLQueryDataSet", - "pandas.XMLDataSet", - "pillow.ImageDataSet", - "pickle.PickleDataSet", - "plotly.PlotlyDataSet", - "redis.PickleDataSet", - "spark.SparkDataSet", - "spark.SparkHiveDataSet", - "spark.SparkJDBCDataSet", - "tensorflow.TensorFlowModelDataset", - "text.TextDataSet", - "tracking.JSONDataSet", - "tracking.MetricsDataSet", - "yaml.YAMLDataSet" - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "type": { - "const": "CachedDataSet" - } - } - }, - "then": { - "required": [ - "dataset" - ], - "properties": { - "dataset": { - "pattern": ".*", - "description": "A Kedro DataSet object or a dictionary to cache." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "IncrementalDataSet" - } - } - }, - "then": { - "required": [ - "path", - "dataset" - ], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataSet``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataSet``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "checkpoint": { - "pattern": "object", - "description": "Optional checkpoint configuration. Accepts a dictionary\nwith the corresponding dataset definition including ``filepath``\n(unlike ``dataset`` argument). Checkpoint configuration is\ndescribed here:\nhttps://kedro.readthedocs.io/en/0.18.0/data/kedro_io.html#checkpoint-configuration\nCredentials for the checkpoint can be explicitly specified\nin this configuration." 
- }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.18.0/data/kedro_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "MemoryDataSet" - } - } - }, - "then": { - "required": [], - "properties": { - "data": { - "pattern": ".*", - "description": "Python object containing the data." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "LambdaDataSet" - } - } - }, - "then": { - "required": [ - "load", - "save" - ], - "properties": { - "load": { - "pattern": ".*", - "description": "Method to load data from a data set." - }, - "save": { - "pattern": ".*", - "description": "Method to save data to a data set." - }, - "exists": { - "pattern": ".*", - "description": "Method to check whether output data already exists." - }, - "release": { - "pattern": ".*", - "description": "Method to release any cached information." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "PartitionedDataSet" - } - } - }, - "then": { - "required": [ - "path", - "dataset" - ], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataSet``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataSet``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." 
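A minimal sketch of a ``PartitionedDataSet`` entry per the ``path`` and ``dataset`` fields above, with ``dataset`` given as a class-name string (format b); the bucket is hypothetical, and ``filename_suffix`` is described just below:

    daily_sales:
      type: PartitionedDataSet
      path: s3://my-bucket/daily-sales
      dataset: pandas.CSVDataSet
      filename_suffix: ".csv"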
- }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.18.0/data/kedro_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "api.APIDataSet" - } - } - }, - "then": { - "required": [ - "url" - ], - "properties": { - "url": { - "type": "string", - "description": "The API URL endpoint." - }, - "method": { - "type": "string", - "description": "The Method of the request, GET, POST, PUT, DELETE, HEAD, etc..." - }, - "data": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "params": { - "type": "object", - "description": "The url parameters of the API.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#passing-parameters-in-urls" - }, - "headers": { - "type": "object", - "description": "The HTTP headers.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#custom-headers" - }, - "auth": { - "pattern": ".*", - "description": "Anything ``requests`` accepts. Normally it's either ``('login', 'password')``,\nor ``AuthBase``, ``HTTPBasicAuth`` instance for more complex cases." - }, - "json": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests, passed in\nto the json kwarg in the requests object.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "timeout": { - "type": "integer", - "description": "The wait time in seconds for a response, defaults to 1 minute.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#timeouts" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "biosequence.BioSequenceDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to sequence file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "load_args": { - "type": "object", - "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." - }, - "save_args": { - "type": "object", - "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." 
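To illustrate the ``biosequence.BioSequenceDataSet`` options, a sketch reusing the `{"format": "fasta"}` example from the ``save_args`` description; the filepath is hypothetical:

    genome_sequences:
      type: biosequence.BioSequenceDataSet
      filepath: data/01_raw/sequences.fasta
      load_args:
        format: fasta
      save_args:
        format: fasta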
- }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\n to pass to the filesystem's `open` method through nested keys\n `open_args_load` and `open_args_save`.\n Here you can find all available arguments for `open`:\n https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\n All defaults are preserved, except `mode`, which is set to `r` when loading\n and to `w` when saving.\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "dask.ParquetDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a parquet file\nparquet collection or the directory of a multipart parquet." - }, - "load_args": { - "type": "object", - "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "email.EmailMessageDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "``email`` options for parsing email messages (arguments passed\ninto ``email.parser.Parser.parse``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser.parse\nIf you would like to specify options for the `Parser`,\nyou can include them under the \"parser\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser\nAll defaults are preserved, but \"policy\", which is set to ``email.policy.default``." - }, - "save_args": { - "type": "object", - "description": "``email`` options for generating MIME documents (arguments passed into\n``email.generator.Generator.flatten``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator.flatten\nIf you would like to specify options for the `Generator`,\nyou can include them under the \"generator\" key. 
Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "geopandas.GeoJSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a GeoJSON file prefixed with a protocol like\n`s3://`. If prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "GeoPandas options for loading GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference/geopandas.read_file.html" - }, - "save_args": { - "type": "object", - "description": "GeoPandas options for saving geojson files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file\nThe default_save_arg driver is 'GeoJSON', all others preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "credentials required to access the underlying filesystem.\nEg. for ``GCFileSystem`` it would look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "holoviews.HoloviewsWriter" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Extra save args passed to `holoviews.save()`. See\nhttps://holoviews.org/reference_manual/holoviews.util.html#holoviews.util.save" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "json.JSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "matplotlib.MatplotlibWriter" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a matplot object file(s) prefixed with a protocol\nlike `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be\nused. The prefix should be any protocol supported by ``fsspec``." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. 
for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}}`" - }, - "save_args": { - "type": "object", - "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "networkx.NetworkXDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to the NetworkX graph JSON file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_graph``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ```networkx.node_link_data``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.CSVDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.ExcelDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Excel file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under the \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.FeatherDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a feather file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. 
for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.GBQTableDataSet" - } - } - }, - "then": { - "required": [ - "dataset", - "table_name" - ], - "properties": { - "dataset": { - "type": "string", - "description": "Google BigQuery dataset." - }, - "table_name": { - "type": "string", - "description": "Google BigQuery table name." - }, - "project": { - "type": "string", - "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" - }, - "credentials": { - "pattern": ".*", - "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.HDFDataSet" - } - } - }, - "then": { - "required": [ - "filepath", - "key" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." 
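A minimal sketch of a ``pandas.HDFDataSet`` entry with its two required fields, ``filepath`` and the HDF store group ``key`` described above; both values are hypothetical:

    model_features:
      type: pandas.HDFDataSet
      filepath: data/04_feature/features.h5
      key: features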
- }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.JSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.ParquetDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Parquet file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nIt can also be a path to a directory. If the directory is\nprovided then it can be used for reading partitioned parquet files.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Additional options for loading Parquet file(s).\nHere you can find all available arguments when reading single file:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html\nHere you can find all available arguments when reading partitioned datasets:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset.read\nAll defaults are preserved." 
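A sketch of a ``pandas.ParquetDataSet`` entry; the filepath is hypothetical, and the nested `from_pandas` key under ``save_args`` follows the `save_args = {"from_pandas": {"preserve_index": False}}` convention spelled out in the description just below:

    trips:
      type: pandas.ParquetDataSet
      filepath: data/03_primary/trips.parquet
      save_args:
        from_pandas:
          preserve_index: False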
- }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow.parquet.write_table` and\n`pyarrow.Table.from_pandas`.\nHere you can find all available arguments for `write_table()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table\nThe arguments for `from_pandas()` should be passed through a nested\nkey: `from_pandas`. E.g.: `save_args = {\"from_pandas\": {\"preserve_index\": False}}`\nHere you can find all available arguments for `from_pandas()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.SQLTableDataSet" - } - } - }, - "then": { - "required": [ - "table_name", - "credentials" - ], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.SQLQueryDataSet" - } - } - }, - "then": { - "required": [ - "sql", - "credentials" - ], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. 
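A sketch of a `pandas.SQLTableDataSet` entry satisfying the rule above (the connection string and table name are placeholders; `if_exists` is a `to_sql` argument forwarded via `save_args`):

    {
      "shuttles_table": {
        "type": "pandas.SQLTableDataSet",
        "table_name": "shuttles",
        "credentials": {"con": "postgresql://user:password@localhost/db"},
        "save_args": {"if_exists": "replace"}
      }
    }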
It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "execution_options": { - "type": "object", - "description": "A dictionary with non-SQL options for the connection\nto be applied to the underlying engine.\nTo find all supported execution options, see here:\nhttps://docs.sqlalchemy.org/en/12/core/connections.html#sqlalchemy.engine.Connection.execution_options \nNote that this is not a standard argument supported by pandas API, but could be useful for handling large datasets." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.XMLDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a XML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_xml.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_xml.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pickle.PickleDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Pickle file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "backend": { - "type": "string", - "description": "Backend to use, must be one of ['pickle', 'joblib']. Defaults to 'pickle'." 
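Similarly, a hypothetical `pandas.SQLQueryDataSet` entry; note that both `sql` and `credentials` are required by the rule above:

    {
      "orders_query": {
        "type": "pandas.SQLQueryDataSet",
        "sql": "SELECT * FROM orders WHERE status = 'open'",
        "credentials": {"con": "postgresql://user:password@localhost/db"}
      }
    }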
- }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments for different backends:\npickle.load: https://docs.python.org/3/library/pickle.html#pickle.load\njoblib.load: https://joblib.readthedocs.io/en/latest/generated/joblib.load.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments for different backends:\npickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump\njoblib.dump: https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pillow.ImageDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to an image file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "Pillow options for saving image files.\nHere you can find all available arguments:\nhttps://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "plotly.PlotlyDataSet" - } - } - }, - "then": { - "required": [ - "filepath", - "plotly_args" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
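A minimal `pickle.PickleDataSet` instance per the rule above, using the 'joblib' backend option it documents (the path is hypothetical):

    {
      "trained_model": {
        "type": "pickle.PickleDataSet",
        "filepath": "data/06_models/model.pkl",
        "backend": "joblib"
      }
    }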
- }, - "plotly_args": { - "type": "object", - "description": "Plotly configuration for generating a plotly graph object Figure\nrepresenting the plotted data." - }, - "load_args": { - "type": "object", - "description": "Plotly options for loading JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.from_json.html#plotly.io.from_json\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Plotly options for saving JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.write_json.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "redis.PickleDataSet" - } - } - }, - "then": { - "required": [ - "key" - ], - "properties": { - "key": { - "type": "string", - "description": "The key to use for saving/loading objects to Redis." - }, - "backend": { - "type": "string", - "description": "Backend to use, must be an import path to a module which satisfies the ``pickle`` interface.\nThat is, contains a `loads` and `dumps` function. Defaults to 'pickle'." - }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.loads\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.dumps\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the redis server." - }, - "redis_args": { - "type": "object", - "description": "Extra arguments to pass into the redis client constructor ``redis.StrictRedis.from_url``, as well as to pass to ``redis.StrictRedis.set``." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataSet``s\nstarting with ``/dbfs/mnt``." - }, - "file_format": { - "type": "string", - "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext and include parquet and csv. 
For a list of supported\nformats please refer to Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" - }, - "load_args": { - "type": "object", - "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "save_args": { - "type": "object", - "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials to access the S3 bucket, such as\n``key``, ``secret``, if ``filepath`` prefix is ``s3a://`` or ``s3n://``.\nOptional keyword arguments passed to ``hdfs.client.InsecureClient``\nif ``filepath`` prefix is ``hdfs://``. Ignored otherwise." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkHiveDataSet" - } - } - }, - "then": { - "required": [ - "database", - "table", - "write_mode" - ], - "properties": { - "database": { - "type": "string", - "description": "The name of the hive database." - }, - "table": { - "type": "string", - "description": "The name of the table within the database." - }, - "write_mode": { - "type": "string", - "description": "``insert``, ``upsert`` or ``overwrite`` are supported." - }, - "table_pk": { - "type": "array", - "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. Is required for ``write_mode=\"upsert\"``." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkJDBCDataSet" - } - } - }, - "then": { - "required": [ - "url", - "table" - ], - "properties": { - "url": { - "type": "string", - "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." - }, - "table": { - "type": "string", - "description": "The name of the table to load or save data to." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." - }, - "load_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. 
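As a sketch of the `spark.SparkHiveDataSet` rule above, an upsert entry where `table_pk` is supplied because `write_mode` is "upsert" (all names are hypothetical):

    {
      "hive_events": {
        "type": "spark.SparkHiveDataSet",
        "database": "analytics",
        "table": "events",
        "write_mode": "upsert",
        "table_pk": ["event_id"]
      }
    }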
To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tensorflow.TensorFlowModelDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a TensorFlow model directory prefixed with a\nprotocol like `s3://`. If prefix is not provided `file` protocol (local filesystem)\nwill be used. The prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "TensorFlow options for loading models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/load_model\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "TensorFlow options for saving models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/save_model\nAll defaults are preserved, except for \"save_format\", which is set to \"tf\"." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "text.TextDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). 
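A hypothetical `spark.SparkJDBCDataSet` entry accepted by the rule above (the URL and credentials are placeholders):

    {
      "jdbc_transactions": {
        "type": "spark.SparkJDBCDataSet",
        "url": "jdbc:postgresql://localhost/db",
        "table": "transactions",
        "credentials": {"user": "reader", "password": "example"}
      }
    }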
Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ``json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "yaml.YAMLDataSet" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "PyYAML options for saving YAML files (arguments passed\ninto ``yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`."
- }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - } - ] - } - } -} diff --git a/static/jsonschema/kedro-catalog-0.19.json b/static/jsonschema/kedro-catalog-0.19.json deleted file mode 100644 index f7c271b01e..0000000000 --- a/static/jsonschema/kedro-catalog-0.19.json +++ /dev/null @@ -1,1470 +0,0 @@ -{ - "type": "object", - "patternProperties": { - "^[a-z0-9-_]+$": { - "required": [ - "type" - ], - "properties": { - "type": { - "type": "string", - "enum": [ - "CachedDataset", - "IncrementalDataset", - "MemoryDataset", - "LambdaDataset", - "partitions.PartitionedDataset", - "api.APIDataset", - "biosequence.BioSequenceDataset", - "dask.ParquetDataset", - "email.EmailMessageDataset", - "geopandas.GeoJSONDataset", - "holoviews.HoloviewsWriter", - "huggingface.HFDataset", - "huggingface.HFTransformerPipelineDataset", - "json.JSONDataset", - "matplotlib.MatplotlibWriter", - "networkx.NetworkXDataset", - "pandas.CSVDataset", - "pandas.ExcelDataset", - "pandas.FeatherDataset", - "pandas.GBQTableDataset", - "pandas.HDFDataset", - "pandas.JSONDataset", - "pandas.ParquetDataset", - "pandas.SQLTableDataset", - "pandas.SQLQueryDataset", - "pandas.XMLDataset", - "pillow.ImageDataset", - "pickle.PickleDataset", - "plotly.PlotlyDataset", - "redis.PickleDataset", - "spark.SparkDataset", - "spark.SparkHiveDataset", - "spark.SparkJDBCDataset", - "tensorflow.TensorFlowModelDataset", - "text.TextDataset", - "tracking.JSONDataset", - "tracking.MetricsDataset", - "yaml.YAMLDataset" - ] - } - }, - "allOf": [ - { - "if": { - "properties": { - "type": { - "const": "CachedDataset" - } - } - }, - "then": { - "required": [ - "dataset" - ], - "properties": { - "dataset": { - "pattern": ".*", - "description": "A Kedro Dataset object or a dictionary to cache." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "IncrementalDataset" - } - } - }, - "then": { - "required": [ - "path", - "dataset" - ], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataset``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. 
This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataset``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "checkpoint": { - "pattern": "object", - "description": "Optional checkpoint configuration. Accepts a dictionary\nwith the corresponding dataset definition including ``filepath``\n(unlike ``dataset`` argument). Checkpoint configuration is\ndescribed here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#checkpoint-configuration\nCredentials for the checkpoint can be explicitly specified\nin this configuration." - }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "MemoryDataset" - } - } - }, - "then": { - "required": [], - "properties": { - "data": { - "pattern": ".*", - "description": "Python object containing the data." - }, - "copy_mode": { - "type": "string", - "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "LambdaDataset" - } - } - }, - "then": { - "required": [ - "load", - "save" - ], - "properties": { - "load": { - "pattern": ".*", - "description": "Method to load data from a data set." - }, - "save": { - "pattern": ".*", - "description": "Method to save data to a data set." - }, - "exists": { - "pattern": ".*", - "description": "Method to check whether output data already exists." - }, - "release": { - "pattern": ".*", - "description": "Method to release any cached information." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "PartitionedDataset" - } - } - }, - "then": { - "required": [ - "path", - "dataset" - ], - "properties": { - "path": { - "type": "string", - "description": "Path to the folder containing partitioned data.\nIf path starts with the protocol (e.g., ``s3://``) then the\ncorresponding ``fsspec`` concrete filesystem implementation will\nbe used. 
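To make the `IncrementalDataset` rule above concrete, a hypothetical instance using format (c) for the nested `dataset` definition, i.e. a dictionary with a `type` key:

    {
      "daily_sales": {
        "type": "IncrementalDataset",
        "path": "s3://my-bucket/sales/",
        "dataset": {"type": "pandas.CSVDataset"},
        "filename_suffix": ".csv"
      }
    }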
If protocol is not specified,\n``fsspec.implementations.local.LocalFileSystem`` will be used.\n**Note:** Some concrete implementations are bundled with ``fsspec``,\nwhile others (like ``s3`` or ``gcs``) must be installed separately\nprior to usage of the ``PartitionedDataset``." - }, - "dataset": { - "pattern": ".*", - "description": "Underlying dataset definition. This is used to instantiate\nthe dataset for each file located inside the ``path``.\nAccepted formats are:\na) object of a class that inherits from ``AbstractDataset``\nb) a string representing a fully qualified class name to such class\nc) a dictionary with ``type`` key pointing to a string from b),\nother keys are passed to the Dataset initializer.\nCredentials for the dataset can be explicitly specified in\nthis configuration." - }, - "filepath_arg": { - "type": "string", - "description": "Underlying dataset initializer argument that will\ncontain a path to each corresponding partition file.\nIf unspecified, defaults to \"filepath\"." - }, - "filename_suffix": { - "type": "string", - "description": "If specified, only partitions that end with this\nstring will be processed." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Protocol-specific options that will be passed to\n``fsspec.filesystem``\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.filesystem\nand the dataset initializer. If the dataset config contains\nexplicit credentials spec, then such spec will take precedence.\nAll possible credentials management scenarios are documented here:\nhttps://kedro.readthedocs.io/en/0.19.0/data/kedro_io.html#partitioned-dataset-credentials" - }, - "load_args": { - "type": "object", - "description": "Keyword arguments to be passed into ``find()`` method of\nthe filesystem implementation." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "api.APIDataset" - } - } - }, - "then": { - "required": [ - "url" - ], - "properties": { - "url": { - "type": "string", - "description": "The API URL endpoint." - }, - "method": { - "type": "string", - "description": "The Method of the request, GET, POST, PUT, DELETE, HEAD, etc..." - }, - "data": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "params": { - "type": "object", - "description": "The url parameters of the API.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#passing-parameters-in-urls" - }, - "headers": { - "type": "object", - "description": "The HTTP headers.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#custom-headers" - }, - "auth": { - "pattern": ".*", - "description": "Anything ``requests`` accepts. Normally it's either ``('login', 'password')``,\nor ``AuthBase``, ``HTTPBasicAuth`` instance for more complex cases." 
- }, - "json": { - "pattern": ".*", - "description": "The request payload, used for POST, PUT, etc requests, passed in\nto the json kwarg in the requests object.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#more-complicated-post-requests" - }, - "timeout": { - "type": "integer", - "description": "The wait time in seconds for a response, defaults to 1 minute.\nhttps://requests.readthedocs.io/en/master/user/quickstart/#timeouts" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "biosequence.BioSequenceDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to sequence file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``." - }, - "load_args": { - "type": "object", - "description": "Options for parsing sequence files by Biopython ``SeqIO.parse()``." - }, - "save_args": { - "type": "object", - "description": "file format supported by Biopython ``SeqIO.write()``.\nE.g. `{\"format\": \"fasta\"}`." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\n to pass to the filesystem's `open` method through nested keys\n `open_args_load` and `open_args_save`.\n Here you can find all available arguments for `open`:\n https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\n All defaults are preserved, except `mode`, which is set to `r` when loading\n and to `w` when saving.\n\nNote: Here you can find all supported file formats: https://biopython.org/wiki/SeqIO" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "dask.ParquetDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a parquet file\nparquet collection or the directory of a multipart parquet." - }, - "load_args": { - "type": "object", - "description": "Additional loading options `dask.dataframe.read_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.read_parquet" - }, - "save_args": { - "type": "object", - "description": "Additional saving options for `dask.dataframe.to_parquet`:\nhttps://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.to_parquet" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." 
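A minimal `api.APIDataset` entry per the rule above (the endpoint and parameters are hypothetical):

    {
      "weather_api": {
        "type": "api.APIDataset",
        "url": "https://example.com/api/weather",
        "method": "GET",
        "params": {"city": "London"},
        "timeout": 60
      }
    }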
- }, - "fs_args": { - "type": "object", - "description": "Optional parameters to the backend file system driver:\nhttps://docs.dask.org/en/latest/remote-data-services.html#optional-parameters" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "email.EmailMessageDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "``email`` options for parsing email messages (arguments passed\ninto ``email.parser.Parser.parse``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser.parse\nIf you would like to specify options for the `Parser`,\nyou can include them under the \"parser\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.parser.html#email.parser.Parser\nAll defaults are preserved, but \"policy\", which is set to ``email.policy.default``." - }, - "save_args": { - "type": "object", - "description": "``email`` options for generating MIME documents (arguments passed into\n``email.generator.Generator.flatten``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator.flatten\nIf you would like to specify options for the `Generator`,\nyou can include them under the \"generator\" key. Here you can\nfind all available arguments:\nhttps://docs.python.org/3/library/email.generator.html#email.generator.Generator\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "geopandas.GeoJSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a GeoJSON file prefixed with a protocol like\n`s3://`. If prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
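A sketch of a `dask.ParquetDataset` entry satisfying the rule above; `compression` is one of the `dask.dataframe.to_parquet` options forwarded via `save_args` (the path is hypothetical):

    {
      "partitioned_features": {
        "type": "dask.ParquetDataset",
        "filepath": "s3://my-bucket/features/",
        "save_args": {"compression": "snappy"}
      }
    }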
- }, - "load_args": { - "type": "object", - "description": "GeoPandas options for loading GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference/geopandas.read_file.html" - }, - "save_args": { - "type": "object", - "description": "GeoPandas options for saving GeoJSON files.\nHere you can find all available arguments:\nhttps://geopandas.org/reference.html#geopandas.GeoDataFrame.to_file\nThe default save_args driver is 'GeoJSON'; all other defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to access the underlying filesystem.\nE.g. for ``GCSFileSystem`` it would look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "holoviews.HoloviewsWriter" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}`" - }, - "save_args": { - "type": "object", - "description": "Extra save args passed to `holoviews.save()`. 
See\nhttps://holoviews.org/reference_manual/holoviews.util.html#holoviews.util.save" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "huggingface.HFDataset" - } - } - }, - "then": { - "required": [ - "dataset_name" - ], - "properties": { - "dataset_name": { - "type": "string", - "description": "Huggingface dataset name" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "huggingface.HFTransformerPipelineDataset" - } - } - }, - "then": { - "properties": { - "task": { - "type": "string", - "description": "Huggingface pipeline task name" - }, - "model_name": { - "type": "string", - "description": "Huggingface model name" - }, - "pipeline_kwargs": { - "type": "object", - "description": "Additional kwargs to be passed into the pipeline" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "json.JSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "matplotlib.MatplotlibWriter" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a matplot object file(s) prefixed with a protocol\nlike `s3://`. If prefix is not provided, `file` protocol (local filesystem) will be\nused. The prefix should be any protocol supported by ``fsspec``." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. 
for ``S3FileSystem`` it should look like:\n`{'key': '', 'secret': ''}`" - }, - "save_args": { - "type": "object", - "description": "Save args passed to `plt.savefig`. See\nhttps://matplotlib.org/api/_as_gen/matplotlib.pyplot.savefig.html" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "networkx.NetworkXDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to the NetworkX graph JSON file." - }, - "load_args": { - "type": "object", - "description": "Arguments passed on to ``networkx.node_link_graph``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_graph.html" - }, - "save_args": { - "type": "object", - "description": "Arguments passed on to ``networkx.node_link_data``.\nSee the details in\nhttps://networkx.github.io/documentation/networkx-1.9.1/reference/generated/networkx.readwrite.json_graph.node_link_data.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.CSVDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a CSV file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving CSV files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. 
`{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.ExcelDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Excel file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "engine": { - "type": "string", - "description": "The engine used to write to excel files. The default\nengine is 'xlsxwriter'." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_excel.html\nAll defaults are preserved, but \"engine\", which is set to \"xlrd\"." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving Excel files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html\nAll defaults are preserved, but \"index\", which is set to False.\nIf you would like to specify options for the `ExcelWriter`,\nyou can include them under the \"writer\" key. Here you can\nfind all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.FeatherDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a feather file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading feather files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_feather.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. 
for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.GBQTableDataset" - } - } - }, - "then": { - "required": [ - "dataset", - "table_name" - ], - "properties": { - "dataset": { - "type": "string", - "description": "Google BigQuery dataset." - }, - "table_name": { - "type": "string", - "description": "Google BigQuery table name." - }, - "project": { - "type": "string", - "description": "Google BigQuery Account project ID.\nOptional when available from the environment.\nhttps://cloud.google.com/resource-manager/docs/creating-managing-projects" - }, - "credentials": { - "pattern": ".*", - "description": "Credentials for accessing Google APIs.\nEither ``google.auth.credentials.Credentials`` object or dictionary with\nparameters required to instantiate ``google.oauth2.credentials.Credentials``.\nHere you can find all the arguments:\nhttps://google-auth.readthedocs.io/en/latest/reference/google.oauth2.credentials.html" - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading BigQuery table into DataFrame.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_gbq.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving DataFrame to BigQuery table.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_gbq.html\nAll defaults are preserved, but \"progress_bar\", which is set to False." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.HDFDataset" - } - } - }, - "then": { - "required": [ - "filepath", - "key" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a hdf file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "key": { - "type": "string", - "description": "Identifier to the group in the HDF store." - }, - "load_args": { - "type": "object", - "description": "PyTables options for loading hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "PyTables options for saving hdf files.\nYou can find all available arguments at:\nhttps://www.pytables.org/usersguide/libref/top_level.html#tables.open_file\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." 
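A hypothetical `pandas.ExcelDataset` instance per the rule above; `sheet_name` is a `read_excel` argument forwarded via `load_args`:

    {
      "shuttles": {
        "type": "pandas.ExcelDataset",
        "filepath": "data/01_raw/shuttles.xlsx",
        "load_args": {"sheet_name": "Sheet1"}
      }
    }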
- }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.JSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_json.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving JSON files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.ParquetDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Parquet file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nIt can also be a path to a directory. If the directory is\nprovided then it can be used for reading partitioned parquet files.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Additional options for loading Parquet file(s).\nHere you can find all available arguments when reading single file:\nhttps://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_parquet.html\nHere you can find all available arguments when reading partitioned datasets:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html#pyarrow.parquet.ParquetDataset.read\nAll defaults are preserved."
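Per the `pandas.HDFDataset` rule above, both `filepath` and `key` are required; a minimal hypothetical instance:

    {
      "intermediate_table": {
        "type": "pandas.HDFDataset",
        "filepath": "data/02_intermediate/table.h5",
        "key": "processed"
      }
    }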
- }, - "save_args": { - "type": "object", - "description": "Additional saving options for `pyarrow.parquet.write_table` and\n`pyarrow.Table.from_pandas`.\nHere you can find all available arguments for `write_table()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html?highlight=write_table#pyarrow.parquet.write_table\nThe arguments for `from_pandas()` should be passed through a nested\nkey: `from_pandas`. E.g.: `save_args = {\"from_pandas\": {\"preserve_index\": False}}`\nHere you can find all available arguments for `from_pandas()`:\nhttps://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table.from_pandas" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.SQLTableDataset" - } - } - }, - "then": { - "required": [ - "table_name", - "credentials" - ], - "properties": { - "table_name": { - "type": "string", - "description": "The table name to load or save data to. It\noverwrites name in ``save_args`` and ``table_name``\nparameters in ``load_args``." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_table``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_table.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying pandas ``to_sql`` function along\nwith the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_sql.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls\nIt has ``index=False`` in the default parameters." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.SQLQueryDataset" - } - } - }, - "then": { - "required": [ - "sql", - "credentials" - ], - "properties": { - "sql": { - "type": "string", - "description": "The sql query statement." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary with a ``SQLAlchemy`` connection string.\nUsers are supposed to provide the connection string 'con'\nthrough credentials. 
It overwrites `con` parameter in\n``load_args`` and ``save_args`` in case it is provided. To find\nall supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "load_args": { - "type": "object", - "description": "Provided to underlying pandas ``read_sql_query``\nfunction along with the connection string.\nTo find all supported arguments, see here:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html\nTo find all supported connection string formats, see here:\nhttps://docs.sqlalchemy.org/en/13/core/engines.html#database-urls" - }, - "execution_options": { - "type": "object", - "description": "A dictionary with non-SQL options for the connection\nto be applied to the underlying engine.\nTo find all supported execution options, see here:\nhttps://docs.sqlalchemy.org/en/12/core/connections.html#sqlalchemy.engine.Connection.execution_options \nNote that this is not a standard argument supported by pandas API, but could be useful for handling large datasets." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pandas.XMLDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a XML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "Pandas options for loading XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_xml.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pandas options for saving XML files.\nHere you can find all available arguments:\nhttps://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_xml.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pickle.PickleDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Pickle file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "backend": { - "type": "string", - "description": "Backend to use, must be one of ['pickle', 'joblib']. Defaults to 'pickle'." 
- }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments for different backends:\npickle.load: https://docs.python.org/3/library/pickle.html#pickle.load\njoblib.load: https://joblib.readthedocs.io/en/latest/generated/joblib.load.html\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments for different backends:\npickle.dump: https://docs.python.org/3/library/pickle.html#pickle.dump\njoblib.dump: https://joblib.readthedocs.io/en/latest/generated/joblib.dump.html\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "pillow.ImageDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to an image file prefixed with a protocol like\n`s3://`. If prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "Pillow options for saving image files.\nHere you can find all available arguments:\nhttps://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save\nAll defaults are preserved." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "plotly.PlotlyDataset" - } - } - }, - "then": { - "required": [ - "filepath", - "plotly_args" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.\nIf prefix is not provided `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." 
- }, - "plotly_args": { - "type": "object", - "description": "Plotly configuration for generating a plotly graph object Figure\nrepresenting the plotted data." - }, - "load_args": { - "type": "object", - "description": "Plotly options for loading JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.from_json.html#plotly.io.from_json\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Plotly options for saving JSON files.\nHere you can find all available arguments:\nhttps://plotly.com/python-api-reference/generated/plotly.io.write_json.html\nAll defaults are preserved, but \"index\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested key `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `wb` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "redis.PickleDataset" - } - } - }, - "then": { - "required": [ - "key" - ], - "properties": { - "key": { - "type": "string", - "description": "The key to use for saving/loading object to Redis." - }, - "backend": { - "type": "string", - "description": "Backend to use, must be an import path to a module which satisfies the ``pickle`` interface.\nThat is, contains a `loads` and `dumps` function. Defaults to 'pickle'." - }, - "load_args": { - "type": "object", - "description": "Pickle options for loading pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.loads\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "Pickle options for saving pickle files.\nHere you can find all available arguments:\nhttps://docs.python.org/3/library/pickle.html#pickle.dumps\nAll defaults are preserved." - }, - "credentials": { - "type": "object", - "description": "Credentials required to get access to the redis server." - }, - "redis_args": { - "type": "object", - "description": "Extra arguments to pass into the redis client constructor ``redis.StrictRedis.from_url``, as well as to pass to the ``redis.StrictRedis.set``" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a Spark dataframe. When using Databricks\nand working with data written to mount path points,\nspecify ``filepath``s for (versioned) ``SparkDataset``s\nstarting with ``/dbfs/mnt``." - }, - "file_format": { - "type": "string", - "description": "File format used during load and save\noperations. These are formats supported by the running\nSparkContext include parquet, csv. 
For a list of supported\nformats please refer to Apache Spark documentation at\nhttps://spark.apache.org/docs/latest/sql-programming-guide.html" - }, - "load_args": { - "type": "object", - "description": "Load args passed to Spark DataFrameReader load method.\nIt is dependent on the selected file format. You can find\na list of read options for each supported format\nin Spark DataFrame read documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "save_args": { - "type": "object", - "description": "Save args passed to Spark DataFrame write options.\nSimilar to load_args this is dependent on the selected file\nformat. You can pass ``mode`` and ``partitionBy`` to specify\nyour overwrite mode and partitioning respectively. You can find\na list of options for each format in Spark DataFrame\nwrite documentation:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrame.html" - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials to access the S3 bucket, such as\n``key``, ``secret``, if ``filepath`` prefix is ``s3a://`` or ``s3n://``.\nOptional keyword arguments passed to ``hdfs.client.InsecureClient``\nif ``filepath`` prefix is ``hdfs://``. Ignored otherwise." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkHiveDataset" - } - } - }, - "then": { - "required": [ - "database", - "table", - "write_mode" - ], - "properties": { - "database": { - "type": "string", - "description": "The name of the hive database." - }, - "table": { - "type": "string", - "description": "The name of the table within the database." - }, - "write_mode": { - "type": "string", - "description": "``insert``, ``upsert`` or ``overwrite`` are supported." - }, - "table_pk": { - "type": "array", - "description": "If performing an upsert, this identifies the primary key columns used to\nresolve preexisting data. Is required for ``write_mode=\"upsert\"``." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "spark.SparkJDBCDataset" - } - } - }, - "then": { - "required": [ - "url", - "table" - ], - "properties": { - "url": { - "type": "string", - "description": "A JDBC URL of the form ``jdbc:subprotocol:subname``." - }, - "table": { - "type": "string", - "description": "The name of the table to load or save data to." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "A dictionary of JDBC database connection arguments.\nNormally at least properties ``user`` and ``password`` with\ntheir corresponding values. It updates ``properties``\nparameter in ``load_args`` and ``save_args`` in case it is\nprovided." - }, - "load_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameReader.jdbc.html" - }, - "save_args": { - "type": "object", - "description": "Provided to underlying PySpark ``jdbc`` function along\nwith the JDBC URL and the name of the table. 
To find all\nsupported arguments, see here:\nhttps://spark.apache.org/docs/latest/api/python/reference/api/pyspark.sql.DataFrameWriter.jdbc.html" - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tensorflow.TensorFlowModelDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a TensorFlow model directory prefixed with a\nprotocol like `s3://`. If prefix is not provided `file` protocol (local filesystem)\nwill be used. The prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "load_args": { - "type": "object", - "description": "TensorFlow options for loading models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/load_model\nAll defaults are preserved." - }, - "save_args": { - "type": "object", - "description": "TensorFlow options for saving models.\nHere you can find all available arguments:\nhttps://www.tensorflow.org/api_docs/python/tf/keras/models/save_model\nAll defaults are preserved, except for \"save_format\", which is set to \"tf\"." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{'token': None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``)." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "text.TextDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.JSONDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). 
Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "tracking.MetricsDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a text file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "json options for saving JSON files (arguments passed\ninto ```json.dump``). Here you can find all available arguments:\nhttps://docs.python.org/3/library/json.html\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." - }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - }, - { - "if": { - "properties": { - "type": { - "const": "yaml.YAMLDataset" - } - } - }, - "then": { - "required": [ - "filepath" - ], - "properties": { - "filepath": { - "type": "string", - "description": "Filepath in POSIX format to a YAML file prefixed with a protocol like `s3://`.\nIf prefix is not provided, `file` protocol (local filesystem) will be used.\nThe prefix should be any protocol supported by ``fsspec``.\nNote: `http(s)` doesn't support versioning." - }, - "save_args": { - "type": "object", - "description": "PyYAML options for saving YAML files (arguments passed\ninto ```yaml.dump``). Here you can find all available arguments:\nhttps://pyyaml.org/wiki/PyYAMLDocumentation\nAll defaults are preserved, but \"default_flow_style\", which is set to False." - }, - "credentials": { - "type": [ - "object", - "string" - ], - "description": "Credentials required to get access to the underlying filesystem.\nE.g. for ``GCSFileSystem`` it should look like `{\"token\": None}`." 
- }, - "fs_args": { - "type": "object", - "description": "Extra arguments to pass into underlying filesystem class constructor\n(e.g. `{\"project\": \"my-project\"}` for ``GCSFileSystem``), as well as\nto pass to the filesystem's `open` method through nested keys\n`open_args_load` and `open_args_save`.\nHere you can find all available arguments for `open`:\nhttps://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open\nAll defaults are preserved, except `mode`, which is set to `r` when loading\nand to `w` when saving." - } - } - } - } - ] - } - } -} From 55261ac6b0b5ed3efdf68ac40259eb16834ee8ef Mon Sep 17 00:00:00 2001 From: Chris Schopp <56572144+chrisschopp@users.noreply.github.com> Date: Sun, 1 Dec 2024 05:06:15 +0000 Subject: [PATCH 2/7] Add description of change to `RELEASE.md` Signed-off-by: Chris Schopp <56572144+chrisschopp@users.noreply.github.com> --- RELEASE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 0cc0fdf013..7a2b33e9d0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -5,10 +5,13 @@ ## Bug fixes and other changes * Added validation to ensure dataset versions consistency across catalog. +* Moved `kedro-catalog` JSON schema to `kedro-datasets`. ## Breaking changes to the API ## Documentation changes ## Community contributions +Many thanks to the following Kedroids for contributing PRs to this release: +* [Chris Schopp](https://github.com/chrisschopp) # Release 0.19.10 From 1b3436eaf1dbc4d521372560f5c76672a9e745f3 Mon Sep 17 00:00:00 2001 From: Chris Schopp <56572144+chrisschopp@users.noreply.github.com> Date: Wed, 4 Dec 2024 02:23:01 +0000 Subject: [PATCH 3/7] Update refs to `jsonschema` to use `kedro-plugins/kedro-datasets/` Signed-off-by: GitHub --- docs/source/data/how_to_create_a_custom_dataset.md | 2 +- docs/source/development/set_up_pycharm.md | 4 ++-- docs/source/development/set_up_vscode.md | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/data/how_to_create_a_custom_dataset.md b/docs/source/data/how_to_create_a_custom_dataset.md index 7f39987dd7..a4289931d4 100644 --- a/docs/source/data/how_to_create_a_custom_dataset.md +++ b/docs/source/data/how_to_create_a_custom_dataset.md @@ -599,6 +599,6 @@ kedro-plugins/kedro-datasets/kedro_datasets/image There are two special considerations when contributing a dataset: 1. Add the dataset to `kedro_datasets.rst` so it shows up in the API documentation. - 2. Add the dataset to `static/jsonschema/kedro-catalog-X.json` for IDE validation. + 2. Add the dataset to `kedro-plugins/kedro-datasets/static/jsonschema/kedro-catalog-X.json` for IDE validation. ``` diff --git a/docs/source/development/set_up_pycharm.md b/docs/source/development/set_up_pycharm.md index fd2e8f8a94..501cc609ff 100644 --- a/docs/source/development/set_up_pycharm.md +++ b/docs/source/development/set_up_pycharm.md @@ -163,10 +163,10 @@ You can enable the Kedro catalog validation schema in your PyCharm IDE to enable ![](../meta/images/pycharm_edit_schema_mapping.png) -Add a new mapping using the "+" button in the top left of the window and select the name you want for it. Enter this URL `https://raw.githubusercontent.com/kedro-org/kedro/develop/static/jsonschema/kedro-catalog-0.19.json` in the "Schema URL" field and select "JSON Schema Version 7" in the "Schema version" field. +Add a new mapping using the "+" button in the top left of the window and select the name you want for it. 
Enter this URL `https://raw.githubusercontent.com/kedro-org/kedro-plugins/main/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json` in the "Schema URL" field and select "JSON Schema Version 7" in the "Schema version" field.

Add the following file path pattern to the mapping: `conf/**/*catalog*`.

![](../meta/images/pycharm_catalog_schema_mapping.png)

> [Different schemas for different Kedro versions can be found in the `kedro-datasets` repository](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-datasets/static/jsonschema).

diff --git a/docs/source/development/set_up_vscode.md b/docs/source/development/set_up_vscode.md
index 0c94404e35..3429b31faa 100644
--- a/docs/source/development/set_up_vscode.md
+++ b/docs/source/development/set_up_vscode.md
@@ -260,11 +260,11 @@ Enter the following in your `settings.json` file:
 ```json
 {
     "yaml.schemas": {
-        "https://raw.githubusercontent.com/kedro-org/kedro/develop/static/jsonschema/kedro-catalog-0.19.json": "conf/**/*catalog*"
+        "https://raw.githubusercontent.com/kedro-org/kedro-plugins/main/kedro-datasets/static/jsonschema/kedro-catalog-0.19.json": "conf/**/*catalog*"
     }
 }
 ```

 and start editing your `catalog` files.

> [Different schemas for different Kedro versions can be found in the `kedro-datasets` repository](https://github.com/kedro-org/kedro-plugins/tree/main/kedro-datasets/static/jsonschema).

From fefea4e959b73e86eeabe46e5b60b5335356bf47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?=
Date: Fri, 10 Jan 2025 14:26:15 +0100
Subject: [PATCH 4/7] Update ignore-names.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Juan Luis Cano Rodríguez
---
 .github/styles/Kedro/ignore-names.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/styles/Kedro/ignore-names.txt b/.github/styles/Kedro/ignore-names.txt
index 1573565750..2731d1b139 100644
--- a/.github/styles/Kedro/ignore-names.txt
+++ b/.github/styles/Kedro/ignore-names.txt
@@ -94,6 +94,7 @@ Puneet
 Rashida
 Ravi
 Richard
+Schopp
 Schwarzmann
 Sorokin
 Stichbury

From 9b025280f594185c0821bb481b1da5d7d2035e3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Luis=20Cano=20Rodr=C3=ADguez?=
Date: Fri, 10 Jan 2025 14:27:58 +0100
Subject: [PATCH 5/7] Update ignore.txt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Juan Luis Cano Rodríguez
---
 .github/styles/Kedro/ignore.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/styles/Kedro/ignore.txt b/.github/styles/Kedro/ignore.txt
index 3d568cddc9..ac2cd46723 100644
--- a/.github/styles/Kedro/ignore.txt
+++ b/.github/styles/Kedro/ignore.txt
@@ -1,5 +1,6 @@
 Kedro
 Kedro's
+Kedroids
 Kubeflow
 Databricks
 Conda

From 836e7f4dd9524434f4dd9cf06f9662c4ce17cde4 Mon Sep 17 00:00:00 2001
From: Chris Schopp <56572144+chrisschopp@users.noreply.github.com>
Date: Tue, 14 Jan 2025 02:35:53 +0000
Subject: [PATCH 6/7] Keep jsonschemas for CachedDataset, MemoryDataset, and
 LambdaDataset

* These datasets remain in Kedro and were not moved to kedro-datasets

Signed-off-by: GitHub
---
 static/img/kedro-catalog-0.19.json | 102 +++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 
static/img/kedro-catalog-0.19.json diff --git a/static/img/kedro-catalog-0.19.json b/static/img/kedro-catalog-0.19.json new file mode 100644 index 0000000000..1be1c95976 --- /dev/null +++ b/static/img/kedro-catalog-0.19.json @@ -0,0 +1,102 @@ +{ + "type": "object", + "patternProperties": { + "^[a-z0-9-_]+$": { + "required": [ + "type" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "CachedDataset", + "MemoryDataset", + "LambdaDataset" + ] + } + }, + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "CachedDataset" + } + } + }, + "then": { + "required": [ + "dataset" + ], + "properties": { + "dataset": { + "pattern": ".*", + "description": "A Kedro Dataset object or a dictionary to cache." + }, + "copy_mode": { + "type": "string", + "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "MemoryDataset" + } + } + }, + "then": { + "required": [], + "properties": { + "data": { + "pattern": ".*", + "description": "Python object containing the data." + }, + "copy_mode": { + "type": "string", + "description": "The copy mode used to copy the data. Possible\nvalues are: \"deepcopy\", \"copy\" and \"assign\". If not\nprovided, it is inferred based on the data type." + } + } + } + }, + { + "if": { + "properties": { + "type": { + "const": "LambdaDataset" + } + } + }, + "then": { + "required": [ + "load", + "save" + ], + "properties": { + "load": { + "pattern": ".*", + "description": "Method to load data from a data set." + }, + "save": { + "pattern": ".*", + "description": "Method to save data to a data set." + }, + "exists": { + "pattern": ".*", + "description": "Method to check whether output data already exists." + }, + "release": { + "pattern": ".*", + "description": "Method to release any cached information." + } + } + } + } + ] + } + } + } + \ No newline at end of file From 1dbca25e6646019834a1dbbb76b6ddbafc46c9d9 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Wed, 22 Jan 2025 11:17:12 +0000 Subject: [PATCH 7/7] Fix linter Signed-off-by: Merel Theisen --- static/img/kedro-catalog-0.19.json | 1 - 1 file changed, 1 deletion(-) diff --git a/static/img/kedro-catalog-0.19.json b/static/img/kedro-catalog-0.19.json index 1be1c95976..62a1f4b4dc 100644 --- a/static/img/kedro-catalog-0.19.json +++ b/static/img/kedro-catalog-0.19.json @@ -99,4 +99,3 @@ } } } - \ No newline at end of file
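
As a sketch of what the retained schema accepts, a minimal `conf/base/catalog.yml` snippet might look like the following. The entry names and the Parquet filepath are hypothetical, and the nested `pandas.ParquetDataset` is assumed to come from `kedro-datasets`; only the top-level `CachedDataset`, `MemoryDataset`, and `LambdaDataset` types are enumerated by the schema kept in this repository.

```yaml
# Hypothetical catalog entries; names and paths are illustrative only.
scratch_frame:
  type: MemoryDataset            # kept in Kedro core, so listed in the retained schema
  copy_mode: assign              # one of "deepcopy", "copy", "assign"

cached_model_input:
  type: CachedDataset            # requires a nested `dataset` definition
  dataset:
    type: pandas.ParquetDataset  # assumed: resolved from kedro-datasets at runtime
    filepath: data/05_model_input/model_input.parquet
```

With the PyCharm or VSCode schema mapping above pointed at a `kedro-catalog-0.19.json`, entries like these are validated as you type; dataset classes that moved to `kedro-datasets` are covered by the relocated schema instead.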