# Patch summary: thread an optional "processor" value from the `bulkload` CLI
# command through to the Solr bulk-load request URL.
#
# linkml_solr/cli.py
#   * `bulkload` gains a `--processor` / `-p` click option and a matching
#     `processor=None` parameter.
#   * Each file in `files` is now loaded with
#     `bulkload_file(..., processor=processor)`.
#
# linkml_solr/utils/solr_bulkload.py
#   * `bulkload_file` gains a `processor: str = None` parameter and a
#     `:param processor:` docstring entry.
#   * When `processor is not None`, `&processor={processor}` is appended to the
#     `{base_url}/{core}/update?...` URL; when it is None the URL is unchanged.
#
# NOTE(review): the line breaks and indentation of this patch appear to have
#   been collapsed onto one line; they must be restored before it can be applied.
# NOTE(review): `processor` is interpolated into the query string verbatim.
#   `internal_separator` is passed already percent-encoded ('%7C'), so callers
#   presumably must supply a URL-safe value -- confirm, or percent-encode it.
# NOTE(review): presumably this maps to Solr's `processor` update request
#   parameter -- confirm against the Solr documentation.
# NOTE(review): `processor: str = None` would be more precisely annotated as
#   Optional, and the parentheses in `if (processor is not None):` are redundant.
# NOTE(review): the unchanged `bulkload` docstring ("Convert multiple golr yaml
#   schemas to linkml") does not mention bulk loading -- possibly copy-pasted;
#   confirm and fix in a follow-up.
diff --git a/linkml_solr/cli.py b/linkml_solr/cli.py index afd3ba2..475558b 100644 --- a/linkml_solr/cli.py +++ b/linkml_solr/cli.py @@ -50,8 +50,10 @@ def main(verbose: int, quiet: bool): @click.option('--url', '-u', default=DEFAULT_SOLR_URL, help='solr url.') +@click.option('--processor', '-p', + help='Processor argument to pass when bulk loading to Solr') @click.argument('files', nargs=-1) -def bulkload(files, format, schema, url, core): +def bulkload(files, format, schema, url, core, processor=None): """ Convert multiple golr yaml schemas to linkml """ @@ -60,7 +62,7 @@ def bulkload(files, format, schema, url, core): with open(schema) as stream: schema_obj = yaml_loader.load(stream, target_class=SchemaDefinition) for f in files: - bulkload_file(f, format=format, schema=schema_obj, core=core, base_url=url) + bulkload_file(f, format=format, schema=schema_obj, core=core, base_url=url, processor=processor) @main.command() @click.option('--schema', '-s', diff --git a/linkml_solr/utils/solr_bulkload.py b/linkml_solr/utils/solr_bulkload.py index b4b375a..8a087ff 100644 --- a/linkml_solr/utils/solr_bulkload.py +++ b/linkml_solr/utils/solr_bulkload.py @@ -12,6 +12,7 @@ def bulkload_file(f, base_url=None, core=None, schema: SchemaDefinition = None, + processor: str = None, ): """ Bulkload a file using solr bulkload API @@ -21,6 +22,7 @@ def bulkload_file(f, :param base_url: :param core: :param schema: + :param processor: Processor argument to pass when bulk loading to Solr :return: """ mvslots = _get_multivalued_slots(schema) @@ -29,6 +31,8 @@ def bulkload_file(f, internal_separator = '%7C' parts = [f'f.{s}.split=true&f.{s}.separator={internal_separator}' for s in mvslots] url = f'{base_url}/{core}/update?{"&".join(parts)}&commit=true&separator={separator}' + if (processor is not None): + url = f'{url}&processor={processor}' if format == 'csv': ct = 'application/csv' elif format == 'json':