From 1f3e8c34802b17ac059a5c5ae98af025fb32ff99 Mon Sep 17 00:00:00 2001 From: Erick Martins Ratamero Date: Mon, 6 Nov 2023 08:55:05 -0500 Subject: [PATCH] Packing and unpacking figures (#67) * help text for figure pack/unpack * added arguments * stub `populate_figures` * grabbing correct Figures * adding figures to XML * unpacking Figures, updating IDs * flake8 * using Will's Figure infra * flake8 * tests passing! --- src/generate_omero_objects.py | 39 +++- src/generate_xml.py | 76 +++++++- src/omero_cli_transfer.py | 20 +- test/integration/test_figure.py | 325 ++++++++++++++++++++++++++++++++ 4 files changed, 453 insertions(+), 7 deletions(-) create mode 100644 test/integration/test_figure.py diff --git a/src/generate_omero_objects.py b/src/generate_omero_objects.py index 3bd0e47..6c3fdaa 100644 --- a/src/generate_omero_objects.py +++ b/src/generate_omero_objects.py @@ -149,7 +149,8 @@ def find_dataset(ds: Dataset, pjs: List[Project], conn: BlitzGateway) -> int: def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str, - folder: str, metadata: List[str]) -> dict: + folder: str, figure: bool, img_map: dict, + metadata: List[str]) -> dict: ann_map = {} for an in ans: if isinstance(an, TagAnnotation): @@ -207,6 +208,11 @@ def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str, comm_ann.save() ann_map[an.id] = comm_ann.getId() elif isinstance(an, FileAnnotation): + if an.namespace == "omero.web.figure.json": + if not figure: + continue + else: + update_figure_refs(an, ans, img_map, folder) original_file = create_original_file(an, ans, conn, folder) file_ann = FileAnnotationWrapper(conn) file_ann.setDescription(an.description) @@ -217,6 +223,30 @@ def create_annotations(ans: List[Annotation], conn: BlitzGateway, hash: str, return ann_map +def update_figure_refs(ann: FileAnnotation, ans: List[Annotation], + img_map: dict, folder: str): + curr_folder = str(Path('.').resolve()) + for an in ann.annotation_refs: + clean_id = int(an.id.split(":")[-1]) + if clean_id < 0: + cmnt_id = an.id + for an_loop in ans: + if an_loop.id == cmnt_id and isinstance(an_loop, CommentAnnotation): + fpath = str(an_loop.value) + dest_path = str(os.path.join(curr_folder, folder, '.', fpath)) + with open(dest_path, 'r') as file: + filedata = file.read() + for src_id, dest_id in img_map.items(): + clean_id = int(src_id.split(":")[-1]) + src_str = f"\"imageId\": {clean_id}" + dest_str = f"\"imageId\": {dest_id}" + print(src_str, dest_str) + filedata = filedata.replace(src_str, dest_str) + with open(dest_path, 'w') as file: + file.write(filedata) + return + + def create_original_file(ann: FileAnnotation, ans: List[Annotation], conn: BlitzGateway, folder: str ) -> OriginalFileWrapper: @@ -243,7 +273,7 @@ def create_plate_map(ome: OME, img_map: dict, conn: BlitzGateway file_path = "" for ann in ome.structured_annotations: if (ann.id in ann_ids and - type(ann) == CommentAnnotation and + isinstance(ann, CommentAnnotation) and int(ann.id.split(":")[-1]) < 0): newome.structured_annotations.remove(ann) map_ref_ids.append(ann.id) @@ -585,7 +615,8 @@ def rename_plates(pls: List[Plate], pl_map: dict, conn: BlitzGateway): def populate_omero(ome: OME, img_map: dict, conn: BlitzGateway, hash: str, - folder: str, metadata: List[str], merge: bool): + folder: str, metadata: List[str], merge: bool, + figure: bool): plate_map, ome = create_plate_map(ome, img_map, conn) rename_images(ome.images, img_map, conn) rename_plates(ome.plates, plate_map, conn) @@ -593,7 +624,7 @@ def populate_omero(ome: OME, img_map: dict, conn: BlitzGateway, hash: str, ds_map = create_or_set_datasets(ome.datasets, ome.projects, conn, merge) screen_map = create_or_set_screens(ome.screens, conn, merge) ann_map = create_annotations(ome.structured_annotations, conn, - hash, folder, metadata) + hash, folder, figure, img_map, metadata) create_rois(ome.rois, ome.images, img_map, conn) link_plates(ome, screen_map, plate_map, conn) link_datasets(ome, proj_map, ds_map, conn) diff --git a/src/generate_xml.py b/src/generate_xml.py index 5c156de..5c2fb2d 100644 --- a/src/generate_xml.py +++ b/src/generate_xml.py @@ -18,6 +18,7 @@ from ome_types.model import Point, Line, Rectangle, Ellipse, Polygon from ome_types.model import Polyline, Label, Shape from ome_types.model.map import M +from omero.sys import Parameters from omero.gateway import BlitzGateway from omero.model import TagAnnotationI, MapAnnotationI, FileAnnotationI from omero.model import CommentAnnotationI, LongAnnotationI, Fileset @@ -445,6 +446,20 @@ def create_filepath_annotations(id: str, conn: BlitzGateway, return anns, anrefs +def create_figure_annotations(id: str) -> Tuple[CommentAnnotation, + AnnotationRef]: + ns = id + clean_id = int(ns.split(":")[-1]) + f = f'figures/Figure_{clean_id}.json' + uid = (-1) * uuid4().int + an = CommentAnnotation(id=uid, + namespace=ns, + value=f + ) + anref = AnnotationRef(id=an.id) + return (an, anref) + + def create_provenance_metadata(conn: BlitzGateway, img_id: int, hostname: str, metadata: Union[List[str], None], plate: bool @@ -930,7 +945,7 @@ def list_file_ids(ome: OME) -> dict: def populate_xml(datatype: str, id: int, filepath: str, conn: BlitzGateway, - hostname: str, barchive: bool, simple: bool, + hostname: str, barchive: bool, simple: bool, figure: bool, metadata: List[str]) -> Tuple[OME, dict]: ome = OME() obj = conn.getObject(datatype, id) @@ -944,6 +959,8 @@ def populate_xml(datatype: str, id: int, filepath: str, conn: BlitzGateway, populate_screen(obj, ome, conn, hostname, metadata) elif datatype == 'Plate': populate_plate(obj, ome, conn, hostname, metadata) + if (not (barchive or simple)) and figure: + populate_figures(ome, conn, filepath) if not barchive: with open(filepath, 'w') as fp: print(to_xml(ome), file=fp) @@ -1012,6 +1029,63 @@ def populate_rocrate(datatype: str, ome: OME, filepath: str, return +def populate_figures(ome: OME, conn: BlitzGateway, filepath: str): + cli = CLI() + cli.loadplugins() + clean_img_ids = [] + for img in ome.images: + clean_img_ids.append(img.id.split(":")[-1]) + q = conn.getQueryService() + params = Parameters() + results = q.projection( + "SELECT f.id FROM FileAnnotation f" + " WHERE f.ns='omero.web.figure.json'", + params, + conn.SERVICE_OPTS + ) + figure_ids = [r[0].val for r in results] + if figure_ids: + parent = Path(filepath).parent + figure_dir = parent / "figures" + os.makedirs(figure_dir, exist_ok=True) + for fig in figure_ids: + filepath = figure_dir / ("Figure_" + str(fig) + ".json") + cmd = ['download', "FileAnnotation:" + str(fig), str(filepath)] + cli.invoke(cmd) + f = open(filepath, 'r').read() + has_images = False + for img in clean_img_ids: + searchterm = "\"imageId\": " + img + if searchterm in f: + has_images = True + if has_images: + fig_obj = conn.getObject("FileAnnotation", fig) + contents = fig_obj.getFile().getPath().encode() + b64 = base64.b64encode(contents) + length = len(b64) + fpath = os.path.join(fig_obj.getFile().getPath(), + fig_obj.getFile().getName()) + binaryfile = BinaryFile(file_name=fpath, + size=fig_obj.getFile().getSize(), + bin_data=BinData(big_endian=True, + length=length, + value=b64 + ) + ) + f, _ = create_file_ann_and_ref(id=fig_obj.getId(), + namespace=fig_obj.getNs(), + binary_file=binaryfile) + filepath_ann, ref = create_figure_annotations(f.id) + ome.structured_annotations.append(filepath_ann) + f.annotation_ref.append(ref) + ome.structured_annotations.append(f) + else: + os.remove(filepath) + if not os.listdir(figure_dir): + os.rmdir(figure_dir) + return + + def generate_columns(ome: OME, ids: dict) -> List[str]: columns = ["filename"] if [v for v in ids.values() if v.startswith("file_annotations")]: diff --git a/src/omero_cli_transfer.py b/src/omero_cli_transfer.py index c52c779..6316b9f 100644 --- a/src/omero_cli_transfer.py +++ b/src/omero_cli_transfer.py @@ -63,6 +63,9 @@ --zip packs the object into a compressed zip file rather than a tarball. +--figure includes OMERO.Figures; note that this can lead to a performance +hit and that Figures can reference images that are not included in your pack! + --barchive creates a package compliant with Bioimage Archive submission standards - see repo README for more detail. This package format is not compatible with unpack usage. @@ -109,6 +112,11 @@ already owns entities with the same name as ones defined in `transfer.xml`, effectively merging the "new" unpacked entities with existing ones. +--figure unpacks and updates Figures, if your pack contains those. Note that +there's no guaranteed behavior for images referenced on Figures that were not +included in a pack. You can just have an image missing, a completely unrelated +image, a permission error. Use at your own risk! + --metadata allows you to specify which transfer metadata will be used from `transfer.xml` as MapAnnotation values to the images. Fields that do not exist on `transfer.xml` will be ignored. Default is `all` (equivalent to @@ -190,6 +198,10 @@ def _configure(self, parser): pack.add_argument( "--zip", help="Pack into a zip file rather than a tarball", action="store_true") + pack.add_argument( + "--figure", help="Include OMERO.Figures into the pack" + " (caveats apply)", + action="store_true") pack.add_argument( "--barchive", help="Pack into a file compliant with Bioimage" " Archive submission standards", @@ -218,6 +230,10 @@ def _configure(self, parser): unpack.add_argument( "--merge", help="Use existing entities if possible", action="store_true") + unpack.add_argument( + "--figure", help="Use OMERO.Figures if present" + " (caveats apply)", + action="store_true") unpack.add_argument( "--folder", help="Pass path to a folder rather than a pack", action="store_true") @@ -415,8 +431,8 @@ def __pack(self, args): ome, path_id_dict = populate_xml(src_datatype, src_dataid, md_fp, self.gateway, self.hostname, args.barchive, args.simple, + args.figure, self.metadata) - print("Starting file copy...") self._copy_files(path_id_dict, folder, self.gateway) if args.simple: @@ -461,7 +477,7 @@ def __unpack(self, args): img_map = self._make_image_map(src_img_map, dest_img_map, self.gateway) print("Creating and linking OMERO objects...") populate_omero(ome, img_map, self.gateway, - hash, folder, self.metadata, args.merge) + hash, folder, self.metadata, args.merge, args.figure) return def _load_from_pack(self, filepath: str, output: Optional[str] = None diff --git a/test/integration/test_figure.py b/test/integration/test_figure.py new file mode 100644 index 0000000..98df47c --- /dev/null +++ b/test/integration/test_figure.py @@ -0,0 +1,325 @@ +# Copyright (C) 2023 The Jackson Laboratory +# All rights reserved. +# +# Use is subject to license terms supplied in LICENSE. + +from omero_cli_transfer import TransferControl +from cli import CLITest +from omero.gateway import BlitzGateway + +# import ezomero +import pytest +# import os +# import tarfile +import json +from pathlib import Path + +SUPPORTED = [ + "idonly", "imageid", "datasetid", "projectid"] + + +class TestFigure(CLITest): + + def setup_method(self, method): + super(TestFigure, self).setup_method(method) + self.cli.register("transfer", TransferControl, "TEST") + self.args += ["transfer"] + self.idonly = "-1" + self.imageid = "Image:-1" + self.datasetid = "Dataset:-1" + self.projectid = "Project:-1" + self.plateid = "Project:-1" + self.screenid = "Project:-1" + self.gw = BlitzGateway(client_obj=self.client) + + def create_image(self, sizec=4, sizez=1, sizet=1, target_name=None): + images = self.import_fake_file( + images_count=2, sizeZ=sizez, sizeT=sizet, sizeC=sizec, + client=self.client) + images.append(self.create_test_image(100, 100, 1, 1, 1, + self.client.getSession())) + self.imageid = "Image:%s" % images[0].id.val + self.source = "Image:%s" % images[1].id.val + for image in images: + img = self.gw.getObject("Image", image.id.val) + img.getThumbnail(size=(96,), direct=False) + if target_name == "datasetid" or target_name == "projectid" or\ + target_name == "idonly": + # Create Project/Dataset hierarchy + project = self.make_project(client=self.client) + self.project = self.gw.getObject("Project", project.id.val) + dataset = self.make_dataset(client=self.client) + self.dataset = self.gw.getObject("Dataset", dataset.id.val) + self.projectid = "Project:%s" % self.project.id + self.datasetid = "Dataset:%s" % self.dataset.id + self.idonly = "%s" % self.project.id + self.link(obj1=project, obj2=dataset) + for i in images: + self.link(obj1=dataset, obj2=i) + + def delete_all(self): + pjs = self.gw.getObjects("Project") + for p in pjs: + pj_id = p.id + print(f"deleting project {pj_id}") + self.gw.deleteObjects("Project", [pj_id], deleteAnns=True, + deleteChildren=True, wait=True) + ds = self.gw.getObjects("Dataset") + for d in ds: + ds_id = d.id + print(f"deleting dataset {ds_id}") + self.gw.deleteObjects("Dataset", [ds_id], deleteAnns=True, + deleteChildren=True, wait=True) + scs = self.gw.getObjects("Screen") + for sc in scs: + sc_id = sc.id + print(f"deleting screen {sc_id}") + self.gw.deleteObjects("Screen", [sc_id], deleteAnns=True, + deleteChildren=True, wait=True) + pls = self.gw.getObjects("Plate") + for pl in pls: + pl_id = pl.id + print(f"deleting plate {pl_id}") + self.gw.deleteObjects("Plate", [pl_id], deleteAnns=True, + deleteChildren=True, wait=True) + ims = self.gw.getObjects("Image") + im_ids = [] + for im in ims: + im_ids.append(im.id) + print(f"deleting image {im.id}") + if im_ids: + self.gw.deleteObjects("Image", im_ids, deleteAnns=True, + deleteChildren=True, wait=True) + fas = self.gw.getObjects("FileAnnotation") + fa_ids = [] + for fa in fas: + fa_ids.append(fa.id) + print(f"deleting file annotation {fa.id}") + if fa_ids: + self.gw.deleteObjects("FileAnnotation", fa_ids, + deleteChildren=True, wait=True) + + def get_panel_json(self, image, index, page_x): + """Create a panel.""" + channel = {'emissionWave': "400", + 'label': "DAPI", + 'color': "0000FF", + 'inverted': False, + 'active': True, + 'window': {'min': 0, + 'max': 255, + 'start': 0, + 'end': 255}, + } + img = self.gw.getObject("Image", image) + pix = img.getPrimaryPixels() + size_x = pix.getSizeX() + size_y = pix.getSizeY() + # shapes coordinates are Image coordinates + # Red Line diagonal from corner to corner + # Arrow from other corner to centre + shapes = [ + {"type": "Rectangle", "x": size_x/4, "y": size_y/4, + "width": size_x/2, "height": size_y/2, + "strokeWidth": 4, "strokeColor": "#FFFFFF"}, + {"type": "Line", "x1": 0, "x2": size_x, "y1": 0, + "y2": size_y, "strokeWidth": 5, "strokeColor": "#FF0000"}, + {"type": "Arrow", "x1": 0, "x2": size_x/2, "y1": size_y, + "y2": size_y/2, "strokeWidth": 10, "strokeColor": "#FFFF00"}, + {"type": "Ellipse", "x": size_x/2, "y": size_y/2, + "radiusX": size_x/3, "radiusY": size_y/2, "rotation": 45, + "strokeWidth": 10, "strokeColor": "#00FF00"}] + + # This works if we have Units support (OMERO 5.1) + px = pix.getPhysicalSizeX() + py = pix.getPhysicalSizeY() + pz = pix.getPhysicalSizeZ() + img_json = { + "imageId": image, + "name": "test_image", # image.getName().getValue() + "width": 100 * (index + 1), + "height": 100 * (index + 1), + "sizeZ": pix.getSizeZ(), + "theZ": 0, + "sizeT": pix.getSizeT(), + "theT": 0, + # rdef -> used at panel creation then deleted + "channels": [channel], + "orig_width": size_x, + "orig_height": size_y, + "x": page_x, + "y": index * 200, + 'datasetName': "TestDataset", + 'datasetId': 123, + 'pixel_size_x': None if px is None else px.getValue(), + 'pixel_size_y': None if py is None else py.getValue(), + 'pixel_size_z': None if pz is None else pz.getValue(), + 'pixel_size_x_symbol': '\xB5m' if px is None else px.getSymbol(), + 'pixel_size_z_symbol': None if pz is None else pz.getSymbol(), + 'pixel_size_x_unit': None if px is None else str(px.getUnit()), + 'pixel_size_z_unit': None if pz is None else str(pz.getUnit()), + 'deltaT': [], + "zoom": 100 + (index * 50), + "dx": 0, + "dy": 0, + "rotation": 100 * index, + "rotation_symbol": '\xB0', + "max_export_dpi": 1000, + "shapes": shapes, + } + return img_json + + def create_figure(self, image_ids): + """Create JSON to export figure.""" + figure_json = {"version": 2, + "paper_width": 595, + "paper_height": 842, + "page_size": "A4", + } + panels = [] + idx = 0 + for image in image_ids: + panels.append(self.get_panel_json(image, 0, 50 + (idx * 300))) + panels.append(self.get_panel_json(image, 1, 50 + (idx * 300))) + idx += 1 + figure_json['panels'] = panels + json_string = json.dumps(figure_json) + return json_string + + @pytest.mark.parametrize('target_name', sorted(SUPPORTED)) + def test_pack_unpack_figure(self, target_name, tmpdir): + + # basic case + self.create_image(target_name=target_name) + clear_img_id = int(self.imageid.split(":")[-1]) + jsonstr = self.create_figure([clear_img_id]) + print(jsonstr) + with open(Path(tmpdir)/"figure.json", 'w') as f: + f.write(jsonstr) + # ezomero cannot create orphaned FileAnnotations... + namespace = "omero.web.figure.json" + self.gw.createFileAnnfromLocalFile( + str(Path(tmpdir)/"figure.json"), + mimetype="application/json", + ns=namespace, desc=None) + target = getattr(self, target_name) + args = self.args + ["pack", target, '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + self.delete_all() + args = self.args + ["unpack", '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + fas = self.gw.getObjects("FileAnnotation") + count = 0 + for f in fas: + count += 1 + assert count == 1 + self.delete_all() + + # figure w/multi-images, one in, one out + self.create_image(target_name=target_name) + clear_img_id = int(self.imageid.split(":")[-1]) + other_img = int(self.create_test_image(100, 100, 1, 1, 1, + self.client.getSession()).id.val) + print(clear_img_id, other_img) + jsonstr = self.create_figure([clear_img_id, other_img]) + print(jsonstr) + with open(Path(tmpdir)/"figure.json", 'w') as f: + f.write(jsonstr) + # ezomero cannot create orphaned FileAnnotations... + namespace = "omero.web.figure.json" + self.gw.createFileAnnfromLocalFile( + str(Path(tmpdir)/"figure.json"), + mimetype="application/json", + ns=namespace, desc=None) + target = getattr(self, target_name) + args = self.args + ["pack", target, '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + self.delete_all() + args = self.args + ["unpack", '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + fas = self.gw.getObjects("FileAnnotation") + count = 0 + for f in fas: + count += 1 + assert count == 1 + self.delete_all() + + # figure w/multi-images, two in, one out + self.create_image(target_name=target_name) + clear_img_id = int(self.imageid.split(":")[-1]) + clear_src_id = int(self.source.split(":")[-1]) + other_img = int(self.create_test_image(100, 100, 1, 1, 1, + self.client.getSession()).id.val) + print(clear_img_id, other_img) + jsonstr = self.create_figure([clear_img_id, other_img, clear_src_id]) + print(jsonstr) + with open(Path(tmpdir)/"figure.json", 'w') as f: + f.write(jsonstr) + namespace = "omero.web.figure.json" + self.gw.createFileAnnfromLocalFile( + str(Path(tmpdir)/"figure.json"), + mimetype="application/json", + ns=namespace, desc=None) + # create another figure with source only + jsonstr = self.create_figure([other_img, clear_src_id]) + print(jsonstr) + with open(Path(tmpdir)/"figure.json", 'w') as f: + f.write(jsonstr) + # ezomero cannot create orphaned FileAnnotations... + namespace = "omero.web.figure.json" + self.gw.createFileAnnfromLocalFile( + str(Path(tmpdir)/"figure.json"), + mimetype="application/json", + ns=namespace, desc=None) + target = getattr(self, target_name) + args = self.args + ["pack", target, '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + self.delete_all() + args = self.args + ["unpack", '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + fas = self.gw.getObjects("FileAnnotation") + count = 0 + for f in fas: + count += 1 + if target == "imageid": + assert count == 1 + else: + assert count == 2 + self.delete_all() + + # figure without relevant images + self.create_image(target_name=target_name) + other_img1 = int(self.create_test_image(100, 100, 1, 1, 1, + self.client.getSession()).id.val) + other_img2 = int(self.create_test_image(100, 100, 1, 1, 1, + self.client.getSession()).id.val) + print(clear_img_id, other_img) + jsonstr = self.create_figure([other_img1, other_img2]) + print(jsonstr) + with open(Path(tmpdir)/"figure.json", 'w') as f: + f.write(jsonstr) + namespace = "omero.web.figure.json" + self.gw.createFileAnnfromLocalFile( + str(Path(tmpdir)/"figure.json"), + mimetype="application/json", + ns=namespace, desc=None) + target = getattr(self, target_name) + args = self.args + ["pack", target, '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + self.delete_all() + args = self.args + ["unpack", '--figure', + str(tmpdir / 'test.tar')] + self.cli.invoke(args, strict=True) + fas = self.gw.getObjects("FileAnnotation") + count = 0 + for f in fas: + count += 1 + assert count == 0 + self.delete_all()