From 15d5ba2907a3aab871f5eef8f5ca070f8735ca1a Mon Sep 17 00:00:00 2001
From: Elias Vitali
Date: Mon, 26 Jun 2023 12:59:28 +0200
Subject: [PATCH] added support for non-indexed acquisitions

---
 acquisitionMapper.py | 10 +++++++++-
 metaMapper.py        | 36 +++++++++++++++++++++++++++---------
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/acquisitionMapper.py b/acquisitionMapper.py
index c71b12f..386ed89 100644
--- a/acquisitionMapper.py
+++ b/acquisitionMapper.py
@@ -64,13 +64,21 @@ def extract_values(addresses, data, dataset_num = 1):
     for key, address in addresses.items():
         levels = address.split('.')
         current_data = data
+        # print(levels)
         for level in levels:
             # emxml contains multiple instances of "Dataset", so it returns a list when asked. We need to tell it
             # which dataset we actually want. 1 is SEM Image, 2 is SEM Image 2, and 3 is the one we're not
             # interested in. We subtract 1 because indexing begins at zero
             # Still needed: check this against image folder name
+            # print(f"current level: {level}")
             if level == 'Dataset':
-                current_data = current_data[level][dataset_num - 1]
+                if isinstance(current_data[level], list):
+                    try:
+                        current_data = current_data[level][dataset_num - 1]
+                    except IndexError:
+                        print(f"There is no dataset at index {dataset_num}.")
+                else:
+                    current_data = current_data[level]
             else:
                 current_data = current_data[level]
         result[key] = current_data
diff --git a/metaMapper.py b/metaMapper.py
index 9b2e5f5..5e9436d 100644
--- a/metaMapper.py
+++ b/metaMapper.py
@@ -12,6 +12,7 @@ def extract_zip_file(zip_file_path):
     temp_dir = tempfile.mkdtemp()
+    start_time = time.time() # Start time

     logging.info("Extracting {zip_file_path}...")

@@ -19,8 +20,11 @@ def extract_zip_file(zip_file_path):
     target_dir = None

     with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+        total_items = len(zip_ref.namelist())
         for index, file_name in enumerate(zip_ref.namelist(), start=1):
+            if index%10 == 0:
+                print(f"Extracting file {index}/{total_items}...")
             file_path = os.path.join(temp_dir, file_name)
             zip_ref.extract(file_name, temp_dir)
@@ -42,14 +46,24 @@ def extract_zip_file(zip_file_path):
 inputZip = sys.argv[2]
 outputFile = sys.argv[3]

+def getExampleImage(directory):
+    for file in os.listdir(directory):
+        if file.endswith('.tif'):
+            return os.path.join(directory, file)
+
 mainDir, tempDir = extract_zip_file(inputZip)
-imgFile = os.path.join(mainDir, 'Images/SEM Image 2/SEM Image 2 - SliceImage - 001.tif') # uses the first image
+imgFile = getExampleImage(os.path.join(mainDir, 'Images/SEM Image'))
 imgDirectory = os.path.join(mainDir, 'Images')
 xmlFile = os.path.join(mainDir, 'EMproject.emxml')

 xmlMap, imgMap = extract_metadata_addresses(mapFile)
 xmlMetadata = xml_to_dict(xmlFile)
+# print('XML MAP:')
+# print(xmlMap)
+# print('xmlMetadata')
+# print(xmlMetadata)
+
 acqXmlMetadata = extract_values(xmlMap, xmlMetadata)

 # Read an image for acquisition metadata
@@ -65,7 +79,11 @@ def extract_zip_file(zip_file_path):
 # Read and format dataset metadata
 datasetXmlMap, datasetImgMap = extract_metadata_addresses_dataset(mapFile)
 datasets = xmlMetadata['EMProject']['Datasets']['Dataset']
-datasetNames = [d['Name'] for d in datasets]
+# print(f'len = {len(datasets)}, datasets: {datasets}')
+if isinstance(datasets, list):
+    datasetNames = [d['Name'] for d in datasets]
+else:
+    datasetNames = [datasets['Name']]

 def processDatasets(datasetNum, imageDirectory):
     # Extract xml data for this dataset
     mappedEMMetadata = extract_values(datasetXmlMap, xmlMetadata, datasetNum)
@@ -198,17 +216,17 @@ def combineMetadata(acquisition_metadata, dataset_metadata, image_metadata):
             metadata['acquisition']['dataset'][i]['images'].append(image_dict)
     return metadata

-def save_metadata_as_json(metadata, save_path):
-    with open(save_path, 'w') as file:
-        json.dump(metadata, file, indent=4)
-    logging.info(f"Metadata saved as {save_path}")
-
-# For local tests
 # def save_metadata_as_json(metadata, save_path):
-#     with open(os.path.join(save_path, 'output.json'), 'w') as file:
+#     with open(save_path, 'w') as file:
 #         json.dump(metadata, file, indent=4)
 #     logging.info(f"Metadata saved as {save_path}")

+# For local tests
+def save_metadata_as_json(metadata, save_path):
+    with open(os.path.join(save_path, 'output.json'), 'w') as file:
+        json.dump(metadata, file, indent=4)
+    logging.info(f"Metadata saved as {save_path}")
+
 combinedMetadata = combineMetadata(acqMetadata, datasetMetadata, imageMetadata)
 save_metadata_as_json(combinedMetadata, outputFile)
 shutil.rmtree(tempDir)