Skip to content

Commit

Permalink
added support for non-indexed acquisitions
Browse files Browse the repository at this point in the history
  • Loading branch information
vitalielias committed Jun 26, 2023
1 parent 1504088 commit 15d5ba2
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 10 deletions.
10 changes: 9 additions & 1 deletion acquisitionMapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,21 @@ def extract_values(addresses, data, dataset_num = 1):
for key, address in addresses.items():
levels = address.split('.')
current_data = data
# print(levels)
for level in levels:
# emxml contains multiple instances of "Dataset", so it returns a list when asked. We need to tell it
# which dataset we actually want. 1 is SEM Image, 2 is SEM Image 2, and 3 is the one we're not
# interested in. We subtract 1 because indexing begins at zero
# Still needed: check this against image folder name
# print(f"current level: {level}")
if level == 'Dataset':
current_data = current_data[level][dataset_num - 1]
if isinstance(current_data[level], list):
try:
current_data = current_data[level][dataset_num - 1]
except IndexError:
print(f"There is no dataset at index {dataset_num}.")
else:
current_data = current_data[level]
else:
current_data = current_data[level]
result[key] = current_data
Expand Down
36 changes: 27 additions & 9 deletions metaMapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,19 @@

def extract_zip_file(zip_file_path):
temp_dir = tempfile.mkdtemp()


start_time = time.time() # Start time
logging.info("Extracting {zip_file_path}...")

target_dir = None

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
total_items = len(zip_ref.namelist())

for index, file_name in enumerate(zip_ref.namelist(), start=1):
if index%10 == 0:
print(f"Extracting file {index}/{total_items}...")
file_path = os.path.join(temp_dir, file_name)
zip_ref.extract(file_name, temp_dir)

Expand All @@ -42,14 +46,24 @@ def extract_zip_file(zip_file_path):
inputZip = sys.argv[2]
outputFile = sys.argv[3]

def getExampleImage(directory, extension='.tif'):
    """Return the path of the first image file found in *directory*.

    Files are scanned in sorted (lexicographic) order so the result is
    deterministic -- os.listdir() returns entries in arbitrary order, which
    previously made "the first image" platform/filesystem dependent.

    Args:
        directory: directory to scan (not recursed into).
        extension: filename suffix to match; defaults to '.tif' to preserve
            the original behavior.

    Returns:
        Full path (directory joined with filename) of the first matching
        file, or None when no file matches.
    """
    for file in sorted(os.listdir(directory)):
        if file.endswith(extension):
            return os.path.join(directory, file)
    return None

mainDir, tempDir = extract_zip_file(inputZip)
imgFile = os.path.join(mainDir, 'Images/SEM Image 2/SEM Image 2 - SliceImage - 001.tif') # uses the first image
imgFile = getExampleImage(os.path.join(mainDir, 'Images/SEM Image'))
imgDirectory = os.path.join(mainDir, 'Images')
xmlFile = os.path.join(mainDir, 'EMproject.emxml')

xmlMap, imgMap = extract_metadata_addresses(mapFile)
xmlMetadata = xml_to_dict(xmlFile)

# print('XML MAP:')
# print(xmlMap)
# print('xmlMetadata')
# print(xmlMetadata)

acqXmlMetadata = extract_values(xmlMap, xmlMetadata)

# Read an image for acquisition metadata
Expand All @@ -65,7 +79,11 @@ def extract_zip_file(zip_file_path):
# Read and format dataset metadata
datasetXmlMap, datasetImgMap = extract_metadata_addresses_dataset(mapFile)
datasets = xmlMetadata['EMProject']['Datasets']['Dataset']
datasetNames = [d['Name'] for d in datasets]
# print(f'len = {len(datasets)}, datasets: {datasets}')
if isinstance(datasets, list):
datasetNames = [d['Name'] for d in datasets]
else:
datasetNames = [datasets['Name']]
def processDatasets(datasetNum, imageDirectory):
# Extract xml data for this dataset
mappedEMMetadata = extract_values(datasetXmlMap, xmlMetadata, datasetNum)
Expand Down Expand Up @@ -198,17 +216,17 @@ def combineMetadata(acquisition_metadata, dataset_metadata, image_metadata):
metadata['acquisition']['dataset'][i]['images'].append(image_dict)
return metadata

def save_metadata_as_json(metadata, save_path):
    """Serialize *metadata* to pretty-printed (indent=4) JSON at *save_path*.

    *save_path* is the full path of the output file; it is created or
    overwritten. An info-level log line records where the file was written.
    """
    serialized = json.dumps(metadata, indent=4)
    with open(save_path, 'w') as out_file:
        out_file.write(serialized)
    logging.info(f"Metadata saved as {save_path}")

# For local tests
# def save_metadata_as_json(metadata, save_path):
# with open(os.path.join(save_path, 'output.json'), 'w') as file:
# with open(save_path, 'w') as file:
# json.dump(metadata, file, indent=4)
# logging.info(f"Metadata saved as {save_path}")

# For local tests
# NOTE(review): this redefines save_metadata_as_json with different path
# semantics -- here *save_path* is treated as a DIRECTORY and the JSON is
# written to <save_path>/output.json, whereas the other variant writes
# directly to *save_path* as a file. If both definitions end up active in
# the same module, this later one shadows the earlier, and the caller
# (which passes outputFile = sys.argv[3]) would need a directory argument.
# Confirm which variant is meant to ship; looks like a debug toggle.
def save_metadata_as_json(metadata, save_path):
    # Always writes a file named 'output.json' inside save_path.
    with open(os.path.join(save_path, 'output.json'), 'w') as file:
        json.dump(metadata, file, indent=4)
    logging.info(f"Metadata saved as {save_path}")

# Merge acquisition-, dataset-, and image-level metadata into one structure,
# persist it as JSON, then remove the temporary extraction directory created
# by extract_zip_file.
combinedMetadata = combineMetadata(acqMetadata, datasetMetadata, imageMetadata)
save_metadata_as_json(combinedMetadata, outputFile)
# Clean up the extracted zip contents (tempDir came from tempfile.mkdtemp()).
shutil.rmtree(tempDir)

0 comments on commit 15d5ba2

Please sign in to comment.