Skip to content

Commit

Permalink
Added find_files() method
Browse files Browse the repository at this point in the history
Added find_files() method
Bumped version number to 0.3.0
  • Loading branch information
AlexHenderson committed Aug 11, 2023
1 parent 4f4bfe4 commit defb395
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 11 deletions.
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ authors:
affiliation: University of Manchester, UK
email: [email protected]
website: https://alexhenderson.info
version: 0.2.0
date-released: "2023-06-11"
version: 0.3.0
date-released: "2023-08-11"
license: MIT
license-url: https://spdx.org/licenses/MIT#licenseText
repository: "https://github.com/AlexHenderson/agilent-ir-formats"
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Agilent File Format Handling for Infrared Spectroscopy
Author: Alex Henderson <[[email protected]]([email protected])>
Version: 0.2.0
Version: 0.3.0
Copyright: (c) 2018-2023 Alex Henderson

## About ##
Expand Down Expand Up @@ -36,6 +36,7 @@ Methods:
Static methods:
filetype() string identifying the type of files this class reads.
filefilter() string identifying the Windows file extensions for files this class can read.
find_files() list of all readable files in a directory structure.
isreadable() whether this class is capable of reading a given file.
version() the version number of this code.
```
Expand Down
86 changes: 78 additions & 8 deletions src/agilentirformats/agilent_ir_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"""

__version__ = "0.2.0"
__version__ = "0.3.0"

from datetime import datetime
from enum import Enum
Expand Down Expand Up @@ -48,6 +48,7 @@ class AgilentIRFile:
Static methods:
filetype() string identifying the type of files this class reads.
filefilter() string identifying the Windows file extensions for files this class can read.
find_files() list of all readable files in a directory structure.
isreadable() whether this class is capable of reading a given file.
version() the version number of this code.
Expand All @@ -57,9 +58,8 @@ class TileOrMosaic(Enum):
"""Nested Enumeration class to hold flags for the types of data the outer class can cope with.
"""

TILE = 1 # : Single tile images from a focal plane array experiment.
MOSAIC = 2 # : Multiple single tile images arranged into a mosaic.
TILE = 1 # Single tile images from a focal plane array experiment.
MOSAIC = 2 # Multiple single tile images arranged into a mosaic.

@staticmethod
def filetype() -> str:
Expand All @@ -83,6 +83,54 @@ def filefilter() -> str:
"""
return "*.dmt;*.seq"

@staticmethod
def find_files(search_location: str | Path = ".", recursive: bool = True) -> list[str]:
"""Return a list of Agilent IR image files in a search location.
`search_location` is searched for *.dmt and *.seq files.
If `recursive is `True` (default) all paths below the `search_location` will be searched. Otherwise, only the
`search_location` directory will be searched.
Discovered files are checked to see if they are readable, and discarded if not.
:param search_location: Directory to act as starting point for tree search.
:type search_location: str or :class:`pathlib.Path`, optional
:param recursive: Whether a recursive search is required.
:type recursive: bool, optional
:return: list of discovered files.
:rtype: list[str]
:raises RuntimeError: Raised if the `search_location` is not a directory.
"""

search_location = Path(search_location)
if not search_location.is_dir():
raise RuntimeError("search_location should be a directory.")

if recursive:
mosaicdmtfiles = list(search_location.rglob("*.dmt"))
else:
mosaicdmtfiles = list(search_location.glob("*.dmt"))
mosaicdmtfiles = list(map(Path.resolve, mosaicdmtfiles))
mosaicdmtfiles = list(map(Path.as_posix, mosaicdmtfiles))

if recursive:
singletileseqfiles = list(search_location.rglob("*.seq"))
else:
singletileseqfiles = list(search_location.glob("*.seq"))
singletileseqfiles = list(map(Path.resolve, singletileseqfiles))
singletileseqfiles = list(map(Path.as_posix, singletileseqfiles))

foundfiles = list()
for file in mosaicdmtfiles:
if AgilentIRFile.isreadable(file):
foundfiles.append(file)
for file in singletileseqfiles:
if AgilentIRFile.isreadable(file):
foundfiles.append(file)

return foundfiles

@staticmethod
def isreadable(filename: str | Path = None) -> bool:
"""Determine whether this class is capable of reading a given file.
Expand All @@ -99,6 +147,28 @@ def isreadable(filename: str | Path = None) -> bool:
if filename.suffix.lower() not in [".dmt", ".seq"]:
return False

# Look inside the file
# If we have a mosaic, the .dmt file will have the words "Mosaic Tiles X" inside
if filename.suffix.lower() == ".dmt":
file_contents = filename.read_bytes()
regex = re.compile(b"Mosaic Tiles X")
matches = re.search(regex, file_contents)
if not matches:
return False

# See if there is a .bsp file with the same name as the .seq file.
# If so, does it contain the words "Phase Apodization"?
if filename.suffix.lower() == ".seq":
bspfilename = filename.with_suffix(".bsp")
if not bspfilename.is_file():
return False
else:
file_contents = bspfilename.read_bytes()
regex = re.compile(b"Phase Apodization")
matches = re.search(regex, file_contents)
if not matches:
return False

# Passed all available tests so suggest we can read this file
return True

Expand Down Expand Up @@ -430,11 +500,11 @@ def _generate_hdf5_metadata(self) -> dict[str, any]:
'/metadata/plotting/xlabel': label suitable for the x-axis of a spectral plot.
'/metadata/plotting/ylabelname': physical quantity of the spectral y-axis dimension.
'/metadata/plotting/ylabelunit': unit of the spectral y-axis dimension.
'/metadata/plotting/ylabel': = label suitable for the y-axis of a spectral plot.
'/metadata/plotting/ylabel': label suitable for the y-axis of a spectral plot.
'/metadata/plotting/plot_high2low': whether it is appropriate to plot the x-axis from high to low.
'/metadata/experiment/first_xvalue': lowest wavenumber.
'/metadata/experiment/last_xvalue': = highest wavenumber.
'/metadata/experiment/last_xvalue': highest wavenumber.
'/metadata/semantics/technique/accuracy_of_term': how accurate are the semantics of this section.
'/metadata/semantics/technique/term': ontological name of this analysis technique.
Expand Down Expand Up @@ -494,7 +564,7 @@ def _generate_hdf5_metadata(self) -> dict[str, any]:
def export_hdf5(self, filename: str | Path = None):
"""Write a version of the file to disc in HDF5 format.
If `filename` is `None`, the source file's name is used, swapping the .dmt extension with .h5.
If `filename` is `None`, the source file's name is used, swapping the .dmt/.seq extension with .h5.
The data is both chunked and compressed. The total intensity spectrum and total intensity image
(where appropriate) are also exported. A range of associated metadata is also included.
Expand Down Expand Up @@ -622,7 +692,7 @@ def metadata(self) -> dict:
'lastwavenumber': highest wavenumber recorded.
'xlabel': a label that can be used for the x-axis of a spectral plot ('wavenumbers (cm-1)').
'ylabel': a label that can be used for the y-axis of a spectral plot ('absorbance').
'acqdatetime': date and time of data acqusition in ISO 8601 format (YYYY-MM-DDTHH:mm:ss)
'acqdatetime': date and time of data acquisition in ISO 8601 format (YYYY-MM-DDTHH:mm:ss)
:return: A dict of parameters extracted from the file.
:rtype: dict
Expand Down

0 comments on commit defb395

Please sign in to comment.