From 3893ef7abbe4bb3b91392d460d5c6c04536de36e Mon Sep 17 00:00:00 2001 From: lauraporta Date: Mon, 11 Nov 2024 19:16:30 +0000 Subject: [PATCH] Add first group of docstrings --- calcium_imaging_automation/core/reader.py | 94 ++++++++++++++++++++++- examples/example_usage.py | 4 +- 2 files changed, 93 insertions(+), 5 deletions(-) diff --git a/calcium_imaging_automation/core/reader.py b/calcium_imaging_automation/core/reader.py index e600844..17b5e72 100644 --- a/calcium_imaging_automation/core/reader.py +++ b/calcium_imaging_automation/core/reader.py @@ -2,13 +2,33 @@ from typing import List -class ReadAllPathsInFolder: +class ReadAquiredData: def __init__( self, raw_data_folder: Path, folder_read_pattern: str, file_read_pattern: List[str], ): + """ + Class to handle filepaths and dataset names in the raw data folder. + It can load folders and files based on the provided patterns, allowing + flexibility in the data structure of origin. + It also provides the maximum number of sessions for each dataset based + on the total number of files found in the dataset folders, by default + it searches for tif files. + + Parameters + ---------- + raw_data_folder : Path + The path to the raw data folder. + folder_read_pattern : str + The pattern to search for folders in the raw data folder. It + corresponds to the naming convention of the datasets. + file_read_pattern : List[str] + The patterns to search for files in the dataset folders. It + corresponds to the naming convention of the files in the dataset + folders. + """ self.folder_read_pattern = folder_read_pattern self.file_read_pattern = file_read_pattern @@ -18,14 +38,62 @@ def __init__( ] def get_folders_first_layer(self, file_path: Path) -> List[Path]: + """ + Get the first layer of folders in the raw data folder. The rest + of the class assumes that the first layer of folders corresponds + to the dataset folders. + + Parameters + ---------- + file_path : Path + The path to the raw data folder. + + Returns + ------- + List[Path] + The list of paths to the dataset folders. + """ return list(file_path.glob(self.folder_read_pattern)) def get_files_paths_by_format( self, folder: Path, filetype="tif" ) -> List[Path]: + """ + Get the paths to the files in the dataset folders based on the + provided file type. By default, it searches for tif files. + + Parameters + ---------- + folder : Path + The path to the dataset folder. + filetype : str, optional + The file type to search for in the dataset folder, by default + "tif". + + Returns + ------- + List[Path] + The list of paths to the files in the dataset folder. + """ return list(folder.rglob(filetype)) - def total_objects_by_format(self, folder: Path) -> dict: + def total_objects_by_extension(self, folder: Path) -> dict: + """ + Get the total number of files in the dataset folder based on the + extensions included in the file_read_pattern. + + Parameters + ---------- + folder : Path + The path to the dataset folder. + + Returns + ------- + dict + The dictionary with the number of files for each extension in the + patterns found in file_read_pattern. + """ + return { filetype.split(".")[-1]: len( self.get_files_paths_by_format(folder, filetype) @@ -34,8 +102,28 @@ def total_objects_by_format(self, folder: Path) -> dict: } def max_session_number(self, filetype="tif", max_allowed=5) -> int: + """ + Get the maximum number of sessions for each dataset based on the total + number of files found in the dataset folders. By default, it searches + for tif files and allows a maximum of 5 sessions. It assumes that every + tif file corresponds to an experimental session. + + Parameters + ---------- + filetype : str, optional + The file type to search for in the dataset folder, by default + "tif". + max_allowed : int, optional + The maximum number of sessions allowed, by default 5. + + Returns + ------- + int + The maximum number of sessions for each dataset. + """ + total_tif_number = [ - self.total_objects_by_format(dataset_path).get(filetype, 0) + self.total_objects_by_extension(dataset_path).get(filetype, 0) for dataset_path in self.datasets_paths ] diff --git a/examples/example_usage.py b/examples/example_usage.py index 3789fa9..fada0f3 100644 --- a/examples/example_usage.py +++ b/examples/example_usage.py @@ -7,7 +7,7 @@ import mlflow import numpy as np -from calcium_imaging_automation.core.reader import ReadAllPathsInFolder +from calcium_imaging_automation.core.reader import ReadAquiredData from calcium_imaging_automation.core.writer import DatashuttleWrapper @@ -35,7 +35,7 @@ def main( mlflow.set_experiment("calcium_imaging_pipeline") # --- Read folders and files --- - reader = ReadAllPathsInFolder( + reader = ReadAquiredData( raw_data_path, folder_read_pattern, file_read_pattern,