From 3893ef7abbe4bb3b91392d460d5c6c04536de36e Mon Sep 17 00:00:00 2001
From: lauraporta <ucqflpo@ucl.ac.uk>
Date: Mon, 11 Nov 2024 19:16:30 +0000
Subject: [PATCH] Add first group of docstrings

---
 calcium_imaging_automation/core/reader.py | 94 ++++++++++++++++++++++-
 examples/example_usage.py                 |  4 +-
 2 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/calcium_imaging_automation/core/reader.py b/calcium_imaging_automation/core/reader.py
index e600844..17b5e72 100644
--- a/calcium_imaging_automation/core/reader.py
+++ b/calcium_imaging_automation/core/reader.py
@@ -2,13 +2,33 @@
 from typing import List
 
 
-class ReadAllPathsInFolder:
+class ReadAquiredData:
     def __init__(
         self,
         raw_data_folder: Path,
         folder_read_pattern: str,
         file_read_pattern: List[str],
     ):
+        """
+        Class to handle filepaths and dataset names in the raw data folder.
+        It can load folders and files based on the provided patterns, allowing
+        flexibility in the data structure of origin.
+        It also provides the maximum number of sessions for each dataset based
+        on the total number of files found in the dataset folders. By default,
+        it searches for tif files.
+
+        Parameters
+        ----------
+        raw_data_folder : Path
+            The path to the raw data folder.
+        folder_read_pattern : str
+            The pattern to search for folders in the raw data folder. It
+            corresponds to the naming convention of the datasets.
+        file_read_pattern : List[str]
+            The patterns to search for files in the dataset folders. It
+            corresponds to the naming convention of the files in the dataset
+            folders.
+        """
         self.folder_read_pattern = folder_read_pattern
         self.file_read_pattern = file_read_pattern
 
@@ -18,14 +38,62 @@ def __init__(
         ]
 
     def get_folders_first_layer(self, file_path: Path) -> List[Path]:
+        """
+        Get the first-layer entries of the raw data folder that match
+        ``folder_read_pattern``. The rest of the class assumes that these
+        entries are the dataset folders.
+
+        Parameters
+        ----------
+        file_path : Path
+            The path to the raw data folder.
+
+        Returns
+        -------
+        List[Path]
+            The list of paths to the dataset folders.
+        """
         return list(file_path.glob(self.folder_read_pattern))
 
     def get_files_paths_by_format(
         self, folder: Path, filetype="tif"
     ) -> List[Path]:
+        """
+        Recursively get the paths inside a dataset folder that match the
+        provided pattern. By default, the pattern is "tif".
+
+        Parameters
+        ----------
+        folder : Path
+            The path to the dataset folder.
+        filetype : str, optional
+            The glob pattern passed to ``rglob`` to search the dataset folder
+            recursively, by default "tif".
+
+        Returns
+        -------
+        List[Path]
+            The list of paths to the files in the dataset folder.
+        """
         return list(folder.rglob(filetype))
 
-    def total_objects_by_format(self, folder: Path) -> dict:
+    def total_objects_by_extension(self, folder: Path) -> dict:
+        """
+        Get the total number of files in the dataset folder based on the
+        extensions included in the file_read_pattern.
+
+        Parameters
+        ----------
+        folder : Path
+            The path to the dataset folder.
+
+        Returns
+        -------
+        dict
+            The dictionary with the number of files for each extension in the
+            patterns found in file_read_pattern.
+        """
+
         return {
             filetype.split(".")[-1]: len(
                 self.get_files_paths_by_format(folder, filetype)
@@ -34,8 +102,28 @@ def total_objects_by_format(self, folder: Path) -> dict:
         }
 
     def max_session_number(self, filetype="tif", max_allowed=5) -> int:
+        """
+        Get the maximum number of sessions for each dataset based on the total
+        number of files found in the dataset folders. By default, it searches
+        for tif files and allows a maximum of 5 sessions. It assumes that every
+        tif file corresponds to an experimental session.
+
+        Parameters
+        ----------
+        filetype : str, optional
+            The file type to search for in the dataset folder, by default
+            "tif".
+        max_allowed : int, optional
+            The maximum number of sessions allowed, by default 5.
+
+        Returns
+        -------
+        int
+            The maximum number of sessions for each dataset.
+        """
+
         total_tif_number = [
-            self.total_objects_by_format(dataset_path).get(filetype, 0)
+            self.total_objects_by_extension(dataset_path).get(filetype, 0)
             for dataset_path in self.datasets_paths
         ]
 
diff --git a/examples/example_usage.py b/examples/example_usage.py
index 3789fa9..fada0f3 100644
--- a/examples/example_usage.py
+++ b/examples/example_usage.py
@@ -7,7 +7,7 @@
 import mlflow
 import numpy as np
 
-from calcium_imaging_automation.core.reader import ReadAllPathsInFolder
+from calcium_imaging_automation.core.reader import ReadAquiredData
 from calcium_imaging_automation.core.writer import DatashuttleWrapper
 
 
@@ -35,7 +35,7 @@ def main(
     mlflow.set_experiment("calcium_imaging_pipeline")
 
     # --- Read folders and files ---
-    reader = ReadAllPathsInFolder(
+    reader = ReadAquiredData(
         raw_data_path,
         folder_read_pattern,
         file_read_pattern,