DOC: Add detailed docstrings for model architect and identification functions, enhancing documentation clarity
acsenrafilho · Jan 10, 2025 · 0129375 · 0129375
1 parent 920036d
commit 0129375
Show file tree

Hide file tree

Showing 8 changed files with 115 additions and 6 deletions.
diff --git a/cucaracha/ml_models/model_architect.py b/cucaracha/ml_models/model_architect.py
@@ -4,6 +4,16 @@
 
 
 class ModelArchitect(ABC):
+    """
+    Abstract base class for defining model architectures.
+    Attributes:
+        modality (str): The modality of the model architecture. Expected values are defined in VALID_MODALITIES.
+    Methods:
+        get_model():
+            Abstract method to be implemented by subclasses to return the model architecture.
+        __str__():
+            Returns a string representation of the model architecture, including its modality.
+    """
     def __init__(self, **kwargs):
         self.modality = kwargs.get('modality', None)
         # valid_modalities = ['image_classification', 'image_keypoint_detection', 'image_object_detection']

diff --git a/cucaracha/tasks/__init__.py b/cucaracha/tasks/__init__.py
@@ -14,6 +14,44 @@
 def call_cucacha_image_task(
     input: np.ndarray, doc_preset: str = 'cnh_cpf_rg', auto_fit: bool = True
 ):
+    """
+    Processes an input image using a pre-trained model specified by the document preset.
+
+    Examples:
+        >>> import numpy as np
+        >>> from cucaracha.tasks import call_cucacha_image_task
+        >>> input_image = np.random.rand(224, 224, 3)  # Example input image
+        >>> label, extra = call_cucacha_image_task(input_image, doc_preset='cnh_cpf_rg', auto_fit=True)
+        >>> isinstance(label, str)
+        True
+        >>> isinstance(extra, dict)
+        True
+
+    Note:
+        This method is directly oriented to image classification tasks. To see
+        which presets are available to be used in this method, check the
+        `cucaracha.ml_models.CUCARACHA_PRESETS` variable and the list of
+        `image_classification` keys.
+
+    Info:
+        For the `auto_fit` option, if the input image is not consistent with
+        the ML model input shape, then the method will fit it before
+        prediction. If the user does not want this behavior, e.g. one may
+        want to provide input data that already has the correct shape, then
+        the user should set `auto_fit` to `False`.
+
+    Args:
+        input (np.ndarray): The image to be used in the ML model.
+        doc_preset (str, optional): Cucaracha preset to be used. Defaults to 'cnh_cpf_rg'.
+        auto_fit (bool, optional): Fits the input shape to ML model needs. Defaults to True.
+
+    Raises:
+        FileNotFoundError: If the preset is not located in the cucaracha models.
+        ValueError: If the input shape does not match the model input shape. Only raised when `auto_fit` is False.
+
+    Returns:
+        tuple: The predicted label and extra information.
+    """
     _check_input(input)
     _check_doc_preset(doc_preset)
 

diff --git a/cucaracha/tasks/identification.py b/cucaracha/tasks/identification.py
@@ -4,8 +4,67 @@
 
 
 def identify_personal_document(input: np.array, auto_fit: bool = True):
+    """
+    Identify the personal document type from an image, setting the document
+    type based on the Brazilian personal documents such as CNH, CPF and RG.
+
+    If the document is not identified as CNH, CPF or RG, the method will return
+    the string 'others' to indicate an unrecognized document type.
+
+    Note:
+        This method is not intended to be used for document verification, i.e.
+        it does not check if the document is valid or not, and also does not
+        collect any information from the document. It only identifies the type
+        as CNH, CPF or RG.
+
+    Note:
+        The method assumes that the input image was taken so that the
+        document itself occupies the majority of the image space. Images
+        with a partial document or with a lot of noise may not be correctly
+        identified.
+
+    Info:
+        For the `auto_fit` option, if the input image is not consistent with
+        the ML model input shape, then the method will fit it before
+        prediction. If the user does not want this behavior, e.g. one may
+        want to provide input data that already has the correct shape, then
+        the user should set `auto_fit` to `False`.
+
+    Args:
+        input (np.array): An image representing the personal document.
+        auto_fit (bool, optional): Fits the input shape to ML model needs. Defaults to True.
+    Returns:
+        tuple: The predicted document type and extra information.
+    """
     return call_cucacha_image_task(input, 'cnh_cpf_rg', auto_fit)
 
 
 def identify_document_is_signed(input: np.array, auto_fit: bool = True):
+    """
+    Identify if the document is signed or not from an image.
+
+    Note:
+        This method is not intended to be used for document verification, i.e.
+        it does not check if the document is valid or not, and also does not
+        collect any information from the document. It only verifies whether
+        the document presents a signature or not.
+
+    Note:
+        The method assumes that the signature is well seen in the image, i.e.
+        it should be easily identified by a human eye.
+
+    Info:
+        For the `auto_fit` option, if the input image is not consistent with
+        the ML model input shape, then the method will fit it before
+        prediction. If the user does not want this behavior, e.g. one may
+        want to provide input data that already has the correct shape, then
+        the user should set `auto_fit` to `False`.
+
+    Args:
+        input (np.array): An image representing the document with or without a signature.
+        auto_fit (bool, optional): Fits the input shape to ML model needs. Defaults to True.
+
+    Returns:
+        tuple: The predicted label (whether the document is signed) and extra information.
+    """
     return call_cucacha_image_task(input, 'doc_is_signed', auto_fit)
diff --git a/docs/api/aligment.md → docs/api/tasks/aligment.md b/docs/api/aligment.md → docs/api/tasks/aligment.md
@@ -4,4 +4,4 @@ All the methods has a pattern to be an image filter, i.e. all the method must ha
 
 Additional data can be provided, depending on the method, which can be seen in the dedicated documentation.
 
-::: aligment
+::: tasks.aligment
diff --git a/docs/api/tasks/identification.md b/docs/api/tasks/identification.md
@@ -0,0 +1 @@
+::: tasks.identification
diff --git a/docs/api/noise_removal.md → docs/api/tasks/noise_removal.md b/docs/api/noise_removal.md → docs/api/tasks/noise_removal.md
@@ -4,4 +4,4 @@ All the methods has a pattern to be an image filter, i.e. all the method must ha
 
 Additional data can be provided, depending on the method, which can be seen in the dedicated documentation.
 
-::: noise_removal
+::: tasks.noise_removal
diff --git a/docs/api/threshold.md → docs/api/tasks/threshold.md b/docs/api/threshold.md → docs/api/tasks/threshold.md
@@ -6,5 +6,5 @@ must have an image as input and offer at least one image as output.
 Additional data can be provided, depending on the method, which can be seen
 in the dedicated documentation.
 
-::: threshold
+::: tasks.threshold
 
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -44,9 +44,10 @@ nav:
   - 'faq.md'
   - 'getting_started.md'
   - 'api/document.md'
-  - 'api/threshold.md'
-  - 'api/aligment.md'
-  - 'api/noise_removal.md'
+  - 'api/tasks/identification.md'
+  - 'api/tasks/threshold.md'
+  - 'api/tasks/aligment.md'
+  - 'api/tasks/noise_removal.md'
   - 'api/ml_models/image_classification/img_class.md'
   - 'api/ml_models/image_segmentation/img_sem_seg.md'
   - 'api/ml_trainers/img_trainers.md'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -4,4 +4,4 @@ All the methods has a pattern to be an image filter, i.e. all the method must ha

		Additional data can be provided, depending on the method, which can be seen in the dedicated documentation.

		::: aligment
		::: tasks.aligment