Dev (cansuyalcinn#20)

* dataset creation * segmentation examples * changes * writing initial class * initial draft finished * initial feat extraction class * add local color features * run classification experiment color features * final color test * add 1000 samples md * merging 1000 samples csv * patching preprocessing.py with cropping fov condition * Cansu feature extraction (cansuyalcinn#14) * feature extraction notebook is added * changes * changes * changes * changes * changes * changes * LBP pipeline * changes * added reqs * changes * added channel lbp Co-authored-by: alexCortinaU <[email protected]> * finish lbp and create glcm (cansuyalcinn#15) * create texture_features nb * extract texture features * texture features class * classification test with color and texture features * adding feature extraction nb * changes for efficient feature extraction * val set features and classification test * gabor testing * update three class csv * ready to run * no changes just update * classification test all binary data * feature importances * update preprocess class for lesion segmentation * Alex three class (cansuyalcinn#18) * three class classification test * using balanced SVM * testing SVC with balancing and SMOTE * first RF runs * Testing Boosting algorithms * ensemble * final ensemble methods * final test prediction * runs for presentation * inference time test * Cansu running tests (cansuyalcinn#19) * classifer test * comments * changes * svm update * smv update * feature selection methods * knn * rf tests * updates * LR and adaboost * updates * gradient boosting * xgboost * changes * best classifiers * changes * ensemble model is added * changes * changes Co-authored-by: Cansu Yalcin <[email protected]> * reformatting * adding report and readme * readme correction Co-authored-by: Cansu YALCIN <[email protected]> Co-authored-by: Cansu Yalcin <[email protected]>
alexCortinaU · Nov 7, 2022 · 0b28a43 · 0b28a43
1 parent bf32852
commit 0b28a43
Show file tree

Hide file tree

Showing 44 changed files with 39,899 additions and 24,081 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,7 @@
 data/
 database/
 .idea/
-examples/
+examples/
+dataset/__pycache__/
+pipeline/__pycache__/
+notebooks/.ipynb_checkpoints
diff --git a/Cad1_Skin_ Lesion_presentation.pdf b/Cad1_Skin_ Lesion_presentation.pdf
diff --git a/README.md b/README.md
@@ -1 +1,7 @@
-# Skin_lesion_segmentation
+# Skin_lesion_segmentation
+
+#### Skin lesion segmentation project by Cansu Yalcin and Alejandro Cortina, for the Computer Aided Diagnosis course at the University of Girona
+
+To make an inference test, please refer to notebooks/pipeline_test.ipynb
+
+Please refer to "Cad1_Skin_ Lesion_presentation.pdf" for the report of the work done.
diff --git a/__init__.py b/__init__.py
diff --git a/dataset/__init__.py b/dataset/__init__.py
diff --git a/dataset/dataset.py b/dataset/dataset.py
@@ -0,0 +1,169 @@
+from operator import index
+import os
+import sys; sys.path.insert(0, os.path.abspath("../"))
+
+import collections
+import cv2
+import numpy as np
+import pandas as pd
+from pathlib import Path
+from typing import List, Tuple
+import multiprocessing as mp
+thispath = Path(__file__).resolve()
+
+# Repository root: two levels above this module (dataset/dataset.py)
+_repo_root = thispath.parents[1]
+datapath = _repo_root / "data"  # default folder holding the images
+md_df_path = _repo_root / "metadata"  # default folder holding the metadata CSVs
+
+class Dataset():
+    """
+    Base class with helpers shared by the dataset classes of this module.
+
+    Subclasses are expected to set the attributes read here: ``labels``
+    (used by ``totals``) and ``datapath_class`` / ``metadata_path`` (used by
+    ``check_paths_exist``). ``lesion_types`` is optional.
+    """
+
+    def __init__(self):
+        pass
+
+    def totals(self):
+        """
+        Count how often each label value occurs, ignoring missing values.
+
+        ``self.labels`` may be 1-D (one label per sample) or 2-D
+        (samples x label columns); a 1-D array is treated as one column.
+
+        Returns:
+            dict: one entry per label column, each mapping to a
+                ``{value: count}`` dict ordered from most to least common.
+                Keys come from ``self.lesion_types`` when that attribute is
+                set, otherwise the column index is used.
+        """
+        labels = np.asarray(self.labels)
+        if labels.ndim == 1:
+            labels = labels[:, np.newaxis]
+
+        counts = []
+        for column in labels.T:
+            # pd.isna also copes with string/object labels (np.isnan raises)
+            present = column[~pd.isna(column)]
+            counts.append(dict(collections.Counter(present).most_common()))
+
+        names = getattr(self, 'lesion_types', None)
+        if names is None:
+            names = range(len(counts))
+        return dict(zip(names, counts))
+
+    def __repr__(self):
+        """Return the class name together with the per-label totals."""
+        # repr() must hand back a string and should not print as a side effect
+        if not hasattr(self, 'labels'):
+            return f"{type(self).__name__}()"
+        return f"{type(self).__name__}(totals={self.totals()!r})"
+
+    def check_paths_exist(self):
+        """
+        Verify that the image folder and the metadata file are present.
+
+        Raises:
+            FileNotFoundError: if ``self.datapath_class`` or
+                ``self.metadata_path`` does not exist.
+        """
+        for required in (self.datapath_class, self.metadata_path):
+            if not required.exists():
+                raise FileNotFoundError(f"{required} could not be found")
+
+
+class SkinLesion_Dataset(Dataset):
+    """
+    Skin lesion images described by a metadata CSV, for one classification task.
+
+    Indexing an instance returns a dict with the sample position, its label,
+    its image id and the image read with OpenCV.
+    """
+
+    def __init__(
+        self, class_task: str = 'binary',
+        df_path: Path = md_df_path,
+        datapath_class: Path = datapath,
+        seed: int = 0,
+        partitions: List[str] = ['train', 'val'],
+        n_jobs: int = -1,
+        resize_image: bool = True,
+    ):
+        """
+        Constructor of SkinLesion_Dataset class
+
+        Args:
+            class_task (str, optional): Classification task, 'binary' or 'three_class'.
+                Defaults to 'binary'.
+            df_path (Path, optional): Folder containing '<class_task>.csv' and
+                'seg_examples.csv'. Defaults to md_df_path.
+            datapath_class (Path, optional): data folder containing dataset images;
+                the images of the task are looked up in '<datapath_class>/<class_task>'.
+                Defaults to datapath.
+            seed (int, optional): Seed to guarantee reproducibility. Defaults to 0.
+            partitions (List[str], optional): Selected sets. Defaults to ['train', 'val'].
+            n_jobs (int, optional): Number of processes to use in parallel operations.
+                -1 means all available cores. Defaults to -1.
+            resize_image (bool, optional): Whether returned images are downscaled
+                to half their width and height. Defaults to True.
+
+        Raises:
+            Exception: if the task image folder or the metadata CSV is missing
+                (raised by ``check_paths_exist``).
+        """
+        super(SkinLesion_Dataset, self).__init__()
+
+        self.class_task = class_task
+        # Accept a single partition name, and copy the list so the shared
+        # default argument is never aliased by an instance
+        if isinstance(partitions, str):
+            partitions = [partitions]
+        self.partitions = list(partitions)
+        self.resize_image = resize_image
+
+        # Set seed and number of cores to use
+        self.seed = seed
+        np.random.seed(self.seed)
+        self.n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs
+
+        # Keep the data root that was passed in, not the module-level default
+        self.datapath = datapath_class
+        self.datapath_class = datapath_class / class_task
+        self.metadata_path = df_path / (class_task + '.csv')
+        self.df_path = df_path
+
+        # Load data
+        self.check_paths_exist()
+        self.md_df = pd.read_csv(self.metadata_path, index_col=0)
+
+        # Filter partition
+        self.filter_by_partition()
+        self.labels = self.md_df['label'].values
+
+        # Create segmentation examples df
+        self.seg_examples_df = pd.read_csv(df_path / "seg_examples.csv")
+
+    def filter_by_partition(self):
+        """
+        Keep only the metadata rows whose 'split' value is one of
+        ``self.partitions`` and renumber the remaining rows from 0.
+        """
+        selected = self.md_df['split'].isin(self.partitions)
+        # Rebind instead of resetting in place: the filtered frame is a slice
+        self.md_df = self.md_df.loc[selected, :].reset_index(drop=True)
+
+    def __len__(self):
+        """Number of samples left after the partition filter."""
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        """
+        Load one sample.
+
+        Args:
+            idx (int): Position of the sample in the filtered metadata.
+
+        Returns:
+            dict: 'idx', 'label', 'img_id' and 'img' (the array returned by
+                cv2.imread with IMREAD_COLOR, halved in width and height
+                when ``self.resize_image`` is set).
+
+        Raises:
+            FileNotFoundError: if the image cannot be read.
+        """
+        sample = {}
+        sample['idx'] = idx
+        sample['label'] = self.labels[idx]
+        sample['img_id'] = self.md_df['img_id'].iloc[idx]
+
+        # read and save the image
+        img_path = Path(self.md_df['path'].iloc[idx])
+        img = cv2.imread(str(img_path), cv2.IMREAD_COLOR)
+        if img is None:
+            # cv2.imread signals a missing/unreadable file by returning None
+            raise FileNotFoundError(f"{img_path} could not be read as an image")
+
+        if self.resize_image:
+            height, width = img.shape[:2]
+            # never ask OpenCV for a zero-sized output
+            half_size = (max(1, width // 2), max(1, height // 2))
+            img = cv2.resize(img, half_size, interpolation=cv2.INTER_AREA)
+
+        sample['img'] = img
+        return sample
+
+
+class SegExamples(SkinLesion_Dataset):
+    """
+    Hand-picked segmentation examples listed in 'seg_examples.csv'.
+
+    Length and indexing refer to the rows of ``self.seg_examples_df``.
+    """
+
+    def __init__(
+        self,
+        examples_type: List[str] = ['easy', 'medium', 'hard', 'vhard'],
+    ):
+        """
+        Sample easy, medium, hard and very hard examples from both tasks (binary, three class)
+
+        Args:
+            examples_type (List[str], optional): Type of examples. Defaults to ['easy', 'medium', 'hard', 'vhard'].
+        """
+
+        super(SegExamples, self).__init__()
+
+        # Accept a single type name, and copy the list so the shared default
+        # argument is never aliased by an instance
+        if isinstance(examples_type, str):
+            examples_type = [examples_type]
+        self.examples_type = list(examples_type)
+        self.seg_examples_path = str(self.datapath)
+        self.filter_by_type()
+
+    def filter_by_type(self):
+        """
+        Keep only the segmentation examples whose 'type' value is one of
+        ``self.examples_type`` and renumber the remaining rows from 0.
+        """
+        # NOTE(review): filters on the 'type' column because that is the
+        # column __getitem__ reads; assumes its values use the same names as
+        # examples_type -- confirm against seg_examples.csv
+        selected = self.seg_examples_df['type'].isin(self.examples_type)
+        self.seg_examples_df = self.seg_examples_df.loc[selected, :].reset_index(drop=True)
+
+    def __len__(self):
+        """Number of segmentation examples left after the type filter."""
+        return len(self.seg_examples_df)
+
+    def __getitem__(self, idx):
+        """
+        Load one segmentation example.
+
+        Args:
+            idx (int): Position of the example in ``self.seg_examples_df``.
+
+        Returns:
+            dict: 'idx', 'type', 'problem', 'label' and 'img'; 'resized' is
+                added (True) only when the image was downscaled.
+
+        Raises:
+            FileNotFoundError: if the image cannot be read.
+        """
+        sample = {}
+        sample['idx'] = idx
+        img_path = self.seg_examples_df['path'].iloc[idx]
+        sample['type'] = self.seg_examples_df['type'].iloc[idx]
+
+        # presumably paths look like '<problem>/<partition>/<label>/<file>'
+        # -- TODO confirm against seg_examples.csv
+        path_parts = img_path.split('/')
+        sample['problem'] = path_parts[0]
+        sample['label'] = path_parts[2]
+
+        full_path = self.seg_examples_path + '/' + img_path
+        img = cv2.imread(full_path, cv2.IMREAD_COLOR)
+        if img is None:
+            # cv2.imread signals a missing/unreadable file by returning None
+            raise FileNotFoundError(f"{full_path} could not be read as an image")
+
+        if self.resize_image:
+            sample['resized'] = True
+            height, width = img.shape[:2]
+            # never ask OpenCV for a zero-sized output
+            half_size = (max(1, width // 2), max(1, height // 2))
+            img = cv2.resize(img, half_size, interpolation=cv2.INTER_AREA)
+
+        sample['img'] = img
+        return sample
+
+
+
+
diff --git a/metadata/__init__.py b/metadata/__init__.py