Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* dataset creation

* segmentation examples

* changes

* writing initial class

* initial draft finished

* initial feat extraction class

* add local color features

* run classification experiment color features

* final color test

* add 1000 samples md

* merging 1000 samples csv

* patching preprocessing.py with cropping fov condition

* Cansu feature extraction (cansuyalcinn#14)

* feature extraction notebook is added

* changes

* changes

* changes

* changes

* changes

* changes

* LBP pipeline

* changes

* added reqs

* changes

* added  channel lbp

Co-authored-by: alexCortinaU <[email protected]>

* finish lbp and create glcm (cansuyalcinn#15)

* create texture_features nb

* extract texture features

* texture features class

* classification test with color and texture features

* adding feature extraction nb

* changes for efficient feature extraction

* val set features and classification test

* gabor testing

* update three class csv

* ready to run

* no changes just update

* classification test all binary data

* feature importances

* update preprocess class for lesion segmentation

* Alex three class (cansuyalcinn#18)

* three class classification test

* using balanced SVM

* testing SVC with balancing and SMOTE

* first RF runs

* Testing Boosting algorithms

* ensemble

* final ensemble methods

* final test prediction

* runs for presentation

* inference time test

* Cansu running tests (cansuyalcinn#19)

* classifier test

* comments

* changes

* svm update

* smv update

* feature selection methods

* knn

* rf tests

* updates

* LR and adaboost

* updates

* gradient boosting

* xgboost

* changes

* best classifiers

* changes

* ensemble model is added

* changes

* changes

Co-authored-by: Cansu Yalcin <[email protected]>

* reformatting

* adding report and readme

* readme correction

Co-authored-by: Cansu YALCIN <[email protected]>
Co-authored-by: Cansu Yalcin <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2022
1 parent bf32852 commit 0b28a43
Show file tree
Hide file tree
Showing 44 changed files with 39,899 additions and 24,081 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
data/
database/
.idea/
examples/
examples/
dataset/__pycache__/
pipeline/__pycache__/
notebooks/.ipynb_checkpoints
Binary file added Cad1_Skin_ Lesion_presentation.pdf
Binary file not shown.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# Skin_lesion_segmentation
# Skin_lesion_segmentation

#### Skin lesion segmentation project by Cansu Yalcin and Alejandro Cortina for the Computer Aided Diagnosis course at the University of Girona

To run an inference test, please refer to notebooks/pipeline_test.ipynb.

Please refer to "Cad1_Skin_ Lesion_presentation.pdf" for the report of the work done.
Empty file added __init__.py
Empty file.
Empty file added dataset/__init__.py
Empty file.
169 changes: 169 additions & 0 deletions dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
from operator import index
import os
import sys; sys.path.insert(0, os.path.abspath("../"))

import collections
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from typing import List, Tuple
import multiprocessing as mp
# Absolute, symlink-resolved location of this module file.
thispath = Path(__file__).resolve()

# Default folders, both located two directory levels above this file:
# "data" holds the dataset images, "metadata" holds the CSV files.
datapath = thispath.parent.parent.joinpath("data")
md_df_path = thispath.parent.parent.joinpath("metadata")

class Dataset():
    """Minimal base class shared by the dataset wrappers in this module.

    The base class stores no state itself. Its helpers read attributes that
    subclasses are expected to set: ``labels`` and ``lesion_types`` (used by
    ``totals``), and ``datapath_class`` and ``metadata_path`` (used by
    ``check_paths_exist``).
    """

    def __init__(self):
        pass

    def totals(self):
        """Count the label values found for each lesion type.

        Iterates over ``self.labels.T``, drops NaN entries from each item and
        counts the remaining values, most common first.

        Returns:
            dict: maps each entry of ``self.lesion_types`` to a
            ``{value: count}`` dictionary.
        """
        counts = [
            dict(
                collections.Counter(items[~np.isnan(items)]).most_common()
            ) for items in self.labels.T
        ]
        return dict(zip(self.lesion_types, counts))

    def __repr__(self):
        """Return a printable summary of the dataset.

        The summary contains the class name and, when both ``labels`` and
        ``lesion_types`` are set, the pretty-printed result of ``totals()``.
        """
        # Local import: pprint is not part of the module-level imports.
        import pprint

        name = type(self).__name__
        # repr() must not fail on instances that never set the attributes
        # totals() depends on, so fall back to the bare class name.
        if not (hasattr(self, 'labels') and hasattr(self, 'lesion_types')):
            return f"{name}()"
        return f"{name}({pprint.pformat(self.totals())})"

    def check_paths_exist(self):
        """Verify that the image folder and the metadata file exist.

        Raises:
            FileNotFoundError: if ``self.datapath_class`` or
                ``self.metadata_path`` does not exist.
        """
        if not self.datapath_class.exists():
            raise FileNotFoundError(f"{self.datapath_class} could not be found")
        if not self.metadata_path.exists():
            raise FileNotFoundError(f"{self.metadata_path} could not be found")


class SkinLesion_Dataset(Dataset):
    """Index-based access to skin lesion images listed in a metadata CSV.

    The metadata file ``<df_path>/<class_task>.csv`` is read with its first
    column as index; the columns ``split``, ``label``, ``img_id`` and ``path``
    are used by this class.
    """

    def __init__(
        self, class_task: str = 'binary',
        df_path: Path = md_df_path,
        datapath_class: Path = datapath,
        seed: int = 0,
        partitions: List[str] = ['train', 'val'],
        n_jobs: int = -1,
        resize_image: bool = True,
    ):
        """
        Constructor of SkinLesion_Dataset class
        Args:
            class_task (str, optional): Classification task, 'binary' or 'three_class'.
                Defaults to 'binary'.
            df_path (Path, optional): Metadata dataframe path. Defaults to md_df_path.
            datapath_class (Path, optional): data folder containing dataset images.
                Defaults to datapath.
            seed (int, optional): Seed to guarantee reproducibility. Defaults to 0.
            partitions (List[str], optional): Selected sets. Defaults to ['train', 'val'].
            n_jobs (int, optional): Number of processes to use in parallel operations.
                Defaults to -1, which selects all available CPU cores.
            resize_image (bool, optional): Whether returned images are downscaled to
                half their width and height. Defaults to True.
        Raises:
            FileNotFoundError: (raised by check_paths_exist) if the task image
                folder or the metadata file is missing.
        """
        super(SkinLesion_Dataset, self).__init__()

        self.class_task = class_task
        # Copy the selection so the shared default list is never aliased;
        # a single partition name is accepted as a one-element selection.
        self.partitions = (
            [partitions] if isinstance(partitions, str) else list(partitions)
        )
        self.resize_image = resize_image

        # Set seed and number of cores to use
        self.seed = seed
        np.random.seed(self.seed)
        self.n_jobs = mp.cpu_count() if n_jobs == -1 else n_jobs

        # Honour the caller-supplied data folder instead of always falling
        # back to the module-level default; plain strings are accepted too.
        self.datapath = Path(datapath_class)
        self.df_path = Path(df_path)
        self.datapath_class = self.datapath / class_task
        self.metadata_path = self.df_path / (class_task + '.csv')

        # Load data
        self.check_paths_exist()
        self.md_df = pd.read_csv(self.metadata_path, index_col=0)

        # Filter partition
        self.filter_by_partition()
        self.labels = self.md_df['label'].values

        # Create segmentation examples df
        self.seg_examples_df = pd.read_csv(self.df_path / "seg_examples.csv")

    def filter_by_partition(self):
        """
        Keep only the metadata rows whose ``split`` value is one of the
        selected partitions, and renumber the remaining rows from zero.
        """
        selected = self.md_df['split'].isin(self.partitions)
        # reset_index returns a new frame, so the filtered slice is never
        # modified in place.
        self.md_df = self.md_df.loc[selected, :].reset_index(drop=True)

    def __len__(self):
        """Return the number of samples left after partition filtering."""
        return len(self.labels)

    def __getitem__(self, idx):
        """
        Load the sample stored at position ``idx``.
        Args:
            idx (int): Positional index into the filtered metadata.
        Returns:
            dict: with keys 'idx', 'label', 'img_id' and 'img'. 'img' is the
                image read in colour mode, downscaled to half its width and
                height when ``resize_image`` is set.
        Raises:
            OSError: if the image file could not be read.
        """
        sample = {}
        sample['idx'] = idx
        sample['label'] = self.labels[idx]
        sample['img_id'] = self.md_df['img_id'].iloc[idx]

        # read and save the image
        img_path = Path(self.md_df['path'].iloc[idx])
        img = cv2.imread(str(img_path), cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(f"Image {img_path} could not be read")

        if self.resize_image:
            height, width = img.shape[:2]
            img = cv2.resize(
                img, (width // 2, height // 2), interpolation=cv2.INTER_AREA
            )

        sample['img'] = img
        return sample


class SegExamples(SkinLesion_Dataset):
    """Access to the segmentation example images listed in ``seg_examples_df``.

    The example table is loaded by the parent constructor; its ``path`` and
    ``type`` columns are used here. Paths are relative to the data folder.
    """

    def __init__(self,
                 examples_type: List[str] = ['easy', 'medium', 'hard', 'vhard'],

                 ):
        """
        Sample easy, medium, hard and very hard examples from both tasks (binary, three class)
        Args:
            examples_type (List[str], optional): Type of examples. Defaults to ['easy', 'medium', 'hard', 'vhard'].
        """

        super(SegExamples, self).__init__()

        # Copy the selection so the shared default list is never aliased;
        # a single type name is accepted as a one-element selection.
        self.examples_type = (
            [examples_type] if isinstance(examples_type, str)
            else list(examples_type)
        )
        self.seg_examples_path = str(self.datapath)
        # The filter has to be called; a bare attribute access does nothing.
        self.filter_by_type()

    def filter_by_type(self):
        """
        Keep only the example rows whose ``type`` value is one of the
        selected example types, and renumber the remaining rows from zero.
        """
        selected = self.seg_examples_df['type'].isin(self.examples_type)
        self.seg_examples_df = (
            self.seg_examples_df.loc[selected, :].reset_index(drop=True)
        )

    def __len__(self):
        """Return the number of segmentation examples after type filtering."""
        return len(self.seg_examples_df)

    def __getitem__(self, idx):
        """
        Load the segmentation example stored at position ``idx``.
        Args:
            idx (int): Positional index into the filtered example table.
        Returns:
            dict: with keys 'idx', 'type', 'problem', 'label' and 'img', plus
                'resized' (True) when the image was downscaled to half its
                width and height. 'problem' and 'label' are the first and
                third '/'-separated components of the relative image path.
        Raises:
            OSError: if the image file could not be read.
        """
        sample = {}
        sample['idx'] = idx
        img_path = self.seg_examples_df['path'].iloc[idx]
        sample['type'] = self.seg_examples_df['type'].iloc[idx]

        path_parts = img_path.split('/')
        sample['problem'] = path_parts[0]
        sample['label'] = path_parts[2]

        full_path = f"{self.seg_examples_path}/{img_path}"
        img = cv2.imread(full_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError(f"Image {full_path} could not be read")
        sample['img'] = img

        if self.resize_image:
            sample['resized'] = True
            height, width = img.shape[:2]
            sample['img'] = cv2.resize(
                img, (width // 2, height // 2), interpolation=cv2.INTER_AREA
            )
        return sample




Empty file added metadata/__init__.py
Empty file.
Loading

0 comments on commit 0b28a43

Please sign in to comment.