Remove unused dependencies - review examples and tutorials (NeuroTech…

…X#188) * remove wfdb * update poetry * adding requests to dependency * correct URL information * update example with smaller dataset * correct Lee2019 FTP error and data_dl to replace data_path * correct electrode names error * switch to data_dl * replace MNE _fetch with Pooch retrieve * use correct FTP url, remove pyunpack dependency * update tests for download * convert bytes to string for results dataframe * switch to MI to avoid download, update text * reduce subject number, update text * correct encoding, limit subject, update text * default storage for results is now handled by MNE config * indicating deprecated function * updating tests * correct docstring and consistent figshare function name Signed-off-by: Sylvain Chevallier <[email protected]>
girafe-ai · Jun 2, 2021 · 3d42d79 · 3d42d79
1 parent 73d9a1e
commit 3d42d79
Show file tree

Hide file tree

Showing 28 changed files with 873 additions and 758 deletions.
diff --git a/examples/plot_cross_session_multiple_datasets.py b/examples/plot_cross_session_multiple_datasets.py
@@ -19,13 +19,14 @@
 
 import matplotlib.pyplot as plt
 import seaborn as sns
+from mne.decoding import CSP
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
 from sklearn.pipeline import make_pipeline
 
 import moabb
-from moabb.datasets import MAMEM1, MAMEM2, MAMEM3
+from moabb.datasets import BNCI2014001, Zhou2016
 from moabb.evaluations import CrossSessionEvaluation
-from moabb.paradigms import SSVEP
-from moabb.pipelines import SSVEP_CCA
+from moabb.paradigms import LeftRightImagery
 
 
 warnings.simplefilter(action="ignore", category=FutureWarning)
@@ -36,56 +37,51 @@
 # Loading dataset
 # ---------------
 #
-# Load 2 subjects of MAMEM1 2 and 3 datasets, with 5 session each
+# Load 2 subjects of BNCI 2014-001 and Zhou2016 datasets, with 2 sessions each
 
-subj = [2, 5]
-for s in subj:
-    for d in [MAMEM1(), MAMEM2(), MAMEM3()]:
-        d._get_single_subject_data(s)
-datasets = [MAMEM1(), MAMEM2(), MAMEM3()]
+subj = [1, 2]
+datasets = [Zhou2016(), BNCI2014001()]
 for d in datasets:
     d.subject_list = subj
 
 ###############################################################################
 # Choose paradigm
 # ---------------
 #
-# We select the paradigm SSVEP, applying a bandpass filter (3-15 Hz) on
-# the data and we keep all the 5 classes, that is stimulation
-# frequency of 6.66, 7.50 and 8.57, 10 and 12 Hz.
+# We select the MI paradigm, applying a bandpass filter (8-35 Hz) to
+# the data, and keep only left- and right-hand motor imagery.
 
-paradigm = SSVEP(fmin=3, fmax=15, n_classes=None)
+paradigm = LeftRightImagery(fmin=8, fmax=35)
 
 ##############################################################################
 # Create pipelines
 # ----------------
 #
-# Use a Canonical Correlation Analysis classifier
-
-interval = datasets[0].interval
-freqs = paradigm.used_events(datasets[0])
+# Use the Common Spatial Patterns with 8 components and a Linear Discriminant
+# Analysis classifier.
 
 pipeline = {}
-pipeline["CCA"] = make_pipeline(SSVEP_CCA(interval=interval, freqs=freqs, n_harmonics=3))
+pipeline["CSP+LDA"] = make_pipeline(CSP(n_components=8), LDA())
 
 ##############################################################################
 # Get data (optional)
 # -------------------
 #
 # To get access to the EEG signals downloaded from the dataset, you could
-# use `dataset._get_single_subject_data(subject_id) to obtain the EEG under
+# use `dataset.get_data(subjects=[subject_id])` to obtain the EEG under
 # an MNE format, stored in a dictionary of sessions and runs.
 # Otherwise, `paradigm.get_data(dataset=dataset, subjects=[subject_id])`
 # allows to obtain the EEG data in scikit format, the labels and the meta
-# information.
+# information. The data are preprocessed according to the paradigm
+# requirements.
 
-X_all, labels_all, meta_all = [], [], []
-for d in datasets:
-    # sessions = d._get_single_subject_data(2)
-    X, labels, meta = paradigm.get_data(dataset=d, subjects=[2])
-    X_all.append(X)
-    labels_all.append(labels)
-    meta_all.append(meta)
+# X_all, labels_all, meta_all = [], [], []
+# for d in datasets:
+#     # sessions = d.get_data(subjects=[2])
+#     X, labels, meta = paradigm.get_data(dataset=d, subjects=[2])
+#     X_all.append(X)
+#     labels_all.append(labels)
+#     meta_all.append(meta)
 
 ##############################################################################
 # Evaluation
@@ -102,6 +98,7 @@
 results = evaluation.process(pipeline)
 
 print(results.head())
+results.replace(["session_E", "session_T"], ["session_0", "session_1"], inplace=True)
 
 ##############################################################################
 # Plot Results

diff --git a/examples/plot_cross_session_ssvep.py b/examples/plot_cross_session_ssvep.py
@@ -4,7 +4,7 @@
 ===================
 
 This Example show how to perform a cross-session SSVEP analysis on the
-MAMEM dataset 1, using a CCA pipeline.
+MAMEM dataset 3, using a CCA pipeline.
 
 The cross session evaluation context will evaluate performance using a leave
 one session out cross-validation. For each session in the dataset, a model
@@ -22,7 +22,7 @@
 from sklearn.pipeline import make_pipeline
 
 import moabb
-from moabb.datasets import MAMEM1
+from moabb.datasets import MAMEM3
 from moabb.evaluations import CrossSessionEvaluation
 from moabb.paradigms import SSVEP
 from moabb.pipelines import SSVEP_CCA
@@ -36,12 +36,10 @@
 # Loading dataset
 # ---------------
 #
-# Load 2 subjects of MAMEM1 dataset, with 3 session each
+# Load 2 subjects of MAMEM3 dataset, with 3 sessions each
 
 subj = [1, 3]
-for s in subj:
-    MAMEM1()._get_single_subject_data(s)
-dataset = MAMEM1()
+dataset = MAMEM3()
 dataset.subject_list = subj
 
 ###############################################################################
@@ -71,14 +69,15 @@
 # -------------------
 #
 # To get access to the EEG signals downloaded from the dataset, you could
-# use `dataset._get_single_subject_data(subject_id) to obtain the EEG under
-# an MNE format, stored in a dictionary of sessions and runs.
+# use `dataset.get_data(subjects=[subject_id])` to obtain the EEG under
+# MNE format, stored in a dictionary of sessions and runs.
 # Otherwise, `paradigm.get_data(dataset=dataset, subjects=[subject_id])`
 # allows to obtain the EEG data in scikit format, the labels and the meta
-# information.
+# information. In `paradigm.get_data`, the EEG signals are preprocessed
+# according to the paradigm requirements.
 
-sessions = dataset._get_single_subject_data(3)
-X, labels, meta = paradigm.get_data(dataset=dataset, subjects=[3])
+# sessions = dataset.get_data(subjects=[3])
+# X, labels, meta = paradigm.get_data(dataset=dataset, subjects=[3])
 
 ##############################################################################
 # Evaluation

diff --git a/examples/plot_cross_subject_ssvep.py b/examples/plot_cross_subject_ssvep.py
@@ -45,8 +45,6 @@
 # frequency.
 
 n_subject = 2
-for i in range(n_subject):
-    SSVEPExo()._get_single_subject_data(i + 1)
 dataset = SSVEPExo()
 dataset.subject_list = dataset.subject_list[:n_subject]
 interval = dataset.interval

diff --git a/moabb/analysis/results.py b/moabb/analysis/results.py
@@ -1,12 +1,14 @@
 import hashlib
-import inspect
 import os
+import os.path as osp
 import re
 from datetime import datetime
 
 import h5py
 import numpy as np
 import pandas as pd
+from mne import get_config, set_config
+from mne.datasets.utils import _get_path
 from sklearn.base import BaseEstimator
 
 
@@ -50,7 +52,6 @@ def __init__(
         """
         class that will abstract result storage
         """
-        import moabb
         from moabb.evaluations.base import BaseEvaluation
         from moabb.paradigms.base import BaseParadigm
 
@@ -64,24 +65,28 @@ class that will abstract result storage
             self.additional_columns = additional_columns
 
         if hdf5_path is None:
-            self.mod_dir = os.path.dirname(os.path.abspath(inspect.getsourcefile(moabb)))
+            if get_config("MOABB_RESULTS") is None:
+                set_config("MOABB_RESULTS", osp.join(osp.expanduser("~"), "mne_data"))
+            self.mod_dir = _get_path(None, "MOABB_RESULTS", "results")
+            # was previously stored in the moabb source file folder:
+            # self.mod_dir = osp.dirname(osp.abspath(inspect.getsourcefile(moabb)))
         else:
-            self.mod_dir = os.path.abspath(hdf5_path)
-        self.filepath = os.path.join(
+            self.mod_dir = osp.abspath(hdf5_path)
+        self.filepath = osp.join(
             self.mod_dir,
             "results",
             paradigm_class.__name__,
             evaluation_class.__name__,
             "results{}.hdf5".format("_" + suffix),
         )
 
-        os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
+        os.makedirs(osp.dirname(self.filepath), exist_ok=True)
         self.filepath = self.filepath
 
-        if overwrite and os.path.isfile(self.filepath):
+        if overwrite and osp.isfile(self.filepath):
             os.remove(self.filepath)
 
-        if not os.path.isfile(self.filepath):
+        if not osp.isfile(self.filepath):
             with h5py.File(self.filepath, "w") as f:
                 f.attrs["create_time"] = np.string_(
                     "{:%Y-%m-%d, %H:%M}".format(datetime.now())
@@ -171,8 +176,8 @@ def to_dataframe(self, pipelines=None):
                     array = np.array(dset["data"])
                     ids = np.array(dset["id"])
                     df = pd.DataFrame(array, columns=dset.attrs["columns"])
-                    df["subject"] = ids[:, 0]
-                    df["session"] = ids[:, 1]
+                    df["subject"] = [s.decode() for s in ids[:, 0]]
+                    df["session"] = [s.decode() for s in ids[:, 1]]
                     df["channels"] = dset.attrs["channels"]
                     df["n_sessions"] = dset.attrs["n_sessions"]
                     df["dataset"] = dname

diff --git a/moabb/datasets/Lee2019.py b/moabb/datasets/Lee2019.py
@@ -118,7 +118,6 @@ def _get_single_subject_data(self, subject):
     def data_path(
         self, subject, path=None, force_update=False, update_path=None, verbose=None
     ):
-
         if subject not in self.subject_list:
             raise (ValueError("Invalid subject number"))
 
@@ -127,8 +126,7 @@ def data_path(
             url = "{0}session{1}/s{2}/sess{1:02d}_subj{2:02d}_EEG_MI.mat".format(
                 Lee2019_URL, session, subject
             )
-
-            data_path = dl.data_path(
+            data_path = dl.data_dl(
                 url, "Lee2019_MI", path, force_update, update_path, verbose
             )
             subject_paths.append(data_path)

diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py
@@ -134,8 +134,9 @@ def _get_single_subject_data(self, subject):
         ch_names = [
             "Fp1", "Fpz", "Fp2", "AF3", "AF4", "F7", "F5", "F3", "F1", "Fz", "F2", "F4", "F6",
             "F8", "FT7", "FC5", "FC3", "FC1", "FCz", "FC2", "FC4", "FC6", "FT8", "T7", "C5",
-            "C3", "C1", "Pz", "P2", "P4", "P6", "P8", "PO7", "PO5", "PO3", "POz", "PO4", "PO6",
-            "PO8", "CB1", "O1", "Oz", "O2", "CB2", "VEO", "HEO",
+            "C3", "C1", "Cz", "C2", "C4", "C6", "T8", "TP7", "CP5", "CP3", "CP1", "CPz", "CP2",
+            "CP4", "CP6", "TP8", "P7", "P5", "P3", "P1", "Pz", "P2", "P4", "P6", "P8", "PO7",
+            "PO5", "PO3", "POz", "PO4", "PO6", "PO8", "CB1", "O1", "Oz", "O2", "CB2", "VEO", "HEO",
         ]
         # fmt: on
 

diff --git a/moabb/datasets/Zhou2016.py b/moabb/datasets/Zhou2016.py
@@ -11,7 +11,7 @@
 from mne.channels import make_standard_montage
 from mne.datasets.utils import _do_path_update, _get_path
 from mne.io import read_raw_cnt
-from mne.utils import _fetch_file
+from pooch import retrieve
 
 from .base import BaseDataset
 
@@ -22,9 +22,7 @@
 def local_data_path(base_path, subject):
     if not os.path.isdir(os.path.join(base_path, "subject_{}".format(subject))):
         if not os.path.isdir(os.path.join(base_path, "data")):
-            _fetch_file(
-                DATA_PATH, os.path.join(base_path, "data.zip"), print_destination=False
-            )
+            retrieve(DATA_PATH, None, fname="data.zip", path=base_path)
             with z.ZipFile(os.path.join(base_path, "data.zip"), "r") as f:
                 f.extractall(base_path)
             os.remove(os.path.join(base_path, "data.zip"))

diff --git a/moabb/datasets/alex_mi.py b/moabb/datasets/alex_mi.py
@@ -59,4 +59,4 @@ def data_path(
         if subject not in self.subject_list:
             raise (ValueError("Invalid subject number"))
         url = "{:s}subject{:d}.raw.fif".format(ALEX_URL, subject)
-        return dl.data_path(url, "ALEXEEG", path, force_update, update_path, verbose)
+        return dl.data_dl(url, "ALEXEEG", path, force_update, update_path, verbose)
diff --git a/moabb/datasets/bnci.py b/moabb/datasets/bnci.py
@@ -18,7 +18,7 @@
 
 
 def data_path(url, path=None, force_update=False, update_path=None, verbose=None):
-    return [dl.data_path(url, "BNCI", path, force_update, update_path, verbose)]
+    return [dl.data_dl(url, "BNCI", path, force_update, update_path, verbose)]
 
 
 @verbose

diff --git a/moabb/datasets/braininvaders.py b/moabb/datasets/braininvaders.py
@@ -155,7 +155,7 @@ def data_path(
 
         # check if has the .zip
         url = "{:s}subject{:d}.zip".format(BI2013a_URL, subject)
-        path_zip = dl.data_path(url, "BRAININVADERS")
+        path_zip = dl.data_dl(url, "BRAININVADERS")
         path_folder = path_zip.strip("subject{:d}.zip".format(subject))
 
         # check if has to unzip
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,7 +18,7 @@ @@
     def data_path(url, path=None, force_update=False, update_path=None, verbose=None):
-        return [dl.data_path(url, "BNCI", path, force_update, update_path, verbose)]
+        return [dl.data_dl(url, "BNCI", path, force_update, update_path, verbose)]
     @verbose
@@ Expand Down @@