diff --git a/docs/source/images/crosssess.pdf b/docs/source/images/crosssess.pdf
new file mode 100644
index 000000000..7a39c9600
Binary files /dev/null and b/docs/source/images/crosssess.pdf differ
diff --git a/docs/source/images/crosssubj.pdf b/docs/source/images/crosssubj.pdf
new file mode 100644
index 000000000..e5c65135d
Binary files /dev/null and b/docs/source/images/crosssubj.pdf differ
diff --git a/docs/source/images/withinsess.pdf b/docs/source/images/withinsess.pdf
new file mode 100644
index 000000000..3e29af0b6
Binary files /dev/null and b/docs/source/images/withinsess.pdf differ
diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst
index 660b5bf24..d56bfdb0a 100644
--- a/docs/source/whats_new.rst
+++ b/docs/source/whats_new.rst
@@ -75,6 +75,7 @@ Enhancements
 - Add new dataset :class:`moabb.datasets.Rodrigues2017` dataset (:gh:`602` by `Gregoire Cattan`_ and `Pedro L. C. Rodrigues`_)
 - Change unittest to pytest (:gh:`618` by `Bruno Aristimunha`_)
 - Remove tensorflow import warning (:gh:`622` by `Bruno Aristimunha`_)
+- Add data splitter classes (:gh:`612` by `Bruna Lopes`_)
 
 Bugs
 ~~~~
diff --git a/moabb/evaluations/metasplitters.py b/moabb/evaluations/metasplitters.py
new file mode 100644
index 000000000..fe428ba90
--- /dev/null
+++ b/moabb/evaluations/metasplitters.py
@@ -0,0 +1,352 @@
+"""
+The data splitters defined in this file are not directly tied to an evaluation method
+the way the WithinSession, CrossSession and CrossSubject splitters are.
+
+OfflineSplit and PseudoOnlineSplit split the test data, indicating whether model inference
+will be evaluated in an offline or a pseudo-online fashion. Pseudo-online evaluation
+is important when the training data is pre-processed with some data-dependent transformation:
+one part of the test data is set aside as calibration data used to compute that transformation.
+
+SamplerSplit is an optional subsplit performed on the training set to generate subsets with
+different numbers of samples. It can be used to estimate the performance of the model
+for different training-set sizes and to plot learning curves.
+
+"""
+
+import numpy as np
+from sklearn.model_selection import BaseCrossValidator
+
+from moabb.evaluations.utils import sort_group
+
+
+class OfflineSplit(BaseCrossValidator):
+    """Offline split for evaluation test data.
+
+    It can be used to further split the test data by session or by run, as needed.
+
+    Assumes that, per session, all test trials are available for inference. It can be used
+    when no filtering or data alignment is needed.
+
+    Parameters
+    ----------
+    n_folds: int
+        Not used. Kept only so the class can be initialized in the same way as
+        PseudoOnlineSplit.
+    run: bool
+        If True, the test data is additionally split by run (requires a 'run' column
+        in the metadata).
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from moabb.evaluations.splitters import CrossSubjectSplitter
+    >>> X = np.array([[[5, 6]]*12])[0]
+    >>> y = np.array([[1, 2]*12])[0]
+    >>> subjects = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
+    >>> sessions = np.array([[0, 0, 1, 1]*3])[0]
+    >>> metadata = pd.DataFrame(data={'subject': subjects, 'session': sessions})
+    >>> csubj = CrossSubjectSplitter()
+    >>> off = OfflineSplit()
+    >>> csubj.get_n_splits(metadata)
+    3
+    >>> for i, (train_index, test_index) in enumerate(csubj.split(X, y, metadata)):
+    >>>     print(f"Fold {i}:")
+    >>>     print(f"  Train: index={train_index}, group={subjects[train_index]}, sessions={sessions[train_index]}")
+    >>>     print(f"  Test: index={test_index}, group={subjects[test_index]}, sessions={sessions[test_index]}")
+    >>>     X_test, y_test, meta_test = X[test_index], y[test_index], metadata.loc[test_index]
+    >>>     for j, test_session in enumerate(off.split(X_test, y_test, meta_test)):
+    >>>         print(f"    By session - Test: index={test_session}, group={subjects[test_session]}, sessions={sessions[test_session]}")
+
+    Fold 0:
+      Train: index=[ 4  5  6  7  8  9 10 11], group=[2 2 2 2 3 3 3 3], sessions=[0 0 1 1 0 0 1 1]
+      Test: index=[0 1 2 3], group=[1 1 1 1], sessions=[0 0 1 1]
+        By session - Test: index=[0, 1], group=[1 1], sessions=[0 0]
+        By session - Test: index=[2, 3], group=[1 1], sessions=[1 1]
+    Fold 1:
+      Train: index=[ 0  1  2  3  8  9 10 11], group=[1 1 1 1 3 3 3 3], sessions=[0 0 1 1 0 0 1 1]
+      Test: index=[4 5 6 7], group=[2 2 2 2], sessions=[0 0 1 1]
+        By session - Test: index=[4, 5], group=[2 2], sessions=[0 0]
+        By session - Test: index=[6, 7], group=[2 2], sessions=[1 1]
+    Fold 2:
+      Train: index=[0 1 2 3 4 5 6 7], group=[1 1 1 1 2 2 2 2], sessions=[0 0 1 1 0 0 1 1]
+      Test: index=[ 8  9 10 11], group=[3 3 3 3], sessions=[0 0 1 1]
+        By session - Test: index=[8, 9], group=[3 3], sessions=[0 0]
+        By session - Test: index=[10, 11], group=[3 3], sessions=[1 1]
+
+    """
+
+    def __init__(self, n_folds=None, run=False):
+        self.n_folds = n_folds
+        self.run = run
+
+    def get_n_splits(self, metadata):
+        return metadata.groupby(["subject", "session"]).ngroups
+
+    def split(self, X, y, metadata):
+
+        subjects = metadata["subject"]
+
+        for subject in subjects.unique():
+            mask = subjects == subject
+            X_, y_, meta_ = X[mask], y[mask], metadata[mask]
+            sessions = meta_.session.unique()
+
+            for session in sessions:
+                session_mask = meta_["session"] == session
+                _, _, meta_session = (
+                    X_[session_mask],
+                    y_[session_mask],
+                    meta_[session_mask],
+                )
+
+                # If we can (and want to) split by run as well
+                if self.run and "run" in meta_session.columns:
+                    runs = meta_session["run"].unique()
+
+                    for run in runs:
+                        run_mask = meta_session["run"] == run
+                        ix_test = meta_session[run_mask].index
+                        yield list(ix_test)
+
+                else:
+                    ix_test = meta_session.index
+                    yield list(ix_test)
+
+
+class PseudoOnlineSplit(BaseCrossValidator):
+    """Pseudo-online split for evaluation test data.
+
+    It takes into account the temporal order in which the test data was acquired: the first
+    run (or the first ``calib_size`` trials) is used as calibration data, and the remaining
+    trials are used as evaluation data. Calibration data matters whenever data alignment or
+    filtering is applied to the training data.
+
+    Note: since this inference split relies on the temporal ordering of the data, be extra
+    careful if your trials are ordered by some other criterion (e.g., by class) rather than
+    by time.
+
+    Parameters
+    ----------
+    calib_size: int
+        Size of the calibration set, used if there is only one run.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from moabb.evaluations.splitters import CrossSubjectSplitter
+    >>> from moabb.evaluations.metasplitters import PseudoOnlineSplit
+    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [8, 9], [5, 4], [2, 5], [1, 7]])
+    >>> y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
+    >>> subjects = np.array([1, 1, 1, 1, 2, 2, 2, 2])
+    >>> sessions = np.array([0, 0, 1, 1, 0, 0, 1, 1])
+    >>> runs = np.array(['0', '1', '0', '1', '0', '1', '0', '1'])
+    >>> metadata = pd.DataFrame(data={'subject': subjects, 'session': sessions, 'run': runs})
+    >>> csubj = CrossSubjectSplitter()
+    >>> posplit = PseudoOnlineSplit()
+    >>> posplit.get_n_splits(metadata)
+    4
+    >>> for i, (train_index, test_index) in enumerate(csubj.split(X, y, metadata)):
+    >>>     print(f"Fold {i}:")
+    >>>     print(f"  Train: index={train_index}, group={subjects[train_index]}, sessions={sessions[train_index]}, runs={runs[train_index]}")
+    >>>     print(f"  Test: index={test_index}, group={subjects[test_index]}, sessions={sessions[test_index]}, runs={runs[test_index]}")
+    >>>     X_test, y_test, meta_test = X[test_index], y[test_index], metadata.loc[test_index]
+    >>>     for j, (test_ix, calib_ix) in enumerate(posplit.split(X_test, y_test, meta_test)):
+    >>>         print(f"    Evaluation: index={test_ix}, group={subjects[test_ix]}, sessions={sessions[test_ix]}, runs={runs[test_ix]}")
+    >>>         print(f"    Calibration: index={calib_ix}, group={subjects[calib_ix]}, sessions={sessions[calib_ix]}, runs={runs[calib_ix]}")
+
+    Fold 0:
+      Train: index=[4 5 6 7], group=[2 2 2 2], sessions=[0 0 1 1], runs=['0' '1' '0' '1']
+      Test: index=[0 1 2 3], group=[1 1 1 1], sessions=[0 0 1 1], runs=['0' '1' '0' '1']
+        Evaluation: index=[1], group=[1], sessions=[0], runs=['1']
+        Calibration: index=[0], group=[1], sessions=[0], runs=['0']
+        Evaluation: index=[3], group=[1], sessions=[1], runs=['1']
+        Calibration: index=[2], group=[1], sessions=[1], runs=['0']
+    Fold 1:
+      Train: index=[0 1 2 3], group=[1 1 1 1], sessions=[0 0 1 1], runs=['0' '1' '0' '1']
+      Test: index=[4 5 6 7], group=[2 2 2 2], sessions=[0 0 1 1], runs=['0' '1' '0' '1']
+        Evaluation: index=[5], group=[2], sessions=[0], runs=['1']
+        Calibration: index=[4], group=[2], sessions=[0], runs=['0']
+        Evaluation: index=[7], group=[2], sessions=[1], runs=['1']
+        Calibration: index=[6], group=[2], sessions=[1], runs=['0']
+
+    """
+
+    def __init__(self, calib_size: int = None):
+        self.calib_size = calib_size
+
+    def get_n_splits(self, metadata):
+        return len(metadata.groupby(["subject", "session"]))
+
+    def split(self, X, y, metadata):
+
+        for _, group in metadata.groupby(["subject", "session"]):
+            runs = group.run.unique()
+            if len(runs) > 1:
+                # Sort the runs to guarantee that they are in the right order
+                runs = sort_group(runs)
+                for run in runs:
+                    test_ix = group[group["run"] != run].index
+                    calib_ix = group[group["run"] == run].index
+                    yield list(test_ix), list(calib_ix)
+                    break  # Use the first run as calibration
+            else:
+                # Use the first calib_size samples as calibration
+                calib_size = self.calib_size
+                calib_ix = group[:calib_size].index
+                test_ix = group[calib_size:].index
+
+                yield list(test_ix), list(calib_ix)
+
+
+class SamplerSplit(BaseCrossValidator):
+    """Return subsets of the training data with different numbers of samples.
+
+    This splitter can be used to estimate a model's performance with different numbers of
+    training samples, and to plot the learning curve as a function of the training-set size.
+    You must pass an already-initialized evaluation splitter (WithinSession, CrossSession,
+    or CrossSubject) so the training set can be sampled, as well as a dictionary indicating
+    the policy used for sampling the training set and the number of examples (or the
+    percentage) that each subset must contain.
+
+    Parameters
+    ----------
+    data_eval: BaseCrossValidator object
+        Evaluation splitter, already initialized. It can be a WithinSession, CrossSession,
+        or CrossSubject splitter.
+    data_size : dict
+        Contains the policy to pick the datasizes to evaluate, as well as the actual values.
+        The dict has the key 'policy' with either 'ratio' or 'per_class', and the key
+        'value' with the actual values as a numpy array. This array should be
+        sorted, such that values in data_size are strictly monotonically increasing.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> X = np.array([[[5, 6]]*12])[0]
+    >>> y = np.array([[1, 2]*12])[0]
+    >>> subjects = np.array([1]*12)
+    >>> sessions = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
+    >>> runs = np.array(['0', '0', '1', '1', '2', '2', '0', '0', '1', '1', '2', '2'])
+    >>> metadata = pd.DataFrame(data={'subject': subjects, 'session': sessions, 'run': runs})
+    >>> from moabb.evaluations.metasplitters import SamplerSplit
+    >>> from moabb.evaluations.splitters import CrossSessionSplitter
+    >>> data_size = dict(policy="per_class", value=np.array([2, 3]))
+    >>> data_eval = CrossSessionSplitter()
+    >>> sampler = SamplerSplit(data_eval, data_size)
+    >>> for i, (train_index, test_index) in enumerate(sampler.split(X, y, metadata)):
+    >>>     print(f"Fold {i}:")
+    >>>     print(f"  Train: index={train_index}, sessions={sessions[train_index]}")
+    >>>     print(f"  Test: index={test_index}, sessions={sessions[test_index]}")
+
+    Fold 0:
+      Train: index=[6 8 7 9], sessions=[1 1 1 1]
+      Test: index=[0 1 2 3 4 5], sessions=[0 0 0 0 0 0]
+    Fold 1:
+      Train: index=[ 6  8 10  7  9 11], sessions=[1 1 1 1 1 1]
+      Test: index=[0 1 2 3 4 5], sessions=[0 0 0 0 0 0]
+    Fold 2:
+      Train: index=[0 2 1 3], sessions=[0 0 0 0]
+      Test: index=[ 6  7  8  9 10 11], sessions=[1 1 1 1 1 1]
+    Fold 3:
+      Train: index=[0 2 4 1 3 5], sessions=[0 0 0 0 0 0]
+      Test: index=[ 6  7  8  9 10 11], sessions=[1 1 1 1 1 1]
+
+    """
+
+    def __init__(self, data_eval, data_size):
+        self.data_eval = data_eval
+        self.data_size = data_size
+
+        self.sampler = IndividualSamplerSplit(self.data_size)
+
+    def get_n_splits(self, y, metadata):
+        return self.data_eval.get_n_splits(metadata) * len(
+            self.sampler.get_data_size_subsets(y)
+        )
+
+    def split(self, X, y, metadata, **kwargs):
+        cv = self.data_eval
+        sampler = self.sampler
+
+        for ix_train, ix_test in cv.split(X, y, metadata, **kwargs):
+            X_train, y_train, meta_train = (
+                X[ix_train],
+                y[ix_train],
+                metadata.iloc[ix_train],
+            )
+            for ix_train_sample in sampler.split(X_train, y_train, meta_train):
+                ix_train_sample = ix_train[ix_train_sample]
+                yield ix_train_sample, ix_test
+
+
+class IndividualSamplerSplit(BaseCrossValidator):
+    """Return subsets of the training data with different numbers of samples.
+
+    Utility for estimating the performance of a model when using different numbers of
+    training samples, and for plotting the learning curve.
+    It must be used after the data has already been split by one of the evaluation data
+    splitters (WithinSession, CrossSession, or CrossSubject), since it corresponds to a
+    subsampling of the training data.
+
+    This 'Individual' sampler split assumes that the data and metadata being passed are
+    training data that were already split by a WithinSession, CrossSession, or CrossSubject
+    splitter.
+
+    Parameters
+    ----------
+    data_size : dict
+        Contains the policy to pick the datasizes to
+        evaluate, as well as the actual values. The dict has the
+        key 'policy' with either 'ratio' or 'per_class', and the key
+        'value' with the actual values as a numpy array. This array should be
+        sorted, such that values in data_size are strictly monotonically increasing.
+
+    """
+
+    def __init__(self, data_size):
+        self.data_size = data_size
+
+    def get_n_splits(self, y=None):
+        return len(self.get_data_size_subsets(y))
+
+    def get_data_size_subsets(self, y):
+        if self.data_size is None:
+            raise ValueError(
+                "Cannot create data subsets without valid policy for data_size."
+            )
+        if self.data_size["policy"] == "ratio":
+            vals = np.array(self.data_size["value"])
+            if np.any(vals < 0) or np.any(vals > 1):
+                raise ValueError("Data subset ratios must be in range [0, 1]")
+            upto = np.ceil(vals * len(y)).astype(int)
+            indices = [np.array(range(i)) for i in upto]
+        elif self.data_size["policy"] == "per_class":
+            classwise_indices = dict()
+            n_smallest_class = np.inf
+            for cl in np.unique(y):
+                cl_i = np.where(cl == y)[0]
+                classwise_indices[cl] = cl_i
+                n_smallest_class = (
+                    len(cl_i) if len(cl_i) < n_smallest_class else n_smallest_class
+                )
+            indices = []
+            for ds in self.data_size["value"]:
+                if ds > n_smallest_class:
+                    raise ValueError(
+                        f"Smallest class has {n_smallest_class} samples. "
+                        f"Desired samples per class {ds} is too large."
+                    )
+                indices.append(
+                    np.concatenate(
+                        [classwise_indices[cl][:ds] for cl in classwise_indices]
+                    )
+                )
+        else:
+            raise ValueError(f"Unknown policy {self.data_size['policy']}")
+        return indices
+
+    def split(self, X, y, metadata):
+
+        data_size_steps = self.get_data_size_subsets(y)
+        for subset_indices in data_size_steps:
+            ix_train = subset_indices
+            yield ix_train
diff --git a/moabb/evaluations/splitters.py b/moabb/evaluations/splitters.py
new file mode 100644
index 000000000..30012fb5e
--- /dev/null
+++ b/moabb/evaluations/splitters.py
@@ -0,0 +1,319 @@
+import numpy as np
+from sklearn.model_selection import (
+    BaseCrossValidator,
+    GroupKFold,
+    LeaveOneGroupOut,
+    StratifiedKFold,
+)
+
+
+class WithinSessionSplitter(BaseCrossValidator):
+    """Data splitter for within-session evaluation.
+
+    Within-session evaluation uses k-fold cross-validation to determine train
+    and test sets within each session of each subject. This splitter assumes that
+    all data from all subjects is already known and loaded.
+
+    .. image:: images/withinsess.pdf
+        :alt: The schematic diagram of the WithinSession split
+        :align: center
+
+    Parameters
+    ----------
+    n_folds : int
+        Number of folds. Must be at least 2.
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> import numpy as np
+    >>> from moabb.evaluations.splitters import WithinSessionSplitter
+    >>> X = np.array([[1, 2], [3, 4], [5, 6], [1, 4], [7, 4], [5, 8], [0, 3], [2, 4]])
+    >>> y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
+    >>> subjects = np.array([1, 1, 1, 1, 1, 1, 1, 1])
+    >>> sessions = np.array(['T', 'T', 'E', 'E', 'T', 'T', 'E', 'E'])
+    >>> metadata = pd.DataFrame(data={'subject': subjects, 'session': sessions})
+    >>> csess = WithinSessionSplitter(2)
+    >>> csess.get_n_splits(metadata)
+    4
+    >>> for i, (train_index, test_index) in enumerate(csess.split(X, y, metadata)):
+    ...    print(f"Fold {i}:")
+    ...    print(f"  Train: index={train_index}, group={subjects[train_index]}, session={sessions[train_index]}")
+    ...    print(f"  Test: index={test_index}, group={subjects[test_index]}, sessions={sessions[test_index]}")
+    Fold 0:
+      Train: index=[2 7], group=[1 1], session=['E' 'E']
+      Test: index=[3 6], group=[1 1], sessions=['E' 'E']
+    Fold 1:
+      Train: index=[3 6], group=[1 1], session=['E' 'E']
+      Test: index=[2 7], group=[1 1], sessions=['E' 'E']
+    Fold 2:
+      Train: index=[4 5], group=[1 1], session=['T' 'T']
+      Test: index=[0 1], group=[1 1], sessions=['T' 'T']
+    Fold 3:
+      Train: index=[0 1], group=[1 1], session=['T' 'T']
+      Test: index=[4 5], group=[1 1], sessions=['T' 'T']
+
+
+    """
+
+    def __init__(self, n_folds=5):
+        self.n_folds = n_folds
+
+    def get_n_splits(self, metadata):
+        sessions_subjects = metadata.groupby(["subject", "session"]).ngroups
+        return self.n_folds * sessions_subjects
+
+    def split(self, X, y, metadata, **kwargs):
+
+        assert isinstance(self.n_folds, int)
+
+        subjects = metadata.subject.values
+        cv = StratifiedKFold(n_splits=self.n_folds, shuffle=True, **kwargs)
+
+        for subject in np.unique(subjects):
+            mask = subjects == subject
+            X_, y_, meta_ = (
+                X[mask],
+                y[mask],
+                metadata[mask],
+            )
+
+            sessions = meta_.session.values
+
+            for session in np.unique(sessions):
+                mask_s = sessions == session
+                X_s, y_s, _ = (
+                    X_[mask_s],
+                    y_[mask_s],
+                    meta_[mask_s],
+                )
+
+                for ix_train, ix_test in cv.split(X_s, y_s):
+
+                    # Map the within-session fold indices back to global indices
+                    ix_train_global = np.where(mask)[0][np.where(mask_s)[0][ix_train]]
+                    ix_test_global = np.where(mask)[0][np.where(mask_s)[0][ix_test]]
+                    yield ix_train_global, ix_test_global
+
+
+class IndividualWithinSessionSplitter(BaseCrossValidator):
+    """Data splitter for within-session evaluation.
+
+    Within-session evaluation uses k-fold cross-validation to determine train
+    and test sets within each session of a single subject. This splitter does not assume
+    that all data and metadata from all subjects is already loaded. If X, y and metadata
+    are from a single subject, it returns the data split for this subject only.
+
+    It can be used as a basis for WithinSessionSplitter, or to avoid downloading all the
+    data at once when that is not needed.
+
+    Parameters
+    ----------
+    n_folds : int
+        Number of folds. Must be at least 2.
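+
+    Examples
+    --------
+    A minimal usage sketch on single-subject data (the exact train/test indices depend
+    on the ``StratifiedKFold`` shuffling, so only the number of splits is checked):
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from moabb.evaluations.splitters import IndividualWithinSessionSplitter
+    >>> X = np.array([[1, 2], [3, 4], [5, 6], [1, 4], [7, 4], [5, 8], [0, 3], [2, 4]])
+    >>> y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
+    >>> metadata = pd.DataFrame(data={'subject': [1] * 8,
+    ...                               'session': ['T', 'T', 'T', 'T', 'E', 'E', 'E', 'E']})
+    >>> isess = IndividualWithinSessionSplitter(n_folds=2)
+    >>> len(list(isess.split(X, y, metadata, random_state=42)))  # 2 folds for each of the 2 sessions
+    4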
+
+    """
+
+    def __init__(self, n_folds: int):
+        self.n_folds = n_folds
+
+    def get_n_splits(self, metadata):
+        return self.n_folds
+
+    def split(self, X, y, metadata, **kwargs):
+
+        assert len(np.unique(metadata.subject)) == 1
+        assert isinstance(self.n_folds, int)
+
+        sessions = metadata.session.values
+        cv = StratifiedKFold(n_splits=self.n_folds, shuffle=True, **kwargs)
+
+        for session in np.unique(sessions):
+            mask = sessions == session
+            X_, y_, _ = (
+                X[mask],
+                y[mask],
+                metadata[mask],
+            )
+
+            for ix_train, ix_test in cv.split(X_, y_):
+                yield ix_train, ix_test
+
+
+class CrossSessionSplitter(BaseCrossValidator):
+    """Data splitter for cross-session evaluation.
+
+    Cross-session evaluation uses a Leave-One-Group-Out cross-validation to
+    evaluate performance across sessions, but for a single subject. This splitter
+    assumes that all data from all subjects is already known and loaded.
+
+    .. image:: images/crosssess.pdf
+        :alt: The schematic diagram of the CrossSession split
+        :align: center
+
+    Parameters
+    ----------
+    n_folds :
+        Not used. For compatibility with other cross-validation splitters.
+        Default: None
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from moabb.evaluations.splitters import CrossSessionSplitter
+    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [8, 9], [5, 4], [2, 5], [1, 7]])
+    >>> y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
+    >>> subjects = np.array([1, 1, 1, 1, 2, 2, 2, 2])
+    >>> sessions = np.array(['T', 'T', 'E', 'E', 'T', 'T', 'E', 'E'])
+    >>> metadata = pd.DataFrame(data={'subject': subjects, 'session': sessions})
+    >>> csess = CrossSessionSplitter()
+    >>> csess.get_n_splits(metadata)
+    4
+    >>> for i, (train_index, test_index) in enumerate(csess.split(X, y, metadata)):
+    ...    print(f"Fold {i}:")
+    ...    print(f"  Train: index={train_index}, group={subjects[train_index]}, session={sessions[train_index]}")
+    ...    print(f"  Test: index={test_index}, group={subjects[test_index]}, sessions={sessions[test_index]}")
+    Fold 0:
+      Train: index=[0 1], group=[1 1], session=['T' 'T']
+      Test: index=[2 3], group=[1 1], sessions=['E' 'E']
+    Fold 1:
+      Train: index=[2 3], group=[1 1], session=['E' 'E']
+      Test: index=[0 1], group=[1 1], sessions=['T' 'T']
+    Fold 2:
+      Train: index=[4 5], group=[2 2], session=['T' 'T']
+      Test: index=[6 7], group=[2 2], sessions=['E' 'E']
+    Fold 3:
+      Train: index=[6 7], group=[2 2], session=['E' 'E']
+      Test: index=[4 5], group=[2 2], sessions=['T' 'T']
+
+    """
+
+    def __init__(self, n_folds=None):
+        self.n_folds = n_folds
+
+    def get_n_splits(self, metadata):
+        sessions_subjects = len(metadata.groupby(["subject", "session"]).first())
+        return sessions_subjects
+
+    def split(self, X, y, metadata):
+
+        subjects = metadata.subject.values
+        split = IndividualCrossSessionSplitter()
+
+        for subject in np.unique(subjects):
+            mask = subjects == subject
+            X_, y_, meta_ = (
+                X[mask],
+                y[mask],
+                metadata[mask],
+            )
+
+            for ix_train, ix_test in split.split(X_, y_, meta_):
+                # Map the per-subject indices back to global indices
+                ix_train = np.where(mask)[0][ix_train]
+                ix_test = np.where(mask)[0][ix_test]
+                yield ix_train, ix_test
+
+
+class IndividualCrossSessionSplitter(BaseCrossValidator):
+    """Data splitter for cross-session evaluation.
+
+    Cross-session evaluation uses a Leave-One-Group-Out cross-validation to
+    evaluate performance across sessions, but for a single subject. This splitter does
+    not assume that all data and metadata from all subjects is already loaded. If X, y
+    and metadata are from a single subject, it returns the data split for this subject only.
+
+    It can be used as a basis for CrossSessionSplitter, or to avoid downloading all the
+    data at once when that is not needed.
+
+    Parameters
+    ----------
+    n_folds :
+        Not used. For compatibility with other cross-validation splitters.
+        Default: None
+
+    """
+
+    def __init__(self, n_folds=None):
+        self.n_folds = n_folds
+
+    def get_n_splits(self, metadata):
+        sessions = metadata.session.values
+        return len(np.unique(sessions))
+
+    def split(self, X, y, metadata):
+        assert len(np.unique(metadata.subject)) == 1
+
+        cv = LeaveOneGroupOut()
+        sessions = metadata.session.values
+
+        for ix_train, ix_test in cv.split(X, y, groups=sessions):
+            yield ix_train, ix_test
+
+
+class CrossSubjectSplitter(BaseCrossValidator):
+    """Data splitter for cross-subject evaluation.
+
+    Cross-subject evaluation uses a Leave-One-Subject-Out (or Leave-k-Subjects-Out)
+    cross-validation to evaluate performance across subjects. This splitter
+    assumes that all data from all subjects is already known and loaded.
+
+    .. image:: images/crosssubj.pdf
+        :alt: The schematic diagram of the CrossSubj split
+        :align: center
+
+    Parameters
+    ----------
+    n_groups : int or None
+        If None, Leave-One-Subject-Out is performed.
+        If int, the subjects are divided into ``n_groups`` folds (GroupKFold), so that
+        several subjects are left out in each fold.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from moabb.evaluations.splitters import CrossSubjectSplitter
+    >>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [8, 9], [5, 4], [2, 5], [1, 7]])
+    >>> y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
+    >>> subjects = np.array([1, 1, 2, 2, 3, 3, 4, 4])
+    >>> metadata = pd.DataFrame(data={'subject': subjects})
+    >>> csubj = CrossSubjectSplitter()
+    >>> csubj.get_n_splits(metadata)
+    4
+    >>> for i, (train_index, test_index) in enumerate(csubj.split(X, y, metadata)):
+    ...    print(f"Fold {i}:")
+    ...    print(f"  Train: index={train_index}, group={subjects[train_index]}")
+    ...    print(f"  Test: index={test_index}, group={subjects[test_index]}")
+    Fold 0:
+      Train: index=[2 3 4 5 6 7], group=[2 2 3 3 4 4]
+      Test: index=[0 1], group=[1 1]
+    Fold 1:
+      Train: index=[0 1 4 5 6 7], group=[1 1 3 3 4 4]
+      Test: index=[2 3], group=[2 2]
+    Fold 2:
+      Train: index=[0 1 2 3 6 7], group=[1 1 2 2 4 4]
+      Test: index=[4 5], group=[3 3]
+    Fold 3:
+      Train: index=[0 1 2 3 4 5], group=[1 1 2 2 3 3]
+      Test: index=[6 7], group=[4 4]
+
+
+    """
+
+    def __init__(self, n_groups=None):
+        self.n_groups = n_groups
+
+    def get_n_splits(self, metadata):
+        return len(metadata.subject.unique())
+
+    def split(self, X, y, metadata):
+
+        groups = metadata.subject.values
+
+        # Define split
+        if self.n_groups is None:
+            cv = LeaveOneGroupOut()
+        else:
+            cv = GroupKFold(n_splits=self.n_groups)
+
+        for ix_train, ix_test in cv.split(metadata, groups=groups):
+            yield ix_train, ix_test
diff --git a/moabb/evaluations/utils.py b/moabb/evaluations/utils.py
index 4a28b8d48..6c7530a32 100644
--- a/moabb/evaluations/utils.py
+++ b/moabb/evaluations/utils.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
+import re
 from pathlib import Path
 from pickle import HIGHEST_PROTOCOL, dump
 from typing import Sequence
 
+import numpy as np
 from numpy import argmax
 from sklearn.pipeline import Pipeline
 
@@ -222,6 +224,17 @@ def create_save_path(
         print("No hdf5_path provided, models will not be saved.")
 
 
+def sort_group(groups):
+    runs_sort = []
+    pattern = r"([0-9]+)(|[a-zA-Z]+[a-zA-Z0-9]*)"
+    for i, group in enumerate(groups):
+        index, description = re.fullmatch(pattern, group).groups()
+        index = int(index)
+        runs_sort.append(index)
+    sorted_ix = np.argsort(runs_sort)
+    return groups[sorted_ix]
+
+
 def _convert_sklearn_params_to_optuna(param_grid: dict) -> dict:
     """
     Function to convert the parameter in Optuna format. This function will
diff --git a/moabb/tests/metasplits.py b/moabb/tests/metasplits.py
new file mode 100644
index 000000000..9651fe71d
--- /dev/null
+++ b/moabb/tests/metasplits.py
@@ -0,0 +1,151 @@
+import numpy as np
+import pytest
+from sklearn.model_selection import LeaveOneGroupOut, StratifiedKFold
+
+from moabb.datasets.fake import FakeDataset
+from moabb.evaluations.metasplitters import OfflineSplit, PseudoOnlineSplit, SamplerSplit
+from moabb.evaluations.splitters import CrossSessionSplitter, CrossSubjectSplitter
+from moabb.paradigms.motor_imagery import FakeImageryParadigm
+
+
+dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=3, seed=12)
+paradigm = FakeImageryParadigm()
+
+
+# Still working on this
+def eval_sampler_split():
+    for subject in dataset.subject_list:
+        X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[subject])
+        sessions = metadata.session
+        for session in np.unique(sessions):
+            ix = sessions == session
+            cv = StratifiedKFold(5, shuffle=True, random_state=42)
+            X_, y_, _ = X[ix], y[ix], metadata.loc[ix]
+            for train, test in cv.split(X_, y_):
+                yield X_[train], X_[test]
+
+
+# Split done for the Cross Session evaluation
+def eval_split_cross_session():
+    for subject in dataset.subject_list:
+        X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[subject])
+        groups = metadata.session.values
+        cv = LeaveOneGroupOut()
+        for _, test in cv.split(X, y, groups):
+            metadata_test = metadata.loc[test]
+            runs = metadata_test.run.values
+            for r in np.unique(runs):
+                ix = runs == r
+                yield X[test[ix]]
+
+
+def pseudo_split_cross_session():
+    for subject in dataset.subject_list:
+        X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[subject])
+        groups = metadata.session.values
+        cv = LeaveOneGroupOut()
+        for _, test in cv.split(X, y, groups):
+            metadata_test = metadata.loc[test]
+            runs = metadata_test.run.values
+            ix = runs == runs[0]
+            yield X[test[ix]]
+
+
+# Split done for the Cross Subject evaluation
+def eval_split_cross_subject():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+    groups = metadata.subject.values
+    cv = LeaveOneGroupOut()
+    for _, test in cv.split(X, y, groups):
+        metadata_test = metadata.loc[test]
+        sessions = metadata_test.session.values
+        for sess in np.unique(sessions):
+            ix = sessions == sess
+            yield X[test[ix]]
+
+
+# Split done for the Cross Subject evaluation
+def pseudo_split_cross_subject():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+    groups = metadata.subject.values
+    cv = LeaveOneGroupOut()
+    for _, test in cv.split(X, y, groups):
+        metadata_test = metadata.loc[test]
+        sessions = metadata_test.session.values
+
+        for sess in np.unique(sessions):
+            ix = sessions == sess
+            X_sess, metadata_sess = X[test[ix]], metadata_test.loc[test[ix]].reset_index(
+                drop=True
+            )
+
+            runs_in_session = metadata_sess.run.values
+            # yield just the calibration part
+            yield X_sess[runs_in_session == runs_in_session[0]]
+
+
+@pytest.mark.parametrize("split", [CrossSubjectSplitter, CrossSessionSplitter])
+def test_offline(split):
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+
+    # `split` is the splitter class itself (from parametrize), not an instance
+    run = split is CrossSessionSplitter
+
+    if split is CrossSessionSplitter:
+        eval_split = eval_split_cross_session
+    else:
+        eval_split = eval_split_cross_subject
+
+    split = split()
+    metasplit = OfflineSplit(run=run)
+
+    Tests = []
+    for _, test in split.split(X, y, metadata):
+        X_test, y_test, metadata_test = X[test], y[test], metadata.loc[test]
+        for test_index in metasplit.split(X_test, y_test, metadata_test):
+            Tests.append(X[test_index])
+
+    for ix, X_test_t in enumerate(eval_split()):
+        # Check if the output is the same as the input
+        assert np.array_equal(Tests[ix], X_test_t)
+
+
+@pytest.mark.parametrize("split", [CrossSubjectSplitter, CrossSessionSplitter])
+def test_pseudoonline(split):
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+
+    # `split` is the splitter class itself (from parametrize), not an instance
+    if split is CrossSessionSplitter:
+        eval_split = pseudo_split_cross_session
+    else:
+        eval_split = pseudo_split_cross_subject
+
+    split = split()
+    metasplit = PseudoOnlineSplit()
+
+    Tests = []
+    for _, test in split.split(X, y, metadata):
+        X_test, y_test, metadata_test = X[test], y[test], metadata.loc[test]
+        for _, calib_index in metasplit.split(X_test, y_test, metadata_test):
+            Tests.append(X[calib_index])
+
+    for ix, X_calib_t in enumerate(eval_split()):
+        # Check if the output is the same as the input
+        assert np.array_equal(Tests[ix], X_calib_t)
+
+
+@pytest.mark.skip(reason="Still working on that")
+def test_sampler(data_eval):
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+    data_size = dict(policy="per_class", value=np.array([5, 10, 30, 50]))
+
+    split = SamplerSplit(data_eval=data_eval, data_size=data_size)
+
+    for ix, ((X_train_t, X_test_t), (train, test)) in enumerate(
+        zip(eval_split_cross_subject(), split.split(X, y, metadata))
+    ):
+        X_train, X_test = X[train], X[test]
+
+        # Check if the output is the same as the input
+        assert np.array_equal(X_train, X_train_t)
+        assert np.array_equal(X_test, X_test_t)
diff --git a/moabb/tests/splits.py b/moabb/tests/splits.py
new file mode 100644
index 000000000..02b326c9d
--- /dev/null
+++ b/moabb/tests/splits.py
@@ -0,0 +1,91 @@
+import numpy as np
+from sklearn.model_selection import LeaveOneGroupOut, StratifiedKFold
+
+from moabb.datasets.fake import FakeDataset
+from moabb.evaluations.splitters import (
+    CrossSessionSplitter,
+    CrossSubjectSplitter,
+    WithinSessionSplitter,
+)
+from moabb.paradigms.motor_imagery import FakeImageryParadigm
+
+
+dataset = FakeDataset(["left_hand", "right_hand"], n_subjects=3, seed=12)
+paradigm = FakeImageryParadigm()
+
+
+# Split done for the Within Session evaluation
+def eval_split_within_session():
+    for subject in dataset.subject_list:
+        X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[subject])
+        sessions = metadata.session
+        for session in np.unique(sessions):
+            ix = sessions == session
+            cv = StratifiedKFold(5, shuffle=True, random_state=42)
+            X_, y_ = X[ix], y[ix]
+            for train, test in cv.split(X_, y_):
+                yield X_[train], X_[test]
+
+
+# Split done for the Cross Session evaluation
+def eval_split_cross_session():
+    for subject in dataset.subject_list:
+        X, y, metadata = paradigm.get_data(dataset=dataset, subjects=[subject])
+        groups = metadata.session.values
+        cv = LeaveOneGroupOut()
+        for train, test in cv.split(X, y, groups):
+            yield X[train], X[test]
+
+
+# Split done for the Cross Subject evaluation
+def eval_split_cross_subject():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+    groups = metadata.subject.values
+    cv = LeaveOneGroupOut()
+    for train, test in cv.split(X, y, groups):
+        yield X[train], X[test]
+
+
+def test_within_session():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+
+    split = WithinSessionSplitter(n_folds=5)
+
+    for ix, ((X_train_t, X_test_t), (train, test)) in enumerate(
+        zip(eval_split_within_session(), split.split(X, y, metadata, random_state=42))
+    ):
+        X_train, X_test = X[train], X[test]
+
+        # Check if the output is the same as the input
+        assert np.array_equal(X_train, X_train_t)
+        assert np.array_equal(X_test, X_test_t)
+
+
+def test_cross_session():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+
+    split = CrossSessionSplitter()
+
+    for ix, ((X_train_t, X_test_t), (train, test)) in enumerate(
+        zip(eval_split_cross_session(), split.split(X, y, metadata))
+    ):
+        X_train, X_test = X[train], X[test]
+
+        # Check if the output is the same as the input
+        assert np.array_equal(X_train, X_train_t)
+        assert np.array_equal(X_test, X_test_t)
+
+
+def test_cross_subject():
+    X, y, metadata = paradigm.get_data(dataset=dataset)
+
+    split = CrossSubjectSplitter()
+
+    for ix, ((X_train_t, X_test_t), (train, test)) in enumerate(
+        zip(eval_split_cross_subject(), split.split(X, y, metadata))
+    ):
+        X_train, X_test = X[train], X[test]
+
+        # Check if the output is the same as the input
+        assert np.array_equal(X_train, X_train_t)
+        assert np.array_equal(X_test, X_test_t)
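For reference, a minimal sketch (not part of the diff) of how the two layers of splitters introduced here are meant to compose: an evaluation splitter yields train/test indices, and PseudoOnlineSplit then carves a calibration set out of each test set. The data below (X, y, metadata) is purely illustrative; the metadata only needs 'subject', 'session' and 'run' columns.

    import numpy as np
    import pandas as pd

    from moabb.evaluations.metasplitters import PseudoOnlineSplit
    from moabb.evaluations.splitters import CrossSubjectSplitter

    # Illustrative data: 2 subjects x 2 sessions x 2 runs, one trial per run
    X = np.random.randn(8, 2)
    y = np.array([1, 2, 1, 2, 1, 2, 1, 2])
    metadata = pd.DataFrame({"subject": [1, 1, 1, 1, 2, 2, 2, 2],
                             "session": [0, 0, 1, 1, 0, 0, 1, 1],
                             "run": ["0", "1", "0", "1", "0", "1", "0", "1"]})

    csubj = CrossSubjectSplitter()   # outer loop: train/test per left-out subject
    posplit = PseudoOnlineSplit()    # inner loop: calibration/evaluation per test session

    for train_ix, test_ix in csubj.split(X, y, metadata):
        X_train, y_train = X[train_ix], y[train_ix]
        meta_test = metadata.loc[test_ix]
        for eval_ix, calib_ix in posplit.split(X[test_ix], y[test_ix], meta_test):
            X_calib, y_calib = X[calib_ix], y[calib_ix]  # first run of the session
            X_eval, y_eval = X[eval_ix], y[eval_ix]      # remaining runs, used for scoring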