Skip to content

Commit

Permalink
added readmes
Browse files Browse the repository at this point in the history
  • Loading branch information
mikhmed-nabiev committed Apr 16, 2024
1 parent 8619828 commit 07f79ca
Show file tree
Hide file tree
Showing 23 changed files with 1,379 additions and 33 deletions.
10 changes: 10 additions & 0 deletions baseline_tensorflow/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Запуск эксперимента

Загрузить данные из репозитория https://github.com/exporl/auditory-eeg-dataset/tree/master.

В файле `config.json` меняем `--absolute path to dataset folder--` на абсолютный путь к датасету.

Важно: согласно инструкции в репозитории датасета, данные нужно предварительно разделить на тренировочные, валидационные и тестовые.

Запуск эксперимента: `python3 baseline_experiment.py`
Создание графиков: `python3 plot_results.py`
173 changes: 173 additions & 0 deletions baseline_tensorflow/baseline_experiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
"""Example experiment for the 2 mismatched segments dilation model."""
import glob
import json
import logging
import os, sys
import tensorflow as tf
import keras

import sys

# add base path to sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
from baseline_model import dilation_model

from dataset_generator import DataGenerator, batch_equalizer_fn, create_tf_dataset


def evaluate_model(model, test_dict):
    """Evaluate a trained model on each subject's test set.

    Parameters
    ----------
    model: tf.keras.Model
        Trained model to evaluate.
    test_dict: dict
        Mapping between a subject identifier and the tf.data.Dataset
        containing that subject's test set.

    Returns
    -------
    dict
        Mapping between each subject and a {metric name: score} dict
        computed on that subject's test set.
    """
    scores_per_subject = {}
    for subject_id, subject_ds in test_dict.items():
        logging.info(f"Scores for subject {subject_id}:")
        subject_results = model.evaluate(subject_ds, verbose=2)
        # Pair each metric name with its corresponding score.
        scores_per_subject[subject_id] = dict(
            zip(model.metrics_names, subject_results)
        )
    return scores_per_subject


if __name__ == "__main__":
    # ------------------------- Experiment parameters -------------------------
    # Length of the decision window, in seconds.
    window_length_s = 5
    # EEG sampling frequency in Hz (dataset is resampled to 64 Hz).
    fs = 64

    window_length = window_length_s * fs  # decision window length in samples
    # Hop length between two consecutive decision windows, in samples.
    hop_length = 64

    epochs = 2
    patience = 5  # early-stopping patience, in epochs without val improvement
    batch_size = 16
    only_evaluate = False  # if True, skip training and load the saved model
    number_mismatch = 4  # number of mismatched segments per training sample

    training_log_filename = "training_log_{}_{}.csv".format(number_mismatch, window_length_s)

    # Get the path to the config file (sits next to this script).
    experiments_folder = os.path.dirname(__file__)
    config_path = os.path.join(experiments_folder, 'config.json')

    # Load the config
    with open(config_path) as fp:
        config = json.load(fp)

    # Provide the path of the dataset,
    # which is already split into train, val, test.
    data_folder = os.path.join(config["dataset_folder"], config['derivatives_folder'], config["split_folder"])

    # Stimulus feature which will be used for training the model. Can be either
    # 'envelope' (dimension 1) or 'mel' (dimension 28).
    stimulus_features = ["envelope"]
    stimulus_dimension = 1

    # Uncomment if you want to train with the mel spectrogram stimulus representation.
    # stimulus_features = ["mel"]
    # stimulus_dimension = 10

    features = ["eeg"] + stimulus_features

    # Create a directory to store (intermediate) results.
    results_folder = os.path.join(experiments_folder,
                                  "results_dilated_convolutional_model_{}_MM_{}_s_{}".format(number_mismatch,
                                                                                            window_length_s,
                                                                                            stimulus_features[0]))
    os.makedirs(results_folder, exist_ok=True)

    # Create the dilation model.
    model = dilation_model(time_window=window_length, eeg_input_dimension=64, env_input_dimension=stimulus_dimension,
                           num_mismatched_segments=number_mismatch)

    model_path = os.path.join(results_folder,
                              "model_{}_MM_{}_s_{}.keras".format(number_mismatch, window_length_s,
                                                                 stimulus_features[0]))

    if only_evaluate:
        model = tf.keras.models.load_model(model_path)

    else:
        # Keep only the data files whose feature suffix (e.g. "eeg",
        # "envelope") is among the requested features.
        train_files = [x for x in glob.glob(os.path.join(data_folder, "train_-_*")) if
                       os.path.basename(x).split("_-_")[-1].split(".")[0] in features]
        # Create a generator over the list of numpy array files.
        train_generator = DataGenerator(train_files, window_length)

        dataset_train = create_tf_dataset(train_generator, window_length, batch_equalizer_fn,
                                          hop_length, batch_size,
                                          number_mismatch=number_mismatch,
                                          data_types=(tf.float32, tf.float32),
                                          feature_dims=(64, stimulus_dimension))

        # Create the generator for the validation set.
        val_files = [x for x in glob.glob(os.path.join(data_folder, "val_-_*")) if
                     os.path.basename(x).split("_-_")[-1].split(".")[0] in features]
        val_generator = DataGenerator(val_files, window_length)
        dataset_val = create_tf_dataset(val_generator, window_length, batch_equalizer_fn,
                                        hop_length, batch_size,
                                        number_mismatch=number_mismatch,
                                        data_types=(tf.float32, tf.float32),
                                        feature_dims=(64, stimulus_dimension))

        # Train the model, checkpointing the best weights (by validation loss)
        # and stopping early when validation stops improving.
        model.fit(
            dataset_train,
            epochs=epochs,
            validation_data=dataset_val,
            callbacks=[
                tf.keras.callbacks.ModelCheckpoint(model_path, save_best_only=True),
                tf.keras.callbacks.CSVLogger(os.path.join(results_folder, training_log_filename)),
                tf.keras.callbacks.EarlyStopping(patience=patience, restore_best_weights=True),
            ],
        )

    # Evaluate the trained weights under several test-time configurations
    # (different window lengths and numbers of mismatched segments).
    test_window_lengths = [2, 4]
    number_mismatch_test = [2, 3, 4, 8]
    for number_mismatch in number_mismatch_test:
        for window_length_s in test_window_lengths:
            window_length = window_length_s * fs
            results_filename = 'eval_{}_{}_s.json'.format(number_mismatch, window_length_s)

            # Rebuild the model for the new input configuration, then load
            # the trained weights (the convolutions are shared across sizes).
            model = dilation_model(time_window=window_length, eeg_input_dimension=64,
                                   env_input_dimension=stimulus_dimension, num_mismatched_segments=number_mismatch)

            model.load_weights(model_path)
            # Evaluate the model on the test set:
            # create a dataset generator for each test subject.
            test_files = [x for x in glob.glob(os.path.join(data_folder, "test_-_*")) if
                          os.path.basename(x).split("_-_")[-1].split(".")[0] in features]
            # Get all different subjects from the test set.
            subjects = list(set([os.path.basename(x).split("_-_")[1] for x in test_files]))
            datasets_test = {}
            # Create a generator for each subject.
            for sub in subjects:
                files_test_sub = [f for f in test_files if sub in os.path.basename(f)]
                test_generator = DataGenerator(files_test_sub, window_length)
                datasets_test[sub] = create_tf_dataset(test_generator, window_length, batch_equalizer_fn,
                                                       hop_length, batch_size=1,
                                                       number_mismatch=number_mismatch,
                                                       data_types=(tf.float32, tf.float32),
                                                       feature_dims=(64, stimulus_dimension))

            evaluation = evaluate_model(model, datasets_test)

            # Save the per-subject results in a JSON-encoded file.
            results_path = os.path.join(results_folder, results_filename)
            with open(results_path, "w") as fp:
                json.dump(evaluation, fp)
            logging.info(f"Results saved at {results_path}")
130 changes: 130 additions & 0 deletions baseline_tensorflow/baseline_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import tensorflow as tf

def dilation_model(
    time_window=None,
    eeg_input_dimension=64,
    env_input_dimension=1,
    layers=3,
    kernel_size=3,
    spatial_filters=8,
    dilation_filters=16,
    activation="relu",
    compile=True,
    num_mismatched_segments=2
):
    """Convolutional dilation model.

    Code was taken and adapted from
    https://github.com/exporl/eeg-matching-eusipco2020

    Parameters
    ----------
    time_window : int or None
        Segment length. If None, the model will accept every time window input
        length.
    eeg_input_dimension : int
        Number of channels of the EEG.
    env_input_dimension : int
        Dimension of the stimulus representation:
        if stimulus == envelope, env_input_dimension = 1;
        if stimulus == mel, env_input_dimension = 28.
    layers : int
        Depth of the network / number of dilation layers.
    kernel_size : int
        Size of the kernel for the dilation convolutions.
    spatial_filters : int
        Number of parallel filters to use in the spatial layer.
    dilation_filters : int
        Number of parallel filters to use in the dilation layers.
    activation : str or list or tuple
        Name of the non-linearity to apply after the dilation layers,
        or list/tuple of different non-linearities (one per layer).
    compile : bool
        If the model should be compiled.
    num_mismatched_segments : int
        Number of mismatched stimulus segments presented alongside the
        single matched segment.

    Returns
    -------
    tf.keras.Model
        The dilation model.

    References
    ----------
    Accou, B., Jalilpour Monesi, M., Montoya, J., Van hamme, H. & Francart, T.
    Modeling the relationship between acoustic stimulus and EEG with a dilated
    convolutional neural network. In 2020 28th European Signal Processing
    Conference (EUSIPCO), 1175-1179, DOI: 10.23919/Eusipco47968.2020.9287417
    (2021). ISSN: 2076-1465.
    Accou, B., Monesi, M. J., hamme, H. V. & Francart, T.
    Predicting speech intelligibility from EEG in a non-linear classification
    paradigm. J. Neural Eng. 18, 066008, DOI: 10.1088/1741-2552/ac33e9 (2021).
    Publisher: IOP Publishing
    """

    # One EEG input plus (num_mismatched_segments + 1) stimulus inputs:
    # the matched segment and the mismatched ones.
    eeg = tf.keras.layers.Input(shape=[time_window, eeg_input_dimension])
    stimuli_input = [tf.keras.layers.Input(shape=[time_window, env_input_dimension]) for _ in range(num_mismatched_segments+1)]

    all_inputs = [eeg]
    all_inputs.extend(stimuli_input)

    stimuli_proj = [x for x in stimuli_input]

    # Activations to apply: broadcast a single name to all layers.
    if isinstance(activation, str):
        activations = [activation] * layers
    else:
        activations = activation

    # Spatial convolution (1x1 across channels).
    eeg_proj_1 = tf.keras.layers.Conv1D(spatial_filters, kernel_size=1)(eeg)

    # Construct dilation layers; dilation rate grows as kernel_size ** layer.
    for layer_index in range(layers):
        # Dilation on EEG.
        eeg_proj_1 = tf.keras.layers.Conv1D(
            dilation_filters,
            kernel_size=kernel_size,
            dilation_rate=kernel_size ** layer_index,
            strides=1,
            activation=activations[layer_index],
        )(eeg_proj_1)

        # Dilation on envelope data; the same layer instance is applied to
        # every stimulus stream so weights are shared between them.
        env_proj_layer = tf.keras.layers.Conv1D(
            dilation_filters,
            kernel_size=kernel_size,
            dilation_rate=kernel_size ** layer_index,
            strides=1,
            activation=activations[layer_index],
        )

        stimuli_proj = [env_proj_layer(stimulus_proj) for stimulus_proj in stimuli_proj]

    # Comparison: normalized dot product (cosine similarity) between the EEG
    # projection and each stimulus projection.
    cos = [tf.keras.layers.Dot(1, normalize=True)([eeg_proj_1, stimulus_proj]) for stimulus_proj in stimuli_proj]

    # Shared linear projection of the similarity matrices to a scalar score.
    linear_proj_sim = tf.keras.layers.Dense(1, activation="linear")

    cos_proj = [linear_proj_sim(tf.keras.layers.Flatten()(cos_i)) for cos_i in cos]

    # Classification: softmax over the candidate-segment scores.
    out = tf.keras.activations.softmax((tf.keras.layers.Concatenate()(cos_proj)))

    model = tf.keras.Model(inputs=all_inputs, outputs=[out])

    if compile:
        model.compile(
            optimizer=tf.keras.optimizers.Adam(),
            metrics=["accuracy"],
            loss=["categorical_crossentropy"],
        )
        # Model.summary() prints the summary itself and returns None, so it
        # must not be wrapped in print() (which would print a stray "None").
        model.summary()
    return model
8 changes: 8 additions & 0 deletions baseline_tensorflow/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"dataset_folder": "/home/bukkacha/Desktop/EEGDataset",
"derivatives_folder": "derivatives",
"preprocessed_eeg_folder": "preprocessed_eeg",
"preprocessed_stimuli_folder": "preprocessed_stimuli",
"split_folder": "split_data",
"stimuli": "stimuli"
}
Loading

0 comments on commit 07f79ca

Please sign in to comment.