diff --git a/examples/plot_multivariate_fadin.py b/examples/plot_multivariate_fadin.py
index e61620d..8e1f886 100644
--- a/examples/plot_multivariate_fadin.py
+++ b/examples/plot_multivariate_fadin.py
@@ -35,7 +35,8 @@ discretization = torch.linspace(0, kernel_length, L)
 ###############################################################################
-# Here, we set the parameters of a Hawkes process with a Exponential(1) distributions.
+# Here, we set the parameters of a Hawkes process with an Exponential(1)
+# distribution.
 baseline = np.array([.1, .5])
 alpha = np.array([[0.6, 0.3], [0.25, 0.7]])
@@ -50,13 +51,14 @@
 ###############################################################################
 # Here, we apply FaDIn.
-solver = FaDIn(n_dim=n_dim,
- kernel="truncated_exponential",
- kernel_length=kernel_length,
- delta=dt, optim="RMSprop",
- params_optim={'lr': 1e-3},
- max_iter=10000, criterion='l2'
- )
+solver = FaDIn(
+ n_dim=n_dim,
+ kernel="truncated_exponential",
+ kernel_length=kernel_length,
+ delta=dt, optim="RMSprop",
+ params_optim={'lr': 1e-3},
+ max_iter=10000
+)
 solver.fit(events, T)
 # We average on the 10 last values of the optimization.
diff --git a/examples/plot_univariate_fadin.py b/examples/plot_univariate_fadin.py
index 3a8faf1..9c47e61 100644
--- a/examples/plot_univariate_fadin.py
+++ b/examples/plot_univariate_fadin.py
@@ -56,7 +56,7 @@
 kernel_length=kernel_length,
 delta=dt, optim="RMSprop",
 params_optim={'lr': 1e-3},
- max_iter=2000, criterion='l2'
+ max_iter=2000
 )
 solver.fit(events, T)
diff --git a/experiments/benchmark/run_benchmark_exp.py b/experiments/benchmark/run_benchmark_exp.py
index b5a6ecb..c3ab80d 100644
--- a/experiments/benchmark/run_benchmark_exp.py
+++ b/experiments/benchmark/run_benchmark_exp.py
@@ -25,8 +25,13 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0):
 L = int(1 / dt)
 discretization = torch.linspace(0, 1, L)
 n_dim = decay.shape[0]
- EXP = DiscreteKernelFiniteSupport(dt, n_dim=n_dim, kernel='truncated_exponential',
- lower=0, upper=1)
+ EXP = DiscreteKernelFiniteSupport(
+ dt,
+ n_dim=n_dim,
+ kernel='truncated_exponential',
+ lower=0,
+ upper=1
+ )
 kernel_values = EXP.kernel_eval([torch.Tensor(decay)], discretization)
@@ -62,14 +67,16 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0):
 def run_fadin(events, decay_init, baseline_init, alpha_init, T, dt, seed=0):
 start = time.time()
 max_iter = 2000
+ init = {
+ 'alpha': torch.tensor(alpha_init),
+ 'baseline': torch.tensor(baseline_init),
+ 'kernel': [torch.tensor(decay_init)]
+ }
 solver = FaDIn(2,
 "truncated_exponential",
- [torch.tensor(decay_init)],
- torch.tensor(baseline_init),
- torch.tensor(alpha_init),
+ init=init,
 delta=dt, optim="RMSprop",
 step_size=1e-3, max_iter=max_iter,
- optimize_kernel=True, precomputations=True,
 ztzG_approx=True, device='cpu', log=False
 )
@@ -258,14 +265,17 @@ def run_experiment(baseline, alpha, decay, T, dt, seed=0):
 alpha_hat = results['param_alpha']
 decay_hat = results['param_kernel'][0]
- RC = DiscreteKernelFiniteSupport(dt, n_dim=2, kernel='truncated_exponential',
+ RC = DiscreteKernelFiniteSupport(dt, n_dim=2,
+ kernel='truncated_exponential',
 lower=0, upper=1)
 intens_fadin = RC.intensity_eval(torch.tensor(baseline_hat),
 torch.tensor(alpha_hat),
 [torch.Tensor(decay_hat)],
 events_grid, torch.linspace(0, 1, L))
- res['err_fadin'] = np.absolute(intens.numpy() - intens_fadin.numpy()).mean()
+ res['err_fadin'] = np.absolute(
+ intens.numpy() - intens_fadin.numpy()
+ ).mean()
 res['time_fadin'] = results['time']
 results = run_gibbs(S,
size_grid, dt, seed=seed) diff --git a/experiments/benchmark/run_benchmark_rc.py b/experiments/benchmark/run_benchmark_rc.py index 633697d..fbc8f4d 100644 --- a/experiments/benchmark/run_benchmark_rc.py +++ b/experiments/benchmark/run_benchmark_rc.py @@ -63,15 +63,16 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): def run_fadin(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)], + 'baseline': torch.tensor(baseline_init), + 'alpha': torch.tensor(alpha_init) + } solver = FaDIn(2, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, - optimize_kernel=True, precomputations=True, ztzG_approx=True, device='cpu', log=False ) diff --git a/experiments/benchmark/run_benchmark_tg.py b/experiments/benchmark/run_benchmark_tg.py index 4e5c97e..24ffd2b 100644 --- a/experiments/benchmark/run_benchmark_tg.py +++ b/experiments/benchmark/run_benchmark_tg.py @@ -62,15 +62,16 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): def run_fadin(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): start = time.time() max_iter = 2000 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(2, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, max_iter=max_iter, - optimize_kernel=True, precomputations=True, ztzG_approx=True, device='cpu', log=False ) diff --git a/experiments/benchmark/run_comparison_ll.py b/experiments/benchmark/run_comparison_ll.py index c3c893a..1c7a29d 100644 --- a/experiments/benchmark/run_comparison_ll.py +++ b/experiments/benchmark/run_comparison_ll.py @@ -11,7 +11,55 @@ from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn -# %% simulate data +# %% +################################ +# Define solver with loglikelihood criterion +################################ + + +def discrete_ll_loss_conv(intensity, events_grid, delta): + """Compute the LL discrete loss using convolutions. + + Parameters + ---------- + intensity : tensor, shape (n_dim, n_grid) + Values of the intensity function evaluated on the grid. + + events_grid : tensor, shape (n_dim, n_grid) + Events projected on the pre-defined grid. + + delta : float + Step size of the discretization grid. + """ + mask = events_grid > 0 + intens = torch.log(intensity[mask]) + return (intensity.sum(1) * delta - + intens.sum()).sum() / events_grid.sum() + + +def compute_gradient_loglikelihood(solver, events_grid, discretization, + i, n_events, end_time): + """One optimizer iteration of FaDIn_loglikelihood solver, + with loglikelihood loss. 
+ """ + intens = solver.kernel_model.intensity_eval( + solver.params_intens[0], + solver.params_intens[1], + solver.params_intens[2:], + events_grid, + discretization + ) + loss = discrete_ll_loss_conv(intens, events_grid, solver.delta) + loss.backward() + + +class FaDInLogLikelihood(FaDIn): + """Define the FaDIn framework for estimated Hawkes processes *with + loglikelihood criterion instead of l2 loss*.""" + compute_gradient = staticmethod(compute_gradient_loglikelihood) + precomputations = False + +################################ # Simulated data ################################ @@ -56,23 +104,40 @@ def simulate_data(baseline, alpha, kernel_params, def run_solver(criterion, events, kernel_params_init, - baseline_init, alpha_init, T, dt, seed=0, kernel='raised_cosine'): - k_params_init = [torch.tensor(a) for a in kernel_params_init] + baseline_init, alpha_init, T, dt, seed=0, + kernel='raised_cosine'): max_iter = 2000 - solver = FaDIn(1, - kernel, - k_params_init, - torch.tensor(baseline_init), - torch.tensor(alpha_init), - delta=dt, - optim="RMSprop", - step_size=1e-3, - max_iter=max_iter, - log=False, - random_state=seed, - device="cpu", - optimize_kernel=True, - criterion=criterion) + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(a) for a in kernel_params_init] + } + if criterion == 'l2': + solver = FaDIn( + 1, + kernel, + init=init, + delta=dt, + optim="RMSprop", + step_size=1e-3, + max_iter=max_iter, + log=False, + random_state=seed, + device="cpu" + ) + elif criterion == 'll': + solver = FaDInLogLikelihood( + 1, + kernel, + init=init, + delta=dt, + optim="RMSprop", + step_size=1e-3, + max_iter=max_iter, + log=False, + random_state=seed, + device="cpu" + ) results = solver.fit(events, T) if kernel == 'truncated_exponential': results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), diff --git a/experiments/benchmark/run_nonparam_exp.py b/experiments/benchmark/run_nonparam_exp.py index 6e001dd..d2d0210 100644 --- a/experiments/benchmark/run_nonparam_exp.py +++ b/experiments/benchmark/run_nonparam_exp.py @@ -61,11 +61,14 @@ def simulate_data(baseline, alpha, decay, T, dt, seed=0): # @mem.cache def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(decay_init)] + } solver = FaDIn(1, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, @@ -73,7 +76,7 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/benchmark/run_nonparam_rc.py b/experiments/benchmark/run_nonparam_rc.py index bef6635..5fc6345 100644 --- a/experiments/benchmark/run_nonparam_rc.py +++ b/experiments/benchmark/run_nonparam_rc.py @@ -64,12 +64,14 @@ def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): # @mem.cache def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(u_init), torch.tensor(sigma_init)] + } solver = FaDIn(1, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - 
torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, @@ -77,7 +79,7 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, see log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/benchmark/run_nonparam_tg.py b/experiments/benchmark/run_nonparam_tg.py index 5147d3c..abeedb3 100644 --- a/experiments/benchmark/run_nonparam_tg.py +++ b/experiments/benchmark/run_nonparam_tg.py @@ -63,12 +63,14 @@ def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): # @mem.cache def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): max_iter = 800 + init = { + 'alpha': torch.tensor(alpha_init), + 'baseline': torch.tensor(baseline_init), + 'kernel': [torch.tensor(m_init), torch.tensor(sigma_init)] + } solver = FaDIn(1, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), + init=init, delta=dt, optim="RMSprop", step_size=1e-3, @@ -76,7 +78,7 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see log=False, random_state=seed, device="cpu", - optimize_kernel=True) + ) results = solver.fit(events, T) return results diff --git a/experiments/example_multivariate.py b/experiments/example_multivariate.py deleted file mode 100644 index 989da2d..0000000 --- a/experiments/example_multivariate.py +++ /dev/null @@ -1,105 +0,0 @@ -# %% import stuff -# import libraries -import time -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn -# from tick.hawkes import HawkesBasisKernels -################################ -# Meta parameters -################################ -dt = 0.01 -L = int(1 / dt) -T = 1_000_000 -size_grid = int(T / dt) + 1 - -# mem = Memory(location=".", verbose=2) - -# %% Experiment -################################ - - -# @mem.cache -def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - u = mu - sigma - n_dim = u.shape[0] - RC = DiscreteKernelFiniteSupport(dt, n_dim=n_dim, kernel='raised_cosine', - lower=0, upper=1) - - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -# @mem.cache -def run_solver(events, dt, T, - ztzG_approx, seed=0): - start = time.time() - max_iter = 2000 - solver = FaDIn(2, - "raised_cosine", - delta=dt, optim="RMSprop", max_iter=max_iter, - optimize_kernel=True, precomputations=True, - ztzG_approx=ztzG_approx, device='cpu', log=False, tol=10e-6 - ) - - print(time.time() - start) - solver.fit(events, T) - results_ = 
dict(param_baseline=solver.param_baseline[-10:].mean(0), - param_alpha=solver.param_alpha[-10:].mean(0), - param_kernel=[solver.param_kernel[0][-10:].mean(0), - solver.param_kernel[1][-10:].mean(0)]) - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -baseline = np.array([.1, .2]) -alpha = np.array([[1.5, 0.1], [0.1, 1.5]]) -mu = np.array([[0.4, 0.6], [0.55, 0.6]]) -sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) -u = mu - sigma -events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=1) - -print("events of the first process: ", events[0].shape[0]) -print("events of the second process: ", events[1].shape[0]) - - -# %% -v = 0.2 -baseline_init = baseline + v -alpha_init = alpha + v -mu_init = mu -sigma_init = sigma + v -u_init = mu_init - sigma_init -ztzG_approx = True -results = run_solver(events, dt, T, ztzG_approx, seed=0) - -# %% diff --git a/experiments/example_univariate.py b/experiments/example_univariate.py deleted file mode 100644 index 3335b4c..0000000 --- a/experiments/example_univariate.py +++ /dev/null @@ -1,106 +0,0 @@ -# %% import stuff -# import libraries -import time -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - -################################ -# Meta parameters -################################ -dt = 0.01 -T = 1_000_000 -kernel_length = 1.5 -size_grid = int(T / dt) + 1 - -# mem = Memory(location=".", verbose=2) - -# %% Experiment -################################ - - -# @mem.cache -def simulate_data(baseline, alpha, a, b, kernel_length, T, dt, seed=0): - L = int(kernel_length / dt) - discretization = torch.linspace(0, kernel_length, L) - n_dim = a.shape[0] - kuma = DiscreteKernelFiniteSupport(dt, n_dim, kernel='kumaraswamy') - kernel_values = kuma.kernel_eval([torch.Tensor(a), torch.Tensor(b)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - -# %% solver - - -# @mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, - alpha_init, kernel_length, dt, T, seed=0): - start = time.time() - max_iter = 10000 - solver = FaDIn(1, - "kumaraswamy", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - max_iter=max_iter, criterion='l2' - ) - - print(time.time() - start) - solver.fit(events, T) - results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), - param_alpha=solver.param_alpha[-10:].mean().item(), - param_kernel=[solver.param_kernel[0][-10:].mean().item(), - solver.param_kernel[1][-10:].mean().item()] - ) - - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - -# %% Test - - -baseline = np.array([.1]) -alpha = np.array([[0.8]]) -middle = kernel_length / 2 -a = np.array([[2.]]) -b = np.array([[2.]]) - - -events = simulate_data(baseline, alpha, a, b, kernel_length, T, dt, seed=0) - -v = 0.2 -baseline_init = baseline + v -alpha_init = alpha + v -a_init = np.array([[1.]]) -b_init = 
np.array([[1.]]) - -results = run_solver(events, a_init, b_init, - baseline_init, alpha_init, 1.5, - dt, T, seed=0) - -print(np.abs(results['param_baseline'] - baseline)) -print(np.abs(results['param_alpha'] - alpha)) -print(np.abs(results['param_kernel'][0] - a)) -print(np.abs(results['param_kernel'][1] - b)) diff --git a/experiments/inference_error/plot_comp_time_dim.py b/experiments/inference_error/plot_comp_time_dim.py deleted file mode 100644 index 669eecb..0000000 --- a/experiments/inference_error/plot_comp_time_dim.py +++ /dev/null @@ -1,82 +0,0 @@ -# %% get results -import numpy as np -import pickle -import matplotlib.pyplot as plt - - -FONTSIZE = 14 -plt.rcParams["figure.figsize"] = (5, 2.9) -plt.rcParams["axes.grid"] = True -plt.rcParams["axes.grid.axis"] = "y" -plt.rcParams["grid.linestyle"] = "--" -plt.rcParams["xtick.labelsize"] = FONTSIZE -plt.rcParams["ytick.labelsize"] = FONTSIZE -plt.rcParams["font.size"] = FONTSIZE -plt.rc("legend", fontsize=FONTSIZE - 1) - -# Load non-param results and FaDIn -file_name = "results/comp_time_dim.pkl" -open_file = open(file_name, "rb") -all_results = pickle.load(open_file) -open_file.close() - - -def get_results(results): - dim_list = results[-1]["n_dim_list"] - dt_list = results[-1]["dt_list"] - T_list = results[-1]["T_list"] - seeds = results[-1]["seeds"] - n_dim = len(dim_list) - n_dt = len(dt_list) - n_seeds = len(seeds) - n_T = len(T_list) - - our_results = np.zeros((n_dim, n_T, n_dt, n_seeds)) - tick_results = np.zeros((n_dim, n_T, n_dt, n_seeds)) - for i in range(n_dim): - for j in range(n_T): - for k in range(n_dt): - for m in range(n_seeds): - idx = ( - i * (n_T * n_dt * n_seeds) - + j * (n_dt * n_seeds) - + k * (n_seeds) - + m - ) - our_results[i, j, k, m] = all_results[idx][0]["comp_time"] - tick_results[i, j, k, m] = all_results[idx][1]["comp_time"] - return our_results, tick_results - - -comp_time_FaDIn, comp_time_tick = get_results(all_results) -# %% -n_dim_list = all_results[-1]['n_dim_list'][::2] -T_list = all_results[-1]['T_list'] -fig, ax = plt.subplots(1, 1) - -mk = ["s", "h"] -colors = ['C1', 'C0'] -ls = ['-.', '-'] -mksize = 8 -lw = 2 - -for i in range(len(T_list)): - ax.loglog(n_dim_list, comp_time_FaDIn.mean(3)[::2, i, 0], marker=mk[0], - markevery=1, linestyle=ls[i], markersize=mksize, c=colors[0]) - ax.fill_between(n_dim_list, - np.percentile(comp_time_FaDIn[::2, i, 0, :], 20, axis=1), - np.percentile(comp_time_FaDIn[::2, i, 0, :], 80, axis=1), - alpha=0.1, color=colors[0] - ) - ax.loglog(n_dim_list, comp_time_tick.mean(3)[::2, i, 0], marker=mk[1], - markevery=1, linestyle=ls[i], markersize=mksize, c=colors[1]) - ax.fill_between(n_dim_list, - np.percentile(comp_time_tick[::2, i, 0, :], 20, axis=1), - np.percentile(comp_time_tick[::2, i, 0, :], 80, axis=1), - alpha=0.1, color=colors[1]) - - -ax.set_xlim(2, 100) -ax.set_xlabel(r'$p$') -fig.tight_layout() -plt.savefig("plots/comparison/time_comparison_nonparam_dim.pdf", bbox_inches="tight") diff --git a/experiments/inference_error/plot_sensi_analysis_length.py b/experiments/inference_error/plot_sensi_analysis_length.py deleted file mode 100644 index 10ca49b..0000000 --- a/experiments/inference_error/plot_sensi_analysis_length.py +++ /dev/null @@ -1,78 +0,0 @@ -# %% -import numpy as np -import pandas as pd -import matplotlib -import matplotlib.pyplot as plt - -# from tueplots.bundles.iclr2023() -FONTSIZE = 14 -plt.rcParams["figure.figsize"] = (5, 3.2) -plt.rcParams["axes.grid"] = False -plt.rcParams["axes.grid.axis"] = "y" -plt.rcParams["grid.linestyle"] = "--" 
-plt.rcParams['xtick.labelsize'] = FONTSIZE -plt.rcParams['ytick.labelsize'] = FONTSIZE -plt.rcParams['font.size'] = FONTSIZE -plt.rcParams['mathtext.fontset'] = 'cm' -plt.rc('legend', fontsize=FONTSIZE - 1) - - -def plot_length_analysis(): - df = pd.read_csv('results/sensitivity_length.csv') - T = df["T"].unique() - T.sort() - - palette = [matplotlib.cm.viridis_r(x) for x in np.linspace(0, 1, 5)][1:] - - fig1 = plt.figure() - - df['estimates'] = 'EXP' - methods = [("EXP", "s-", None)] - - for m, ls, hatch in methods: - for i, t in enumerate(T): - this_df = df.query("T == @t and estimates == @m") - curve = this_df.groupby("W")["err_norm2"].quantile( - [0.25, 0.5, 0.75]).unstack() - plt.loglog( - curve.index, curve[0.5], ls, lw=4, c=palette[i], - markersize=10, markevery=3 - ) - plt.fill_between( - curve.index, curve[0.25], curve[0.75], alpha=0.2, - color=palette[i], hatch=hatch - ) - plt.xlim(1e-0, 1e2) - - fig1.tight_layout() - plt.xlabel(r'$W$') - plt.ylabel(r'$\ell_2$ error', fontdict={'color': 'k'}) - plt.savefig("plots/approx/sensi_w_exp_stat.pdf", bbox_inches='tight') - - fig2 = plt.figure() - - for m, ls, hatch in methods: - for i, t in enumerate(T): - this_df = df.query("T == @t and estimates == @m") - curve = this_df.groupby("W")["time"].quantile( - [0.25, 0.5, 0.75]).unstack() - plt.loglog( - curve.index, curve[0.5], ls, lw=4, c=palette[i], - markersize=10, markevery=3 - ) - plt.fill_between( - curve.index, curve[0.25], curve[0.75], alpha=0.2, - color=palette[i], hatch=hatch - ) - plt.xlim(1e-0, 1e2) - - fig2.tight_layout() - plt.xlabel(r'$W$') - plt.ylabel('Time (s.)', fontdict={'color': 'k'}) - plt.savefig("plots/approx/sensi_w_exp_time.pdf", bbox_inches='tight') - - -plt.close('all') - - -plot_length_analysis() diff --git a/experiments/inference_error/run_comp_autodiff.py b/experiments/inference_error/run_comp_autodiff.py deleted file mode 100644 index b8de2fd..0000000 --- a/experiments/inference_error/run_comp_autodiff.py +++ /dev/null @@ -1,139 +0,0 @@ -# %% import stuff -# import libraries -import itertools -import pickle -import time -import numpy as np -import torch -from joblib import Memory, Parallel, delayed - -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - -################################ -# Meta parameters -################################ -dt = 0.1 -T = 1000 -size_grid = int(T / dt) + 1 - -mem = Memory(location=".", verbose=2) - -# %% simulate data -# Simulated data -################################ - -baseline = np.array([1.1]) -alpha = np.array([[0.8]]) -mu = np.array([[0.5]]) -sigma = np.array([[0.3]]) -u = mu - sigma - - -@mem.cache -def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - u = mu - sigma - RC = DiscreteKernelFiniteSupport(dt, 1, kernel='raised_cosine') - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) # * dt - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): - - max_iter = 800 - solver_autodiff = 
FaDIn(1, - "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - precomputations=False, random_state=0) - start_autodiff = time.time() - solver_autodiff.fit(events, T) - time_autodiff = time.time() - start_autodiff - - solver_FaDIn = FaDIn(1, - "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, - precomputations=True, random_state=0) - start_FaDIn = time.time() - solver_FaDIn.fit(events, T) - time_FaDIn = time.time() - start_FaDIn - - results = dict(time_autodiff=time_autodiff, time_FaDIn=time_FaDIn) - - results["seed"] = seed - results["T"] = T - results["dt"] = dt - - return results - - -# %% example -v = 0.2 -events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) -baseline_init = baseline + v -alpha_init = alpha + v -mu_init = mu + v -sigma_init = sigma - v -u_init = mu_init - sigma_init -# run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0) -# %% eval on grid - - -def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu + v - sigma_init = sigma - v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, baseline_init, alpha_init, - T, dt, seed) - return results - - -T_list = [100000, 1000000] -dt_list = [0.1, 0.01] -seeds = np.arange(10) -info = dict(T_list=T_list, dt_list=dt_list, seeds=seeds) - -n_jobs = 1 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(baseline, alpha, mu, sigma, T, dt, seed=seed) - for T, dt, seed in itertools.product( - T_list, dt_list, seeds - ) -) -all_results.append(info) -file_name = "results/comp_autodiff.pkl" -open_file = open(file_name, "wb") -pickle.dump(all_results, open_file) -open_file.close() diff --git a/experiments/inference_error/run_error_discrete_EXP.py b/experiments/inference_error/run_error_discrete_EXP.py index d75f882..d99a361 100644 --- a/experiments/inference_error/run_error_discrete_EXP.py +++ b/experiments/inference_error/run_error_discrete_EXP.py @@ -3,14 +3,11 @@ import itertools import time import numpy as np -import torch import pandas as pd from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn - +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -32,54 +29,37 @@ # @mem.cache -def simulate_data(baseline, alpha, decay, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - Exp = DiscreteKernelFiniteSupport(dt, 1, kernel='truncated_exponential') - kernel_values = Exp.kernel_eval([torch.Tensor(decay)], discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps +def simulate_data(baseline, alpha, decay, T, seed=0): + kernel = 'expon' + events = simu_hawkes_cluster(T, 
baseline, alpha, kernel, + params_kernel={'scale': 1 / decay}, + random_state=seed) return events -events = simulate_data(baseline, alpha, decay, T, dt, seed=0) +events = simulate_data(baseline, alpha, decay, T, seed=0) # @mem.cache -def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): +def run_solver(events, T, dt, seed=0): start = time.time() max_iter = 2000 - solver = FaDIn(2, + + solver = FaDIn(1, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, kernel_length=10, log=False, - random_state=0, - device="cpu", - optimize_kernel=True + random_state=0 ) + print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item()]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -91,13 +71,8 @@ def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): def run_experiment(baseline, alpha, decay, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, decay, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - results = run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed) + events = simulate_data(baseline, alpha, decay, T, seed=seed) + results = run_solver(events, T, dt, seed) return results @@ -134,3 +109,5 @@ def compute_norm2_error(s): # df['param_sigma'] = df['param_kernel'].apply(lambda x: x[1]) # , 'sigma': 0.3} + +# %% diff --git a/experiments/inference_error/run_error_discrete_EXP_m.py b/experiments/inference_error/run_error_discrete_EXP_m.py index c6c3fa4..5361aad 100644 --- a/experiments/inference_error/run_error_discrete_EXP_m.py +++ b/experiments/inference_error/run_error_discrete_EXP_m.py @@ -4,14 +4,11 @@ import pandas as pd import time import numpy as np -import torch from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error - +from fadin.utils.utils_simu import simu_hawkes_cluster # mem = Memory(location=".", verbose=2) @@ -26,67 +23,33 @@ # @mem.cache -def simulate_data(baseline, alpha, decay, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - n_dim = decay.shape[0] - EXP = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential') - - kernel_values = EXP.kernel_eval([torch.Tensor(decay)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - 
baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps +def simulate_data(baseline, alpha, decay, T, seed=0): + kernel = 'expon' + events = simu_hawkes_cluster(T, baseline, alpha, kernel, + params_kernel={'scale': 1 / decay}, + random_state=seed) return events - # %% solver -## - # %% solver -# @mem.cache -def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 solver = FaDIn(2, "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, random_state=0, - device="cpu", - optimize_kernel=True, - precomputations=True, ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -97,15 +60,9 @@ def run_solver(events, decay_init, baseline_init, alpha_init, dt, T, seed=0): def run_experiment(baseline, alpha, decay, T, dt, seed=0): - v = 0.2 - events = simulate_data(baseline, alpha, decay, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - results = run_solver(events, decay_init, - baseline_init, alpha_init, - dt, T, seed) + events = simulate_data(baseline, alpha, decay, T, seed=seed) + results = run_solver(events, dt, T, seed) + return results diff --git a/experiments/inference_error/run_error_discrete_RC.py b/experiments/inference_error/run_error_discrete_RC.py index f1d6a3e..78977a8 100644 --- a/experiments/inference_error/run_error_discrete_RC.py +++ b/experiments/inference_error/run_error_discrete_RC.py @@ -6,10 +6,10 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -31,57 +31,50 @@ u = mu - sigma -@mem.cache def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) u = mu - sigma - RC = DiscreteKernelFiniteSupport(dt, 1, kernel='raised_cosine') - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'u': u, 'sigma': sigma} + + def raised_cosine(x, **params): + rc = DiscreteKernelFiniteSupport(delta=dt, n_dim=1, + kernel='raised_cosine') + u = params['u'] + sigma = params['sigma'] + kernel_values = rc.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], + torch.tensor(x)) + + return 
kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + raised_cosine, + params_kernel=params, + random_state=seed) return events + +events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) # %% solver ## -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 - solver = FaDIn(2, + + solver = FaDIn(1, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu", - optimize_kernel=True) + random_state=0) + print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item(), - results['param_kernel'][1][-10:].mean().item()]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item(), + solver.param_kernel[1][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -92,16 +85,9 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see # %% eval on grid ## def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu + v - sigma_init = sigma - v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) + return results diff --git a/experiments/inference_error/run_error_discrete_RC_m.py b/experiments/inference_error/run_error_discrete_RC_m.py index c0159d1..7f427a1 100644 --- a/experiments/inference_error/run_error_discrete_RC_m.py +++ b/experiments/inference_error/run_error_discrete_RC_m.py @@ -6,12 +6,11 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error - +from fadin.utils.utils_simu import simu_hawkes_cluster mem = Memory(location=".", verbose=2) @@ -20,74 +19,61 @@ ################################ baseline = np.array([.1, .2]) -alpha = np.array([[1.5, 0.1], [0.1, 1.5]]) +alpha = np.array([[0.75, 0.1], [0.1, 0.75]]) mu = np.array([[0.4, 0.6], [0.55, 0.6]]) sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) u = mu - sigma +dt = 0.01 +T = 1000 +size_grid = int(T / dt) + 1 + -@mem.cache def simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) u = mu - sigma - n_dim = u.shape[0] - RC = DiscreteKernelFiniteSupport(dt, n_dim, kernel='raised_cosine') - - kernel_values = RC.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = 
HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'u': u, 'sigma': sigma} + + def raised_cosine(x, **params): + rc = DiscreteKernelFiniteSupport(delta=dt, n_dim=2, + kernel='raised_cosine') + u = params['u'] + sigma = params['sigma'] + kernel_values = rc.kernel_eval([torch.Tensor(u), torch.Tensor(sigma)], + torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + raised_cosine, + params_kernel=params, + random_state=seed) return events +events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=0) + # %% solver ## -@mem.cache -def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): + +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 solver = FaDIn(2, "raised_cosine", - [torch.tensor(u_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), - delta=dt, optim="RMSprop", - step_size=1e-3, + delta=dt, + optim="RMSprop", max_iter=max_iter, log=False, random_state=0, - device="cpu", - optimize_kernel=True, - precomputations=True, ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0), - results['param_kernel'][1][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0), + solver.param_kernel[1][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -98,16 +84,8 @@ def run_solver(events, u_init, sigma_init, baseline_init, alpha_init, dt, T, see def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, mu, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - mu_init = mu - sigma_init = sigma + v - u_init = mu_init - sigma_init - results = run_solver(events, u_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) return results @@ -138,3 +116,5 @@ def run_experiment(baseline, alpha, mu, sigma, T, dt, seed=0): df['err_u']**2 + df['err_sigma']**2) df.to_csv('results/error_discrete_RC_m.csv', index=False) + +# %% diff --git a/experiments/inference_error/run_error_discrete_TG.py b/experiments/inference_error/run_error_discrete_TG.py index 86891a7..d882aa9 100644 --- a/experiments/inference_error/run_error_discrete_TG.py +++ b/experiments/inference_error/run_error_discrete_TG.py @@ -7,11 +7,10 @@ import numpy as np import torch from joblib import Memory, Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn - +from fadin.utils.utils_simu import simu_hawkes_cluster ################################ # Meta parameters @@ -33,58 +32,49 @@ sigma = np.array([[0.3]]) -@mem.cache def simulate_data(baseline, alpha, m, sigma, 
T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - TG = DiscreteKernelFiniteSupport(dt, 1, kernel='truncated_gaussian') - kernel_values = TG.kernel_eval([torch.Tensor(m), torch.Tensor(sigma)], - discretization) # * dt - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'mu': m, 'sigma': sigma} + + def truncated_gaussian(x, **params): + tg = DiscreteKernelFiniteSupport(delta=dt, n_dim=1, + kernel='truncated_gaussian') + mu = params['mu'] + sigma = params['sigma'] + kernel_values = tg.kernel_eval( + [torch.Tensor(mu), torch.Tensor(sigma)], torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + truncated_gaussian, + params_kernel=params, + random_state=seed) return events events = simulate_data(baseline, alpha, m, sigma, T, dt, seed=0) +# %% -@mem.cache -def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0): + +def run_solver(events, T, dt, seed=0): start = time.time() max_iter = 2000 - solver = FaDIn(2, + solver = FaDIn(1, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu", - optimize_kernel=True) + random_state=seed) print(time.time() - start) - results = solver.fit(events, T) + solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item(), - results['param_kernel'][1][-10:].mean().item()]) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean().item(), + param_alpha=solver.param_alpha[-10:].mean().item(), + param_kernel=[solver.param_kernel[0][-10:].mean().item(), + solver.param_kernel[1][-10:].mean().item()]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -99,9 +89,7 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see sigma_init = np.array([[np.random.rand() * 0.5]]) start = time.time() -results_1 = run_solver( - events, m_init, sigma_init, baseline_init, alpha_init, T, dt, seed=0 -) +results_1 = run_solver(events, T, dt, seed=0) baseline_our = results_1['param_baseline'] alpha_our = results_1['param_alpha'] print(np.abs(results_1['param_baseline'])) # baseline)) @@ -113,15 +101,8 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, T, dt, see def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, m, sigma, T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - m_init = m + v - sigma_init = sigma - v - results = run_solver(events, m_init, sigma_init, - baseline_init, alpha_init, - T, dt, seed) + results = run_solver(events, T, dt, seed) return results @@ -156,3 +137,5 @@ def compute_norm2_error(s): lambda x: compute_norm2_error(x), axis=1) df.to_csv('results/error_discrete_TG.csv', index=False) + +# %% diff --git a/experiments/inference_error/run_error_discrete_TG_m.py 
b/experiments/inference_error/run_error_discrete_TG_m.py index 93dc325..5cd2c9b 100644 --- a/experiments/inference_error/run_error_discrete_TG_m.py +++ b/experiments/inference_error/run_error_discrete_TG_m.py @@ -6,11 +6,12 @@ import numpy as np import torch from joblib import Parallel, delayed, Memory -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc + from fadin.kernels import DiscreteKernelFiniteSupport from fadin.solver import FaDIn from fadin.utils.utils import l2_error +from fadin.utils.utils_simu import simu_hawkes_cluster mem = Memory(location=".", verbose=2) @@ -24,66 +25,46 @@ sigma = np.array([[0.3, 0.3], [0.25, 0.3]]) -@mem.cache def simulate_data(baseline, alpha, m, sigma, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - n_dim = m.shape[0] - TG = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_gaussian') - - kernel_values = TG.kernel_eval([torch.Tensor(m), torch.Tensor(sigma)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k11 = kernel_values[0, 0].double().numpy() - k12 = kernel_values[0, 1].double().numpy() - k21 = kernel_values[1, 0].double().numpy() - k22 = kernel_values[1, 1].double().numpy() - - tf11 = HawkesKernelTimeFunc(t_values=t_values, y_values=k11) - tf12 = HawkesKernelTimeFunc(t_values=t_values, y_values=k12) - tf21 = HawkesKernelTimeFunc(t_values=t_values, y_values=k21) - tf22 = HawkesKernelTimeFunc(t_values=t_values, y_values=k22) - - kernels = [[tf11, tf12], [tf21, tf22]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps + params = {'mu': m, 'sigma': sigma} + + def truncated_gaussian(x, **params): + tg = DiscreteKernelFiniteSupport(delta=dt, n_dim=2, + kernel='truncated_gaussian') + mu = params['mu'] + sigma = params['sigma'] + kernel_values = tg.kernel_eval( + [torch.Tensor(mu), torch.Tensor(sigma)], torch.tensor(x)) + + return kernel_values.double().numpy() + + events = simu_hawkes_cluster(T, baseline, alpha, + truncated_gaussian, + params_kernel=params, + random_state=seed) return events # %% solver -@mem.cache -def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, seed=0): +def run_solver(events, dt, T, seed=0): start = time.time() max_iter = 2000 + solver = FaDIn(2, "truncated_gaussian", - [torch.tensor(m_init), - torch.tensor(sigma_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter, log=False, - random_state=0, - device="cpu", - optimize_kernel=True, - precomputations=True, + random_state=seed, ztzG_approx=True) print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0), - results['param_kernel'][1][-10:].mean(0)]) + solver.fit(events, T) + results_ = dict(param_baseline=solver.param_baseline[-10:].mean(0), + param_alpha=solver.param_alpha[-10:].mean(0), + param_kernel=[solver.param_kernel[0][-10:].mean(0), + solver.param_kernel[1][-10:].mean(0)]) results_["time"] = time.time() - start results_["seed"] = seed results_["T"] = T @@ -94,16 +75,8 @@ def run_solver(events, m_init, sigma_init, baseline_init, alpha_init, dt, T, see def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): - v = 0.2 events = simulate_data(baseline, alpha, m, sigma, T, 
dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - m_init = m + v - sigma_init = sigma + v - - results = run_solver(events, m_init, sigma_init, - baseline_init, alpha_init, - dt, T, seed) + results = run_solver(events, dt, T, seed) return results @@ -133,3 +106,5 @@ def run_experiment(baseline, alpha, m, sigma, T, dt, seed=0): df['err_m']**2 + df['err_sigma']**2) df.to_csv('results/error_discrete_TG_m.csv', index=False) + +# %% diff --git a/experiments/inference_error/run_sensi_analysis_length.py b/experiments/inference_error/run_sensi_analysis_length.py deleted file mode 100644 index 0105199..0000000 --- a/experiments/inference_error/run_sensi_analysis_length.py +++ /dev/null @@ -1,168 +0,0 @@ -# %% import stuff -# import libraries -import time -import itertools -import numpy as np -import torch -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc, HawkesKernelExp -from joblib import Parallel, delayed -import pandas as pd - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - -################################ -# Meta parameters -################################ -dt = 0.01 -T = 10_000 -kernel_length = 20 - -# %% Experiment -################################ - - -def simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0): - L = int(kernel_length / dt) - discretization = torch.linspace(0, kernel_length, L) - n_dim = decay.shape[0] - EXP = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential', - kernel_length=kernel_length, - upper=kernel_length) - kernel_values = EXP.kernel_eval([torch.Tensor(decay)], - discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - k = kernel_values[0, 0].double().numpy() - - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events_discrete = hawkes.timestamps - - tf = HawkesKernelExp(alpha.item(), decay.item()) - kernels = [[tf]] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events_continuous = hawkes.timestamps - - return events_discrete, events_continuous - - -baseline = np.array([.1]) -alpha = np.array([[0.8]]) -decay = np.array([[1.]]) - -events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0) -# %% solver - - -def run_solver(events, decay_init, baseline_init, - alpha_init, kernel_length, T, dt, seed=0): - start = time.time() - max_iter = 2000 - solver = FaDIn(1, - "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - torch.tensor(alpha_init), - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - step_size=1e-3, max_iter=max_iter - ) - - print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean().item(), - param_alpha=results['param_alpha'][-10:].mean().item(), - param_kernel=[results['param_kernel'][0][-10:].mean().item()] - ) - - results_["time"] = time.time() - start - results_['W'] = kernel_length - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -# %% Test - - -baseline = np.array([1.1]) -alpha = np.array([[0.8]]) -decay = np.array([[0.5]]) - -events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, T, dt, seed=0) - -v = 0.2 -baseline_init = baseline + v -alpha_init = 
alpha + v -decay_init = decay + v - -results = run_solver(events_c, decay_init, - baseline_init, alpha_init, - kernel_length, T, dt, seed=0) - -print(np.abs(results['param_baseline'] - baseline)) -print(np.abs(results['param_alpha'] - alpha)) -print(np.abs(results['param_kernel'][0] - decay)) - -# %% - - -def run_experiment(baseline, alpha, decay, kernel_length, T, dt, seed=0): - v = 0.2 - events_d, events_c = simulate_data(baseline, alpha, decay, kernel_length, - T, dt, seed=seed) - baseline_init = baseline + v - alpha_init = alpha + v - decay_init = decay + v - - # results_d = run_solver(events_d, decay_init, baseline_init, alpha_init, - # kernel_length, T, dt, seed) - results_c = run_solver(events_c, decay_init, baseline_init, alpha_init, - kernel_length, T, dt, seed) - - return results_c # results_d - - -W_list = [1, 5, 10, 20, 50, 100] -T_list = [1000, 10000, 100_000, 1_000_000] -dt_list = [0.01] -seeds = np.arange(10) - - -n_jobs = 60 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(baseline, alpha, decay, W, T, dt, seed=seed) - for W, T, dt, seed in itertools.product( - W_list, T_list, dt_list, seeds - ) -) - -# save results -df = pd.DataFrame(all_results) -df['param_decay'] = df['param_kernel'].apply(lambda x: x[0]) -true_param = {'baseline': 1.1, 'alpha': 0.8, 'decay': 0.5} -for param, value in true_param.items(): - df[param] = value - - -def compute_norm2_error(s): - return np.sqrt(np.array([(s[param] - s[f'param_{param}'])**2 - for param in ['baseline', 'alpha', 'decay']]).sum()) - - -df['err_norm2'] = df.apply( - lambda x: compute_norm2_error(x), axis=1) - -df.to_csv('results/sensitivity_length.csv', index=False) diff --git a/experiments/inference_error/run_time_dim.py b/experiments/inference_error/run_time_dim.py deleted file mode 100644 index 7e1c959..0000000 --- a/experiments/inference_error/run_time_dim.py +++ /dev/null @@ -1,142 +0,0 @@ -# %% import stuff -# import libraries -import itertools -import time -import pickle -import numpy as np -import torch -from joblib import Parallel, delayed -from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc -from tick.hawkes import HawkesBasisKernels - -from fadin.kernels import DiscreteKernelFiniteSupport -from fadin.solver import FaDIn - - -# %% simulate data -# Simulated data -################################ - -def simulate_data(n_dim, T, dt, seed=0): - L = int(1 / dt) - discretization = torch.linspace(0, 1, L) - - baseline = 0.1 + np.zeros(n_dim) - - alpha = 0.001 + np.zeros((n_dim, n_dim)) - decay = 5 + np.zeros((n_dim, n_dim)) - - Exp = DiscreteKernelFiniteSupport(dt, n_dim, kernel='truncated_exponential') - kernel_values = Exp.kernel_eval([torch.Tensor(decay)], discretization) - kernel_values = kernel_values * alpha[:, :, None] - - t_values = discretization.double().numpy() - ker = [] - for i in range(n_dim): - for j in range(n_dim): - k = kernel_values[i, j].double().numpy() - tf = HawkesKernelTimeFunc(t_values=t_values, y_values=k) - ker.append(tf) - - kernels = [[ker[j * n_dim + i] for i in range(n_dim)] for j in range(n_dim)] - hawkes = SimuHawkes( - baseline=baseline, kernels=kernels, end_time=T, verbose=False, seed=int(seed) - ) - - hawkes.simulate() - events = hawkes.timestamps - return events - - -# %% -# %% solver - -def run_solver(events, decay_init, baseline_init, alpha_init, T, dt, seed=0): - start = time.time() - max_iter = 800 - n_dim = baseline_init.shape[0] - solver = FaDIn(n_dim, - "truncated_exponential", - [torch.tensor(decay_init)], - torch.tensor(baseline_init), - 
torch.tensor(alpha_init), - delta=dt, optim="RMSprop", - step_size=1e-3, - max_iter=max_iter, - log=False, - random_state=0, - device="cpu", - optimize_kernel=True, - precomputations=True, - ztzG_approx=True) - - print(time.time() - start) - results = solver.fit(events, T) - results_ = dict(param_baseline=results['param_baseline'][-10:].mean(0), - param_alpha=results['param_alpha'][-10:].mean(0), - param_kernel=[results['param_kernel'][0][-10:].mean(0)]) - results_["time"] = time.time() - start - results_["seed"] = seed - results_["T"] = T - results_["dt"] = dt - return results_ - - -def run_experiment(n_dim, T, dt, seed=0): - events = simulate_data(n_dim, T, dt, seed=seed) - baseline_init = np.array([np.random.rand()]) - alpha_init = np.array([[np.random.rand()]]) - decay_init = np.array([[np.random.rand()]]) - - start_our = time.time() - run_solver(events, decay_init, baseline_init, alpha_init, - T, dt, seed=0) - time_our = time.time() - start_our - - start_tick = time.time() - non_param = HawkesBasisKernels(1, n_basis=1, kernel_size=int(1 / dt), - max_iter=800, ode_tol=1e-15) - non_param.fit(events) - time_tick = time.time() - start_tick - - res_our = dict(comp_time=time_our, n_dim=n_dim, T=T, dt=dt, seed=seed) - - res_tick = dict(comp_time=time_tick, n_dim=n_dim, T=T, dt=dt, seed=seed) - - return res_our, res_tick - - -n_dim = 10 -dt = 0.01 -T = 1000 - -us, tick = run_experiment(n_dim, T, dt, seed=0) - -print("us is:", us['comp_time']) -print("tick is:", tick['comp_time']) -# %% run - - -n_dim_list = [2, 5, 10, 50, 100] -dt_list = [0.1] -T_list = [100_000, 1_000_000] -seeds = np.arange(10) - -info = dict(n_dim_list=n_dim_list, T_list=T_list, dt_list=dt_list, seeds=seeds) - -n_jobs = 70 -all_results = Parallel(n_jobs=n_jobs, verbose=10)( - delayed(run_experiment)(n_dim, T, dt, seed=seed) - for n_dim, T, dt, seed in itertools.product( - n_dim_list, T_list, dt_list, seeds - ) -) - - -all_results.append(info) -file_name = "results/comp_time_dim.pkl" -open_file = open(file_name, "wb") -pickle.dump(all_results, open_file) -open_file.close() - -# %% diff --git a/experiments/meg/sample.py b/experiments/meg/sample.py index a24ae68..d7a3cda 100644 --- a/experiments/meg/sample.py +++ b/experiments/meg/sample.py @@ -2,21 +2,23 @@ import numpy as np import copy import torch -from fadin.solver import FaDIn, plot +from fadin.solver import FaDIn +from fadin.utils.vis import plot from fadin.utils.utils_meg import proprocess_tasks, filter_activation, \ get_atoms_timestamps # Load CDL output -with open("experiments/meg/dict_sample.json", "r") as fp: +with open("experiments/meg/cdl_sample.json", "r") as fp: dict_cdl = json.load(fp) BL_MASK = torch.Tensor([1, 1, 1]) ALPHA_MASK = torch.Tensor([[0, 0, 0], [0, 0, 0], [1, 1, 0]]) +OPTIM_MASK = {'baseline': BL_MASK, 'alpha': ALPHA_MASK} def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, - kernel, baseline_mask, alpha_mask, + kernel, optim_mask, plotfig=False, figtitle=None, savefig=None, **fadin_init): """ @@ -119,7 +121,7 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, # Fit Hawkes process to data solver = FaDIn(n_dim=len(events_acti_tt), kernel=kernel, - baseline_mask=baseline_mask, alpha_mask=alpha_mask, + optim_mask=optim_mask, **fadin_init) solver.fit(events_acti_tt, T) # Return results @@ -132,278 +134,98 @@ def fit_fadin_sample(list_tasks, atom, cdl_dict, filter_interval, thresh_acti, ############################################################################## -# REPRODUCE SAMPLE RESULTS IN 
[1] +# REPRODUCE SAMPLE RESULTS IN FaDIn PAPER ############################################################################## -fig, tg_atom6_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=6, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='truncated_gaussian', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=10000, - ztzG_approx=False, - figtitle='Masked FaDIn with TG kernels, atom 6 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', - plotfig=True - ) -print('Truncated gaussian, atom 6, with mask') +fig, tg_atom6_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=6, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='truncated_gaussian', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=10000, + ztzG_approx=False, + figtitle='Masked FaDIn with TG kernels, atom 6 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_practi.png', + plotfig=True +) +print('\nTruncated gaussian, atom 6, with mask') print('Estimated baseline:', tg_atom6_mask[0]) print('Estimated alpha:', tg_atom6_mask[1]) print('Estimated kernel parameters', tg_atom6_mask[2:]) - -fig, tg_atom3_allmask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=3, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='truncated_gaussian', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=10000, - ztzG_approx=False, - figtitle='Masked FaDIn with TG kernels, atom 3 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_practi.png', - plotfig=True - ) -print('Truncated gaussian, atom 3, with mask') +fig, tg_atom3_allmask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=3, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='truncated_gaussian', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=10000, + ztzG_approx=False, + figtitle='Masked FaDIn with TG kernels, atom 3 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_practi.png', + plotfig=True +) +print('\nTruncated gaussian, atom 3, with mask') print('Estimated baseline:', tg_atom3_allmask[0]) print('Estimated alpha:', tg_atom3_allmask[1]) print('Estimated kernel parameters', tg_atom3_allmask[2:]) -fig, rc_atom3_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=3, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='raised_cosine', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=20000, - ztzG_approx=False, - figtitle='Masked FaDIn with RC kernels, atom 3 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_rc_3_practi.png', - plotfig=True - ) -print('Raised_cosine, atom 3, with mask') +fig, rc_atom3_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=3, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='raised_cosine', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=20000, + ztzG_approx=False, + 
figtitle='Masked FaDIn with RC kernels, atom 3 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_rc_3_practi.png', + plotfig=True +) +print('\nRaised_cosine, atom 3, with mask') print('Estimated baseline:', rc_atom3_mask[0]) print('Estimated alpha:', 2 * rc_atom3_mask[3] * rc_atom3_mask[1]) print('Estimated kernel parameters u and s', rc_atom3_mask[2:]) -fig, rc_atom6_mask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), - atom=6, - cdl_dict=dict_cdl, - filter_interval=0.01, - thresh_acti=0.6e-10, - kernel='raised_cosine', - baseline_mask=BL_MASK, - alpha_mask=ALPHA_MASK, - kernel_length=0.5, - delta=0.02, - optim='RMSprop', - params_optim={'lr': 1e-3}, - max_iter=20000, - ztzG_approx=False, - figtitle='Masked FaDIn with RC kernels, atom 6 filt and thresh actis', - savefig='fit_fadin_sample_plots/nomarked_masked_rc_6_practi.png', - plotfig=True - ) - -print('Raised_cosine, atom 6, with mask') +fig, rc_atom6_mask = fit_fadin_sample( + list_tasks=(['1', '2'], ['3', '4']), + atom=6, + cdl_dict=dict_cdl, + filter_interval=0.01, + thresh_acti=0.6e-10, + kernel='raised_cosine', + optim_mask=OPTIM_MASK, + kernel_length=0.5, + delta=0.02, + optim='RMSprop', + params_optim={'lr': 1e-3}, + max_iter=20000, + ztzG_approx=False, + figtitle='Masked FaDIn with RC kernels, atom 6 filt and thresh actis', + savefig='fit_fadin_sample_plots/nomarked_masked_rc_6_practi.png', + plotfig=True +) + +print('nRaised_cosine, atom 6, with mask') print('Estimated baseline:', rc_atom6_mask[0]) print('Estimated alpha:', 2 * rc_atom6_mask[3] * rc_atom6_mask[1]) print('Estimated kernel parameters u and s', rc_atom6_mask[2:]) - -# ############################################################################## -# # OTHER EXPERIMENTS WITH TRUNCATED GAUSSIAN KERNELS -# ############################################################################## - -# fig, tg_atom6_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 6 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_6_practi.png', -# plotfig=True -# ) - -# print('Truncated gaussian, atom 6, without mask') -# print('Estimated baseline:', tg_atom6_nomask[0]) -# print('Estimated alpha:', 2 * tg_atom6_nomask[3] * tg_atom6_nomask[1]) -# print('Estimated kernel parameters u and s', tg_atom6_nomask[2:]) - -# fig, tg_atom3_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 3 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_3_practi.png', -# plotfig=True -# ) - -# print('Truncated gaussian, atom 3 without mask') -# print('Estimated baseline:', tg_atom3_nomask[0]) -# print('Estimated alpha:', 2 * tg_atom3_nomask[3] * tg_atom3_nomask[1]) -# print('Estimated kernel parameters u and s', tg_atom3_nomask[2:]) - -# fig, tg_atom3_nofilt_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# 
filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 3 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_tg_3_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom3_nofilt = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# baseline_mask=BL_MASK, -# alpha_mask=ALPHA_MASK, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='Masked FaDIn with TG kernels, atom 3 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_3_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom6_nofilt_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='FaDIn with TG kernels, atom 6 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_rawacti.png', -# plotfig=True -# ) - -# fig, tg_atom6_nofilt = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=None, -# thresh_acti=0., -# kernel='truncated_gaussian', -# baseline_mask=BL_MASK, -# alpha_mask=ALPHA_MASK, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=10000, -# ztzG_approx=False, -# figtitle='Masked FaDIn with TG kernels, atom 6 raw actis', -# savefig='fit_fadin_sample_plots/nomarked_masked_tg_6_rawacti.png', -# plotfig=True -# ) - -############################################################################## -# OTHER EXPERIMENTS WITH RAISED_COSINE KERNELS -############################################################################## - -# fig, rc_atom3_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=3, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='raised_cosine', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=20000, -# ztzG_approx=False, -# figtitle='FaDIn with RC kernels, atom 3 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_rc_3_practi.png', -# plotfig=True -# ) -# print('Raised_cosine, atom 3, no mask') -# print('Estimated baseline:', rc_atom3_nomask[0]) -# print('Estimated alpha:', 2 * rc_atom3_nomask[3] * rc_atom3_nomask[1]) -# print('Estimated kernel parameters', rc_atom3_nomask[2:]) - -# fig, rc_atom6_nomask = fit_fadin_sample(list_tasks=(['1', '2'], ['3', '4']), -# atom=6, -# cdl_dict=dict_cdl, -# filter_interval=0.01, -# thresh_acti=0.6e-10, -# kernel='raised_cosine', -# baseline_mask=None, -# alpha_mask=None, -# kernel_length=0.5, -# delta=0.02, -# optim='RMSprop', -# params_optim={'lr': 1e-3}, -# max_iter=20000, -# ztzG_approx=False, -# figtitle='FaDIn with RC kernels, atom 6 filt and thresh actis', -# savefig='fit_fadin_sample_plots/nomarked_nomasked_rc_6_practi.png', -# plotfig=True -# ) - -# print('Raised_cosine, atom 6, without mask') -# print('Estimated baseline:', rc_atom6_nomask[0]) -# 
print('Estimated alpha:', 2 * rc_atom6_nomask[3] * rc_atom6_nomask[1]) -# print('Estimated kernel parameters', rc_atom6_nomask[2:]) diff --git a/fadin/init.py b/fadin/init.py new file mode 100644 index 0000000..0984260 --- /dev/null +++ b/fadin/init.py @@ -0,0 +1,259 @@ +import torch +import matplotlib.pyplot as plt + + +def init_hawkes_params(solver, init, events, n_ground_events, end_time): + """ + Computes the initial Hawkes parameters for the FaDIn solver. + + The function supports three modes of initialization: + - 'random': random initialization of parameters. + - 'moment_matching': moment matching initialization of parameters. + - given: parameters are given by user. + + Parameters + ---------- + solver : FaDIn + FaDIn solver. + init: `str` or `dict` + Mode of initialization. Supported values are 'random', + 'moment_matching', and a dictionary with keys 'baseline', 'alpha' and + 'kernel'. + events: list of array of size number of timestamps, + list size is self.n_dim. + n_ground_events : torch.tensor + Number of ground events for each dimension + end_time: float + End time of the events time series. + + Returns: + -------- + params_intens: list + List of parameters of the Hawkes process. + [baseline, alpha, kernel_params] + baseline : `tensor`, shape `(solver.n_dim)` + Baseline parameter of the Hawkes process. + alpha : `tensor`, shape `(solver.n_dim, n_dim)` + Weight parameter of the Hawkes process. + kernel_params : `list` of `tensor` + list containing tensor array of kernels parameters. + The size of the list varies depending the number of + parameters. The shape of each tensor is + `(solver.n_dim, solver.n_dim)`. + """ + # Compute initial Hawkes parameters + if 'moment_matching' in init: + mm_mode = init.split('_')[-1] + baseline, alpha, kernel_params_init = momentmatching_nomark( + solver, + events, + n_ground_events, + end_time, + mm_mode + ) + elif init == 'random': + baseline, alpha, kernel_params_init = random_params(solver) + else: + baseline = init['baseline'].float() + alpha = init['alpha'].float() + kernel_params_init = init['kernel'] + + # Format initial parameters for optimization + solver.baseline = (baseline * solver.baseline_mask).requires_grad_(True) + solver.alpha = (alpha * solver.alpha_mask).requires_grad_(True) + params_intens = [solver.baseline, solver.alpha] + solver.n_kernel_params = len(kernel_params_init) + for i in range(solver.n_kernel_params): + kernel_param = kernel_params_init[i].float().clip(1e-4) + kernel_param.requires_grad_(True) + params_intens.append(kernel_param) + + return params_intens + + +def momentmatching_kernel_nomark(solver, events, n_ground_events, + plot_delta=False, mode='max'): + """Moment matching initialization of kernel parameters. + + Implemented for 'truncated_gaussian' and 'raised_cosine' kernels. + For the truncated gaussian kernel, the means $m_{i,j}$ and std + $\\sigma_{i,j}$ are: + $m_{i, j} = + \\frac{1}{N_{g_i}(T)}\\sum_{t_n^i \\in \\mathscr{F}_T^i} + \\delta t^{i, j}_n$ + $\\sigma_{i, j} = + \\sqrt{\\dfrac{ + \\sum_{t_n^i \\in \\mathscr{F}_T^i} (\\delta t^{i, j}_n - m_{i, j})^2 + }{N_{g_i}(T) - 1}}. 
+ For the raised cosine kernel, the parameters $u_{i,j}$ and $s_{i,j} are: + $u^{\\text{m}}_{i, j} = + \\max{(0, m^{\\text{m}}_{i, j} - \\sigma^{\\text{m}}_{i, j})}$ + $s^{\\text{m}}_{i, j} = \\sigma_{i, j}^{\\text{m}}$ + + Parameters + ---------- + solver : `FaDIn` or `MarkedFaDIn` object + The solver object + events : list of torch.tensor + List of events for each dimension + n_ground_events : torch.tensor + Number of ground events for each dimension + plot_delta : bool, default=False + Whether to plot the delta_t distribution + mode : str, default='max' + Mode to compute the delta_t distribution. Supported values are 'max' + and 'mean'. + + Returns + ------- + list of torch.tensor + List of kernel parameters + + """ + kernel_params_init = [torch.ones(solver.n_dim, solver.n_dim), + torch.ones(solver.n_dim, solver.n_dim)] + for i in range(solver.n_dim): + for j in range(solver.n_dim): + # Mean, std of time delta of [i, j] kernel + if mode == 'max': + delta_t = torch.zeros(int(n_ground_events[i].item())) + for n in range(int(n_ground_events[i])): + t_n_i = events[i][n] + t_n_j = torch.max( + torch.where(torch.tensor(events[j]) < t_n_i, + torch.tensor(events[j]), + 0.) + ) + delta_t[n] = t_n_i - t_n_j + avg = torch.mean(delta_t) + std = torch.std(delta_t) + if mode == 'mean': + delta_t = [] + for n in range(int(n_ground_events[i])): + t_n_i = events[i][n] + for t_n_j in events[j]: + if t_n_j < t_n_i - solver.kernel_length: + continue + if t_n_j >= t_n_i: + break + delta_t.append(t_n_i - t_n_j) + avg = torch.mean(torch.tensor(delta_t)) + std = torch.std(torch.tensor(delta_t)) + # Plot delta_t distribution + if plot_delta: + fig_delta, ax_delta = plt.subplots() + ax_delta.hist(delta_t, bins=20) + ax_delta.set_xlim([0, solver.kernel_length]) + ax_delta.set_xlabel('Time') + ax_delta.set_ylabel('Histogram') + fig_delta.suptitle('Moment Matching delta_t') + fig_delta.show() + # Parameters of [i, j] kernel + if solver.kernel == 'truncated_gaussian': + kernel_params_init[0][i, j] = avg + if solver.kernel == 'raised_cosine': + u = max(avg - std, 0) + kernel_params_init[0][i, j] = u + kernel_params_init[1][i, j] = std + return kernel_params_init + + +def momentmatching_nomark(solver, events, n_ground_events, end_time, + mode='max'): + """Moment matching initialization of baseline, alpha and kernel parameters. + + $\\mu_i^s = \frac{\\#\\mathscr{F}^i_T}{(D+1)T} \forall i \\in [1, D]$ + $\\alpha_{i, j}^s = \\frac{1}{D+1} \\forall i,j \\in [1, D]$ + Kernel parameters are initialized by function + momentmatching_kernel_nomark`. + + Parameters + ---------- + solver: FaDIn + FaDIn solver. + events: list of array of size number of timestamps, + list size is self.n_dim. + n_ground_events : torch.tensor + Number of ground events for each dimension + end_time: float + End time of the events time series. + mode: `str` + Mode to compute the delta_t distribution. Supported values are 'max' + and 'mean'. + + Returns + ------- + baseline: torch.tensor + Baseline parameter of the Hawkes process. + alpha: torch.tensor + Weight parameter of the Hawkes process. + kernel_params_init: list of torch.tensor + List of kernel parameters. 
+ """ + assert solver.kernel in ['truncated_gaussian', 'raised_cosine'], ( + f"Smart initialization not implemented for kernel {solver.kernel}" + ) + # Baseline init + baseline = n_ground_events / (end_time * (solver.n_dim + 1)) + + # Alpha init + alpha = torch.ones(solver.n_dim, solver.n_dim) / (solver.n_dim + 1) + + # Kernel parameters init + kernel_params_init = momentmatching_kernel_nomark( + solver, events, n_ground_events, mode=mode + ) + return baseline, alpha, kernel_params_init + + +def random_params(solver): + """Random initialization of baseline, alpha, and kernel parameters. + + Hawkes parameters are initialized using a random distribution. + + Parameters + ---------- + solver : FaDIn + FaDIn solver. + + Returns + ------- + baseline: torch.tensor + Baseline parameter of the Hawkes process. + alpha: torch.tensor + Weight parameter of the Hawkes process. + kernel_params_init: list of torch.tensor + List of kernel parameters. + """ + # Baseline init + baseline = torch.rand(solver.n_dim) + + # Alpha init + alpha = torch.rand(solver.n_dim, solver.n_dim) + + # Kernel parameters init + kernel_params_init = [] + if solver.kernel == 'raised_cosine': + temp = 0.5 * solver.kernel_length * \ + torch.rand(solver.n_dim, solver.n_dim) + temp2 = 0.5 * solver.kernel_length * \ + torch.rand(solver.n_dim, solver.n_dim) + kernel_params_init.append(temp) + kernel_params_init.append(temp2) + elif solver.kernel == 'truncated_gaussian': + temp = 0.25 * solver.kernel_length * \ + torch.rand(solver.n_dim, solver.n_dim) + temp2 = 0.5 * solver.kernel_length * \ + torch.rand(solver.n_dim, solver.n_dim) + kernel_params_init.append(temp) + kernel_params_init.append(temp2) + elif solver.kernel == 'truncated_exponential': + kernel_params_init.append( + 2 * torch.rand(solver.n_dim, solver.n_dim) + ) + else: + raise NotImplementedError( + 'kernel initial parameters of not \ + implemented kernel have to be given' + ) + return baseline, alpha, kernel_params_init diff --git a/fadin/loss_and_gradient.py b/fadin/loss_and_gradient.py index 6f1cded..da7f445 100644 --- a/fadin/loss_and_gradient.py +++ b/fadin/loss_and_gradient.py @@ -1,6 +1,85 @@ import torch +def compute_gradient_fadin(solver, events_grid, discretization, + i, n_events, end_time): + """Updates gradients for optimizer iteration of FaDIn solver, + with l2 loss and precomputations. Gradients are updated inplace. + + Parameters + ---------- + solver : FaDIn + The FaDIn solver. + events_grid : tensor, shape (n_dim, n_grid) (optionnal) + Not necessary in this function, present for FaDIn derived classes. + discretization : tensor, shape (L,) + Discretization grid. + i : int + Optimizer iteration number. + n_events : tensor, shape (n_dim,) + Number of events for each dimension. + end_time : float + The end time of the Hawkes process. 
+ + Returns + ------- + None + """ + # Compute kernel and gradient + kernel = solver.kernel_model.kernel_eval( + solver.params_intens[2:], + discretization + ) + grad_theta = solver.kernel_model.grad_eval( + solver.params_intens[2:], + discretization + ) + + if solver.log: + solver.v_loss[i] = \ + discrete_l2_loss_precomputation(solver.zG, solver.zN, solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, n_events, + solver.delta, + end_time).detach() + # Update baseline gradient + solver.params_intens[0].grad = get_grad_baseline( + solver.zG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + solver.delta, + n_events, + end_time + ) + # Update alpha gradient + solver.params_intens[1].grad = get_grad_alpha( + solver.zG, + solver.zN, + solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + solver.delta, + n_events + ) + # Update kernel gradient + for j in range(solver.n_kernel_params): + solver.params_intens[2 + j].grad = \ + get_grad_eta( + solver.zG, + solver.zN, + solver.ztzG, + solver.params_intens[0], + solver.params_intens[1], + kernel, + grad_theta[j], + solver.delta, + n_events + ) + + def discrete_l2_loss_conv(intensity, events_grid, delta): """Compute the l2 discrete loss using convolutions. @@ -25,26 +104,6 @@ def discrete_l2_loss_conv(intensity, events_grid, delta): (intensity * events_grid).sum(1)).sum()) / events_grid.sum() -def discrete_ll_loss_conv(intensity, events_grid, delta): - """Compute the LL discrete loss using convolutions. - - Parameters - ---------- - intensity : tensor, shape (n_dim, n_grid) - Values of the intensity function evaluated on the grid. - - events_grid : tensor, shape (n_dim, n_grid) - Events projected on the pre-defined grid. - - delta : float - Step size of the discretization grid. - """ - mask = events_grid > 0 - intens = torch.log(intensity[mask]) - return (intensity.sum(1) * delta - - intens.sum()).sum() / events_grid.sum() - - def discrete_l2_loss_precomputation(zG, zN, ztzG, baseline, alpha, kernel, n_events, delta, end_time): """Compute the l2 discrete loss using precomputation terms. @@ -109,7 +168,8 @@ def squared_compensator_2(zG, baseline, alpha, kernel): .. math:: \\sum_{i=1}^{p}\\mu_i \\sum_{j=1}^{p} \\sum_{\tau=1}^{L} - \\phi_{ij}^\\Delta[\\tau] \\left(\\sum_{s=1}^{G} z_{j}[s-\\tau] \\right) + \\phi_{ij}^\\Delta[\\tau] \\left(\\sum_{s=1}^{G} z_{j}[s-\\tau] + \\right) Parameters ---------- @@ -175,8 +235,9 @@ def squared_compensator_3(ztzG, alpha, kernel): for k in range(n_dim): for j in range(n_dim): alpha_prod_ijk = alpha[i, j] * alpha[i, k] - temp2 = kernel[i, k].view(1, L) * (ztzG[j, k] - * kernel[i, j].view(L, 1)).sum(0) + temp2 = kernel[i, k].view(1, L) * ( + ztzG[j, k] * kernel[i, j].view(L, 1) + ).sum(0) res += alpha_prod_ijk * temp2.sum() return res @@ -357,7 +418,8 @@ def get_grad_alpha(zG, zN, ztzG, baseline, alpha, kernel, delta, n_events): def get_grad_eta(zG, zN, ztzG, baseline, alpha, kernel, grad_kernel, delta, n_events): - """Return the gradient of the discrete l2 loss w.r.t. one kernel parameters. + """Return the gradient of the discrete l2 loss w.r.t. one kernel + parameter. .. 
math:: N_T\\frac{\\partial\\mathcal{L}_G}{\\partial \\eta{ml}} = diff --git a/fadin/solver.py b/fadin/solver.py index 18a3a7f..687f387 100644 --- a/fadin/solver.py +++ b/fadin/solver.py @@ -1,15 +1,11 @@ import torch import time -import matplotlib.pyplot as plt -import numpy as np from fadin.utils.utils import optimizer, projected_grid -from fadin.utils.compute_constants import get_zG, get_zN, get_ztzG, \ - get_ztzG_approx -from fadin.loss_and_gradient import discrete_l2_loss_precomputation, \ - discrete_l2_loss_conv, get_grad_baseline, get_grad_alpha, get_grad_eta, \ - discrete_ll_loss_conv +from fadin.utils.compute_constants import compute_constants_fadin +from fadin.loss_and_gradient import compute_gradient_fadin from fadin.kernels import DiscreteKernelFiniteSupport +from fadin.init import init_hawkes_params class FaDIn(object): @@ -49,27 +45,34 @@ class FaDIn(object): Either define a kernel in ``{'raised_cosine' | 'truncated_gaussian' | 'truncated_exponential'}`` or a custom kernel. - kernel_params_init : `list` of tensor of shape (n_dim, n_dim) - Initial parameters of the kernel. - - baseline_init : `tensor`, shape (n_dim,) - Initial baseline parameters of the intensity of the Hawkes process. - - baseline_mask : `tensor` of shape (n_dim,), or `None` - Tensor of same shape as the baseline vector, with values in (0, 1). - `baseline` coordinates where `baseline_mask` is equal to 0 - will stay constant equal to zero and not be optimized. - If set to `None`, all coordinates of baseline will be optimized. - - alpha_init : `tensor`, shape (n_dim, n_dim) - Initial alpha parameters of the intensity of the Hawkes process. - - alpha_mask : `tensor` of shape (n_dim, n_dim), or `None` - Tensor of same shape as the `alpha` tensor, with values in (0, 1). - `alpha` coordinates and kernel parameters where `alpha_mask` = 0 - will not be optimized. - If set to `None`, all coordinates of alpha will be optimized, - and all kernel parameters will be optimized if optimize_kernel=`True`. + init: `str` or `dict`, default='random' + Initialization strategy of the parameters of the Hawkes process. + If set to 'random', the parameters are initialized randomly. + If set to 'moment_matching_max', the parameters are initialized + using the moment matching method with max mode. + If set to 'moment_matching_mean', the parameters are initialized + using the moment matching method with mean mode. + Otherwise, the parameters are initialized using the given dictionary, + , which must contain the following keys: + - 'baseline': `tensor`, shape (n_dim,): Initial baseline + - 'alpha': `tensor`, shape (n_dim, n_dim): Initial alpha + - 'kernel': `list` of tensors of shape (n_dim, n_dim): + Initial kernel parameters. + + optim_mask: `dict` of `tensor` or `None`, default=`None`. + Dictionary containing the masks for the optimization of the parameters + of the Hawkes process. If set to `None`, all parameters are optimized. + The dictionary must contain the following keys: + - 'baseline': `tensor` of shape (n_dim,), or `None`. + Tensor of same shape as the baseline vector, with values in (0, 1). + `baseline` coordinates where then tensor is equal to 0 + will not be optimized. If set to `None`, all coordinates of + baseline will be optimized. + - 'alpha': `tensor` of shape (n_dim, n_dim), or `None`. + Tensor of same shape as the `alpha` tensor, with values in (0, 1). + `alpha` coordinates and kernel parameters where `alpha_mask` = 0 + will not be optimized. 
If set to `None`, all coordinates of alpha + and kernel parameters will be optimized. kernel_length : `float`, `default=1.` Length of kernels in the Hawkes process. @@ -86,23 +89,12 @@ class FaDIn(object): max_iter : `int`, `default=1000` Maximum number of iterations during fit. - optimize_kernel : `boolean`, `default=True` - If optimize_kernel is false, kernel parameters are not optimized - and only the baseline and alpha are optimized. - - precomputations : `boolean`, `default=True` - If precomputations is false, pytorch autodiff is applied on the loss. - If precomputations is true, then FaDIn is computed. - ztzG_approx : `boolean`, `default=True` If ztzG_approx is false, compute the true ztzG precomputation constant that is the computational bottleneck of FaDIn. if ztzG_approx is true, ztzG is approximated with Toeplitz matrix not taking into account edge effects. - device : `str` in ``{'cpu' | 'cuda'}`` - Computations done on cpu or gpu. Gpu is not implemented yet. - log : `boolean`, `default=False` Record the loss values during the optimization. @@ -111,10 +103,6 @@ class FaDIn(object): 'truncated_exponential'}`` the gradient function is implemented. If kernel is custom, the custom gradient must be given. - criterion : `str` in ``{'l2' | 'll'}``, `default='l2'` - The criterion to minimize. if not l2, FaDIn minimize - the Log-Likelihood loss through AutoDifferentiation. - tol : `float`, `default=1e-5` The tolerance of the solver (iterations stop when the stopping criterion is below it). If not reached the solver does 'max_iter' @@ -140,113 +128,78 @@ class FaDIn(object): If `log=True`, compute the loss accross iterations. If no early stopping, `n_iter` is equal to `max_iter`. """ + compute_gradient = staticmethod(compute_gradient_fadin) + precomputations = True - def __init__(self, n_dim, kernel, kernel_params_init=None, - baseline_init=None, baseline_mask=None, - alpha_init=None, alpha_mask=None, + def __init__(self, n_dim, kernel, init='random', optim_mask=None, kernel_length=1, delta=0.01, optim='RMSprop', - params_optim=dict(), max_iter=2000, optimize_kernel=True, - precomputations=True, ztzG_approx=True, - device='cpu', log=False, grad_kernel=None, criterion='l2', + params_optim=dict(), max_iter=2000, ztzG_approx=True, + log=False, grad_kernel=None, tol=10e-5, random_state=None): - # param discretisation + + # Discretization parameters self.delta = delta - self.W = kernel_length - self.L = int(self.W / delta) + self.kernel_length = kernel_length + self.L = int(kernel_length / delta) self.ztzG_approx = ztzG_approx - # param optim + # Optimizer parameters + self.kernel = kernel self.solver = optim self.max_iter = max_iter self.log = log self.tol = tol - - # params model self.n_dim = n_dim - if baseline_init is None: - self.baseline = torch.rand(self.n_dim) - else: - self.baseline = baseline_init.float() - if baseline_mask is None: + self.kernel_model = DiscreteKernelFiniteSupport( + self.delta, + self.n_dim, + kernel, + self.kernel_length, + 0, + self.kernel_length, + grad_kernel + ) + if optim_mask is None: + optim_mask = {'baseline': None, 'alpha': None} + if optim_mask['baseline'] is None: self.baseline_mask = torch.ones([n_dim]) else: - assert baseline_mask.shape == self.baseline.shape, \ + assert optim_mask['baseline'].shape == torch.Size([n_dim]), \ "Invalid baseline_mask shape, must be (n_dim,)" - self.baseline_mask = baseline_mask - self.baseline = (self.baseline * self.baseline_mask).requires_grad_(True) - if alpha_init is None: - self.alpha = torch.rand(self.n_dim, 
self.n_dim) - else: - self.alpha = alpha_init.float() - if alpha_mask is None: + self.baseline_mask = optim_mask['baseline'] + if optim_mask['alpha'] is None: self.alpha_mask = torch.ones([self.n_dim, self.n_dim]) else: - assert alpha_mask.shape == self.alpha.shape, \ + assert optim_mask['alpha'].shape == torch.Size([n_dim, n_dim]), \ "Invalid alpha_mask shape, must be (n_dim, n_dim)" - self.alpha_mask = alpha_mask - self.alpha = (self.alpha * self.alpha_mask).requires_grad_(True) - - if kernel_params_init is None: - kernel_params_init = [] - if kernel == 'raised_cosine': - temp = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - temp2 = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - kernel_params_init.append(temp) - kernel_params_init.append(temp2) - elif kernel == 'truncated_gaussian': - temp = 0.25 * self.W * torch.rand(self.n_dim, self.n_dim) - temp2 = 0.5 * self.W * torch.rand(self.n_dim, self.n_dim) - kernel_params_init.append(temp) - kernel_params_init.append(temp2) - elif kernel == 'truncated_exponential': - kernel_params_init.append(2 * torch.rand(self.n_dim, - self.n_dim)) - else: - raise NotImplementedError('kernel initial parameters of not \ - implemented kernel have to be given') - - self.kernel_params_fixed = kernel_params_init - - self.n_kernel_params = len(kernel_params_init) - - self.kernel_model = DiscreteKernelFiniteSupport(self.delta, self.n_dim, - kernel, - self.W, 0, self.W, - grad_kernel) - self.kernel = kernel - # Set optimizer - self.params_intens = [self.baseline, self.alpha] - - self.optimize_kernel = optimize_kernel + self.alpha_mask = optim_mask['alpha'] - if self.optimize_kernel: - for i in range(self.n_kernel_params): - self.params_intens.append( - kernel_params_init[i].float().clip(1e-4).requires_grad_(True)) - - self.precomputations = precomputations + # Initialization option for Hawkes parameters + s = ['random', 'moment_matching_max', 'moment_matching_mean'] + if isinstance(init, str): + assert init in s, ( + f"Invalid string init {init}. init must be a dict or in {s}." + ) + else: + keys = set(['baseline', 'alpha', 'kernel']) + is_dict = isinstance(init, dict) + assert is_dict and set(init.keys()) == keys, ( + f"If init is not a str, it should be a dict with keys {keys}. " + f"Got {init}." + ) + self.init = init # If the learning rate is not given, fix it to 1e-3 - if 'lr' in params_optim.keys(): + if 'lr' not in params_optim.keys(): params_optim['lr'] = 1e-3 + self.params_solver = params_optim - self.opt = optimizer(self.params_intens, params_optim, solver=optim) - - if criterion == 'll': - self.precomputations = False - - self.criterion = criterion # device and seed if random_state is None: torch.manual_seed(0) else: torch.manual_seed(random_state) - if torch.cuda.is_available() and device == 'cuda': - self.device = 'cuda' - else: - self.device = 'cpu' - def fit(self, events, end_time): """Learn the parameters of the Hawkes processes on a discrete grid. @@ -260,32 +213,42 @@ def fit(self, events, end_time): Returns ------- + TODO: attributes self : object Fitted parameters. 
""" + # Initialize solver parameters n_grid = int(1 / self.delta) * end_time + 1 - discretization = torch.linspace(0, self.W, self.L) + discretization = torch.linspace(0, self.kernel_length, self.L) events_grid = projected_grid(events, self.delta, n_grid) n_events = events_grid.sum(1) + n_ground_events = [events[i].shape[0] for i in range(len(events))] + print('number of events is:', n_ground_events) + n_ground_events = torch.tensor(n_ground_events) + # Initialize Hawkes parameters + self.params_intens = init_hawkes_params( + self, + self.init, + events, + n_ground_events, + end_time + ) + + # Initialize optimizer + self.opt = optimizer( + self.params_intens, + self.params_solver, + solver=self.solver + ) #################################################### # Precomputations #################################################### - if self.precomputations: - print('number of events is:', int(n_events[0])) - start = time.time() - zG = get_zG(events_grid.double().numpy(), self.L) - zN = get_zN(events_grid.double().numpy(), self.L) - - if self.ztzG_approx: - ztzG = get_ztzG_approx(events_grid.double().numpy(), self.L) - else: - ztzG = get_ztzG(events_grid.double().numpy(), self.L) - - zG = torch.tensor(zG).float() - zN = torch.tensor(zN).float() - ztzG = torch.tensor(ztzG).float() - print('precomput:', time.time() - start) + start = time.time() + self.zG, self.zN, self.ztzG = compute_constants_fadin(events_grid, + self.L, + self.ztzG_approx) + print('precomput:', time.time() - start) #################################################### # save results @@ -302,78 +265,20 @@ def fit(self, events, end_time): self.param_baseline[0] = self.params_intens[0].detach() self.param_alpha[0] = self.params_intens[1].detach() - # If kernel parameters are optimized - if self.optimize_kernel: - for i in range(self.n_kernel_params): - self.param_kernel[i, 0] = self.params_intens[2 + i].detach() + for i in range(self.n_kernel_params): + self.param_kernel[i, 0] = self.params_intens[2 + i].detach() #################################################### start = time.time() + # Optimize parameters for i in range(self.max_iter): print(f"Fitting model... 
{i/self.max_iter:6.1%}\r", end='', flush=True) self.opt.zero_grad() - if self.precomputations: - if self.optimize_kernel: - # Update kernel - kernel = self.kernel_model.kernel_eval(self.params_intens[2:], - discretization) - # print('kernel', kernel) - grad_theta = self.kernel_model.grad_eval(self.params_intens[2:], - discretization) - else: - kernel = self.kernel_model.kernel_eval(self.kernel_params_fixed, - discretization) - - if self.log: - self.v_loss[i] = \ - discrete_l2_loss_precomputation(zG, zN, ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, n_events, - self.delta, - end_time).detach() - # Update baseline - self.params_intens[0].grad = get_grad_baseline(zG, - self.params_intens[0], - self.params_intens[1], - kernel, self.delta, - n_events, end_time) - # Update alpha - self.params_intens[1].grad = get_grad_alpha(zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - self.delta, - n_events) - if self.optimize_kernel: - for j in range(self.n_kernel_params): - self.params_intens[2 + j].grad = \ - get_grad_eta(zG, - zN, - ztzG, - self.params_intens[0], - self.params_intens[1], - kernel, - grad_theta[j], - self.delta, - n_events) - - else: - intens = self.kernel_model.intensity_eval(self.params_intens[0], - self.params_intens[1], - self.params_intens[2:], - events_grid, - discretization) - if self.criterion == 'll': - loss = discrete_ll_loss_conv(intens, events_grid, self.delta) - else: - loss = discrete_l2_loss_conv(intens, events_grid, self.delta) - loss.backward() - + self.compute_gradient( + self, events_grid, discretization, i, n_events, end_time + ) self.opt.step() # Save parameters @@ -383,13 +288,11 @@ def fit(self, events, end_time): self.alpha_mask self.param_baseline[i + 1] = self.params_intens[0].detach() self.param_alpha[i + 1] = self.params_intens[1].detach() - - # If kernel parameters are optimized - if self.optimize_kernel: - for j in range(self.n_kernel_params): - self.params_intens[2 + j].data = \ - self.params_intens[2 + j].data.clip(0) - self.param_kernel[j, i + 1] = self.params_intens[2 + j].detach() + for j in range(self.n_kernel_params): + self.params_intens[2 + j].data = \ + self.params_intens[2 + j].data.clip(0) + self.param_kernel[j, i + 1] = \ + self.params_intens[2 + j].detach() # Early stopping if i % 100 == 0: @@ -411,127 +314,3 @@ def fit(self, events, end_time): print('iterations in ', time.time() - start) return self - - -def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, savefig=None): - """ - Plots estimated kernels and baselines of solver. - Should be called after calling the `fit` method on solver. - - Parameters - ---------- - solver: |`MarkedFaDin` or `FaDIn` solver. - `fit` method should be called on the solver before calling `plot`. - - plotfig: bool (default `False`) - If set to `True`, the figure is plotted. - - bl_noise: bool (default`False`) - Whether to plot the baseline of noisy activations. - Only works if the solver has 'baseline_noise' attribute. - - title: `str` or `None`, default=`None` - Title of the plot. If set to `None`, the title text is generic. - - ch_names: list of `str` (default `None`) - Channel names for subplots. If set to `None`, will be set to - `np.arange(solver.n_dim).astype('str')`. - savefig: str or `None`, default=`None` - Path for saving the figure. If set to `None`, the figure is not saved. 
- - Returns - ------- - fig, axs : matplotlib.pyplot Figure - n_dim x n_dim subplots, where subplot of coordinates (i, j) shows the - kernel component $\\alpha_{i, j}\\phi_{i, j}$ and the baseline $\\mu_i$ - of the intensity function $\\lambda_i$. - - """ - # Recover kernel time values and y values for kernel plot - discretization = torch.linspace(0, solver.W, 200) - kernel = DiscreteKernelFiniteSupport(solver.delta, - solver.n_dim, - kernel=solver.kernel, - kernel_length=solver.W) - kappa_values = kernel.kernel_eval(solver.params_intens[-2:], - discretization).detach() - # Plot - if ch_names is None: - ch_names = np.arange(solver.n_dim).astype('str') - fig, axs = plt.subplots(nrows=solver.n_dim, - ncols=solver.n_dim, - figsize=(4 * solver.n_dim, 4 * solver.n_dim), - sharey=True, - sharex=True, - squeeze=False) - for i in range(solver.n_dim): - for j in range(solver.n_dim): - # Plot baseline - label = rf'$\mu_{{{ch_names[i]}}}$={round(solver.baseline[i].item(), 2)}' - axs[i, j].hlines( - y=solver.baseline[i].item(), - xmin=0, - xmax=solver.W, - label=label, - color='orange', - linewidth=4 - ) - if bl_noise: - # Plot noise baseline - mutilde = round(solver.baseline_noise[i].item(), 2) - label = rf'$\tilde{{\mu}}_{{{ch_names[i]}}}$={mutilde}' - axs[i, j].hlines( - y=solver.baseline_noise[i].item(), - xmin=0, - xmax=solver.W, - label=label, - color='green', - linewidth=4 - ) - # Plot kernel (i, j) - phi_values = solver.alpha[i, j].item() * kappa_values[i, j, 1:] - axs[i, j].plot( - discretization[1:], - phi_values, - label=rf'$\phi_{{{ch_names[i]},{ch_names[j]}}}$', - linewidth=4 - ) - if solver.kernel == 'truncated_gaussian': - # Plot mean of gaussian kernel - mean = round(solver.params_intens[-2][i, j].item(), 2) - axs[i, j].vlines( - x=mean, - ymin=0, - ymax=torch.max(phi_values).item(), - label=rf'mean={mean}', - color='pink', - linestyles='dashed', - linewidth=3, - ) - # Handle text - axs[i, j].set_xlabel('Time', size='x-large') - axs[i, j].tick_params( - axis='both', - which='major', - labelsize='x-large' - ) - axs[i, j].set_title( - f'{ch_names[j]}-> {ch_names[i]}', - size='x-large' - ) - axs[i, j].legend(fontsize='large', loc='best') - # Plot title - if title is None: - fig_title = 'Hawkes influence ' + solver.kernel + ' kernel' - else: - fig_title = title - fig.suptitle(fig_title, size=20) - fig.tight_layout() - # Save figure - if savefig is not None: - fig.savefig(savefig) - # Plot figure - if plotfig: - fig.show() - - return fig, axs diff --git a/fadin/tests/test_loss_and_gradient.py b/fadin/tests/test_loss_and_gradient.py index 03d3540..802b197 100644 --- a/fadin/tests/test_loss_and_gradient.py +++ b/fadin/tests/test_loss_and_gradient.py @@ -207,7 +207,8 @@ def test_l2loss(): kernel_RC, n_events, delta, end_time) - assert torch.isclose(loss_conv_RC, loss_precomp_RC) + assert torch.isclose(1 + loss_conv_RC, 1 + loss_precomp_RC) + # + 1 to avoid error when loss is close to 0 def test_gradients(): diff --git a/fadin/tests/test_mask.py b/fadin/tests/test_mask.py index 8185517..5dfb23a 100644 --- a/fadin/tests/test_mask.py +++ b/fadin/tests/test_mask.py @@ -9,22 +9,19 @@ # %% Function def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, - baseline_mask=None, baseline_init=None, - alpha_mask=None, alpha_init=None, - random_state=0): - solver = FaDIn(n_dim=n_dim, - kernel=kernel, - baseline_mask=baseline_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - alpha_mask=alpha_mask, - kernel_length=kernel_length, - delta=dt, optim="RMSprop", - 
params_optim=params_optim, - max_iter=max_iter, criterion='l2', - ztzG_approx=ztzG_approx, - random_state=random_state - ) + optim_mask=None, init='random', random_state=0): + solver = FaDIn( + n_dim=n_dim, + kernel=kernel, + optim_mask=optim_mask, + init=init, + kernel_length=kernel_length, + delta=dt, optim="RMSprop", + params_optim=params_optim, + max_iter=max_iter, + ztzG_approx=ztzG_approx, + random_state=random_state + ) solver.fit(events, T) estimated_baseline = solver.params_intens[0] estimated_alpha = solver.params_intens[1] @@ -55,10 +52,24 @@ def maskedsolver(events, T, kernel, max_iter=1000, ztzG_approx=False, L = int(1 / dt) size_grid = int(T / dt) + 1 discretization = torch.linspace(0, kernel_length, L) -baseline_mask = torch.Tensor([0, 0]) -alpha_mask = torch.Tensor([[0, 0], [1, 0]]) -alpha_init = torch.Tensor([[0.2, 0.4], [0.7, 0.9]]) -baseline_init = torch.Tensor([0.7, 0.4]) +optim_mask = { + 'baseline': torch.Tensor([0, 0]), + 'alpha': torch.Tensor([[0, 0], [1, 0]]) +} +init1_rc = { + 'alpha': torch.Tensor([[0.2, 0.4], [0.7, 0.9]]), + 'baseline': torch.Tensor([0.7, 0.4]), + 'kernel': [torch.Tensor([[0.5, 0.5], [0.5, 0.5]]), + torch.Tensor([[0.25, 0.25], [0.25, 0.25]])] +} +init1_exp = { + 'alpha': torch.Tensor([[0.2, 0.4], [0.7, 0.9]]), + 'baseline': torch.Tensor([0.7, 0.4]), + 'kernel': [torch.Tensor([[0.5, 0.5], [0.5, 0.5]])] +} +init2 = 'random' +init3 = 'moment_matching_mean' +init4 = 'moment_matching_max' max_iter = 5000 ztzG_approx = True params_optim = {'lr': 1e-3} @@ -75,14 +86,15 @@ def test_exp_mask(): random_state=simu_random_state) # Fit Hawkes process to exponential simulation - exp_bl, exp_alpha = maskedsolver(kernel='truncated_exponential', - events=events, T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - ztzG_approx=ztzG_approx, - random_state=simu_random_state) + for init in [init1_exp, init2]: + exp_bl, exp_alpha = maskedsolver( + kernel='truncated_exponential', + events=events, T=T, + optim_mask=optim_mask, + init=init, + ztzG_approx=ztzG_approx, + random_state=simu_random_state + ) assert torch.allclose(exp_bl, torch.Tensor([0., 0.])) assert torch.allclose(exp_alpha * torch.Tensor([[1., 1.], [0., 1.]]), torch.zeros(2, 2)) @@ -105,14 +117,16 @@ def test_rc_mask(): random_state=simu_random_state) # %% Fit Hawkes process to raised_cosine simulation - rc_bl, rc_alpha = maskedsolver(kernel='raised_cosine', events=events_rc, - T=T, - baseline_mask=baseline_mask, - alpha_mask=alpha_mask, - baseline_init=baseline_init, - alpha_init=alpha_init, - ztzG_approx=ztzG_approx, - random_state=simu_random_state) + for init in [init1_rc, init2, init3, init4]: + rc_bl, rc_alpha = maskedsolver( + kernel='raised_cosine', + events=events_rc, + T=T, + optim_mask=optim_mask, + init=init, + ztzG_approx=ztzG_approx, + random_state=simu_random_state + ) assert torch.allclose(rc_bl, torch.Tensor([0., 0.])) assert torch.allclose(rc_alpha * torch.Tensor([[1., 1.], [0., 1.]]), torch.zeros(2, 2)) diff --git a/fadin/utils/compute_constants.py b/fadin/utils/compute_constants.py index 080d2c3..9515d61 100644 --- a/fadin/utils/compute_constants.py +++ b/fadin/utils/compute_constants.py @@ -1,5 +1,6 @@ import numba import numpy as np +import torch from scipy.linalg import toeplitz @@ -131,3 +132,20 @@ def get_ztzG_approx_(events_grid, L): for j in range(n_dim): ztzG[i, j] = toeplitz(diff_tau[i, j]) return ztzG + + +def compute_constants_fadin(events_grid, L, ztzG_approx=True): + """Compute all precomputations""" + zG = 
get_zG(events_grid.double().numpy(), L) + zN = get_zN(events_grid.double().numpy(), L) + + if ztzG_approx: + ztzG = get_ztzG_approx(events_grid.double().numpy(), L) + else: + ztzG = get_ztzG(events_grid.double().numpy(), L) + + zG = torch.tensor(zG).float() + zN = torch.tensor(zN).float() + ztzG = torch.tensor(ztzG).float() + + return zG, zN, ztzG diff --git a/fadin/utils/utils.py b/fadin/utils/utils.py index 36e9270..1800379 100644 --- a/fadin/utils/utils.py +++ b/fadin/utils/utils.py @@ -74,7 +74,7 @@ def projected_grid(events, grid_step, n_grid): events_grid = torch.zeros(n_dim, n_grid) for i in range(n_dim): ei_torch = torch.tensor(events[i]) - temp = torch.round(ei_torch / grid_step).long() # * grid_step + temp = torch.round(ei_torch / grid_step).long() # * grid_step # temp2 = torch.round(temp * size_discret) idx, data = np.unique(temp, return_counts=True) events_grid[i, idx] += torch.tensor(data) diff --git a/fadin/utils/utils_meg.py b/fadin/utils/utils_meg.py index 6ff1579..8cdfd9f 100644 --- a/fadin/utils/utils_meg.py +++ b/fadin/utils/utils_meg.py @@ -169,6 +169,6 @@ def get_atoms_timestamps(acti, sfreq=None, info=None, threshold=0, threshold = np.percentile(acti[acti > 0], threshold) atoms_timestamps = [np.where(acti[i] > threshold)[0] / sfreq - for i in range(n_atoms)] + for i in range(n_atoms)][0] return atoms_timestamps diff --git a/fadin/utils/utils_simu.py b/fadin/utils/utils_simu.py index 25dd64b..bd3a35f 100644 --- a/fadin/utils/utils_simu.py +++ b/fadin/utils/utils_simu.py @@ -1,7 +1,7 @@ import numpy as np from scipy.optimize import minimize_scalar from scipy.interpolate import interp1d -from scipy.integrate import simps +from scipy.integrate import simpson import scipy.stats as stats @@ -84,7 +84,7 @@ def __init__(self, custom_density, params=dict(), kernel_length=10): # function to normalise the pdf over chosen domain def normalisation(self, x): - return simps(self.pdf(x), x) + return simpson(self.pdf(x), x=x) def create_cdf_ppf(self): # define normalization support with the given kernel length diff --git a/fadin/utils/vis.py b/fadin/utils/vis.py new file mode 100644 index 0000000..07627bf --- /dev/null +++ b/fadin/utils/vis.py @@ -0,0 +1,132 @@ +import torch +import numpy as np +import matplotlib.pyplot as plt + +from fadin.kernels import DiscreteKernelFiniteSupport + + +def plot(solver, plotfig=False, bl_noise=False, title=None, ch_names=None, + savefig=None): + """ + Plots estimated kernels and baselines of `FaDIn` solver. + Should be called after calling the `fit` method on solver. + + Parameters + ---------- + solver: `FaDIn` solver. + `fit` method should be called on the solver before calling `plot`. + + plotfig: bool (default `False`) + If set to `True`, the figure is plotted. + + bl_noise: bool (default`False`) + Whether to plot the baseline of noisy activations. + Only works if the solver has 'baseline_noise' attribute. + + title: `str` or `None`, default=`None` + Title of the plot. If set to `None`, the title text is generic. + + ch_names: list of `str` (default `None`) + Channel names for subplots. If set to `None`, will be set to + `np.arange(solver.n_dim).astype('str')`. + savefig: str or `None`, default=`None` + Path for saving the figure. If set to `None`, the figure is not saved. + + Returns + ------- + fig, axs : matplotlib.pyplot Figure + n_dim x n_dim subplots, where subplot of coordinates (i, j) shows the + kernel component $\\alpha_{i, j}\\phi_{i, j}$ and the baseline $\\mu_i$ + of the intensity function $\\lambda_i$. 
+ + """ + # Recover kernel time values and y values for kernel plot + discretization = torch.linspace(0, solver.kernel_length, 200) + kernel = DiscreteKernelFiniteSupport(solver.delta, + solver.n_dim, + kernel=solver.kernel, + kernel_length=solver.kernel_length) + + kappa_values = kernel.kernel_eval(solver.params_intens[-2:], + discretization).detach() + # Plot + if ch_names is None: + ch_names = np.arange(solver.n_dim).astype('str') + fig, axs = plt.subplots(nrows=solver.n_dim, + ncols=solver.n_dim, + figsize=(4 * solver.n_dim, 4 * solver.n_dim), + sharey=True, + sharex=True, + squeeze=False) + for i in range(solver.n_dim): + for j in range(solver.n_dim): + # Plot baseline + label = (rf'$\mu_{{{ch_names[i]}}}$=' + + f'{round(solver.baseline[i].item(), 2)}') + axs[i, j].hlines( + y=solver.baseline[i].item(), + xmin=0, + xmax=solver.kernel_length, + label=label, + color='orange', + linewidth=4 + ) + if bl_noise: + # Plot noise baseline + mutilde = round(solver.baseline_noise[i].item(), 2) + label = rf'$\tilde{{\mu}}_{{{ch_names[i]}}}$={mutilde}' + axs[i, j].hlines( + y=solver.baseline_noise[i].item(), + xmin=0, + xmax=solver.kernel_length, + label=label, + color='green', + linewidth=4 + ) + # Plot kernel (i, j) + phi_values = solver.alpha[i, j].item() * kappa_values[i, j, 1:] + axs[i, j].plot( + discretization[1:], + phi_values, + label=rf'$\phi_{{{ch_names[i]},{ch_names[j]}}}$', + linewidth=4 + ) + if solver.kernel == 'truncated_gaussian': + # Plot mean of gaussian kernel + mean = round(solver.params_intens[-2][i, j].item(), 2) + axs[i, j].vlines( + x=mean, + ymin=0, + ymax=torch.max(phi_values).item(), + label=rf'mean={mean}', + color='pink', + linestyles='dashed', + linewidth=3, + ) + # Handle text + axs[i, j].set_xlabel('Time', size='x-large') + axs[i, j].tick_params( + axis='both', + which='major', + labelsize='x-large' + ) + axs[i, j].set_title( + f'{ch_names[j]}-> {ch_names[i]}', + size='x-large' + ) + axs[i, j].legend(fontsize='large', loc='best') + # Plot title + if title is None: + fig_title = 'Hawkes influence ' + solver.kernel + ' kernel' + else: + fig_title = title + fig.suptitle(fig_title, size=20) + fig.tight_layout() + # Save figure + if savefig is not None: + fig.savefig(savefig) + # Plot figure + if plotfig: + fig.show() + + return fig, axs