-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathexperiment.py
62 lines (51 loc) · 2.75 KB
/
experiment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
from sgd import sgd_minibatches
from collections import defaultdict
from processing import *
from features import featurize_edges, get_full_fset
from util import save_weights, load_weights, partition, save_likelihoods
from predict import predict
import matplotlib.pyplot as plt
# Paths: where the saved parses/lexicon/feature set live, and where the
# likelihood plot is written.
savepath = '../parses/eps-40k-ml10-3trans/'
predictpath = 'prediction/experiments/minibatch=1/lmbda=0.01/'

# Load the first 10k parses, keep only those with a non-empty ref-forest,
# and truncate to 1000 training examples.
parses = [load_parses_separate(savepath, k) for k in range(10000)]
cleaned_parses = [(target_forest, ref_forest, src_fsa, tgt_sent)
                  for (target_forest, ref_forest, src_fsa, tgt_sent) in parses
                  if ref_forest][0:1000]
print(len(cleaned_parses))

lexicon = load_lexicon(savepath)
fset = load_featureset(savepath)

# Initialize all feature weights uniformly at 1e-2.
w_init = defaultdict(float)
for feature in fset:
    w_init[feature] = 1e-2

k = 1  # minibatch size
minibatches = partition(cleaned_parses, k)

# Run SGD once per initial learning rate delta_0, collecting one likelihood
# curve per run. This replaces three copy-pasted call blocks.
# NOTE(review): the same w_init dict is handed to every run — if
# sgd_minibatches mutates w in place, runs 2 and 3 would not start from the
# uniform init; pass defaultdict(float, w_init) per run if that matters.
likelihood_curves = []
for delta_0 in (10, 1, 0.1):
    w_trained, delta_ws, likelihoods = sgd_minibatches(
        iters=1, delta_0=delta_0, w=w_init, minibatches=minibatches,
        batch_size=k, parses=cleaned_parses, shuffle=False, sparse=True,
        scale_weight=2, regularizer=False, lmbda=0.01, bar=True, log=False,
        log_last=False, check_convergence=False, savepath=False,
        prediction=False, prediction_length=False)
    print(likelihoods)
    likelihood_curves.append(likelihoods)

# Plot all curves in black, distinguished by line style, and save as PDF.
# NOTE(review): the legend labels ('100', '10', '1') do not match the
# delta_0 values used above (10, 1, 0.1) — kept exactly as the original
# wrote them; confirm which labeling was intended.
handles = []
for likelihoods, style, label in zip(likelihood_curves,
                                     ('-', '--', '-.'),
                                     ('100', '10', '1')):
    handles.extend(plt.plot(range(1, len(likelihoods) + 1), likelihoods,
                            style, color='k', linewidth=0.8, label=label))
plt.legend(handles=handles)
plt.savefig(predictpath + 'likelihoods.pdf')

# # printing for verification
# w = w_trained[-1]
# for k, v in sorted(w.items(), key=lambda x: x[1], reverse=True):
#     print('{}'.format(k).ljust(25) + '{}'.format(v))