-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcombiner_pymc.py
134 lines (122 loc) · 5.2 KB
/
combiner_pymc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import pymc, seaborn as sns
import gzip
import commons
import sys
import matplotlib
import matplotlib.ticker
import matplotlib.pyplot as plt
import pickle
import random
import time
from pymc import Exponential, deterministic, Poisson, Uniform, Normal, observed, MCMC, Matplot, stochastic
from argparse import ArgumentParser
from operator import itemgetter
import PyGammaCombo
tstr = PyGammaCombo.TString
import numpy as np
def parse_dv(args, bins):
arglen = 4 if bins else 3
names = args[0:len(args):arglen]
lower_bounds = list(map(float, args[1:len(args):arglen]))
upper_bounds = list(map(float, args[2:len(args):arglen]))
if bins:
bin_number = list(map(int, args[3:len(args):arglen]))
edges = {}
for i, name in enumerate(names):
edges[name] = np.linspace(lower_bounds[i], upper_bounds[i], bin_number[i] + 1)
return names, lower_bounds, upper_bounds, edges
else:
return names, lower_bounds, upper_bounds, None
ap = ArgumentParser("Using Monte-Carlo Markov-Chains for estimating LHC fits.")
# Default constants
ap.add_argument('-n', action='store', default=1000000, type=int, help='Number of events')
ap.add_argument('-b', action='store', default=5000, type=int, help='Number of burnout')
ap.add_argument('-c', action='store', required=True, type=int, help='Number of the combination')
ap.add_argument('-bins', action='store_true', help='If set, then output as histrograms')
ap.add_argument('-i', action='store_true', help='Print information about combiner')
ap.add_argument('-plot', action='store_true', help='Plot bars and graphs')
ap.add_argument('-u', action='store_true', help='Switch combination function to uniform')
ap.add_argument('-vars', nargs='+', required=True, help='Variables')
args = vars(ap.parse_args())
print(' '.join(sys.argv))
# Constants
N = args['n']
combination = args['c']
print_information = args['i']
is_mcmc = not args['u']
bins = args['bins']
plot_graph = args['plot']
burnout = args['b']
desired_variables, lower_bounds, upper_bounds, edges = parse_dv(args['vars'], bins)
sns.set_style("whitegrid")
matplotlib.rcParams.update({'font.size': 10})
pgc_utils = PyGammaCombo.gammacombo_utils
cout = PyGammaCombo.gammacombo_utils.getCout()
extract = PyGammaCombo.gammacombo_utils.extractFromRooArgSet
toRooRealVar = PyGammaCombo.gammacombo_utils.toRooRealVar
if __name__ == "__main__":
start_time = time.time()
gce = PyGammaCombo.gammacombo_utils.getGammaComboEngine("")
cmb = gce.getCombiner(combination)
cmb.combine()
parameters = cmb.getParameters()
parameter_names = list(cmb.getParameterNames())
pdf = cmb.getPdf()
if print_information:
print("Required parameters for combination:")
for p in parameter_names:
param = extract(parameters, p)
param.Print()
weights=None
if is_mcmc:
# Declare vars
var_dict = {}
for i, p in enumerate(desired_variables):
var_dict[p] = Uniform(p, doc="{}".format(p), lower=lower_bounds[i], upper=upper_bounds[i])
# Dynamically create PyMC sampling function
stochastic_args = ','.join(["{}=var_dict['{}']".format(k, k) for k in var_dict.keys()])
exec("@stochastic\n"
"def combi({}, value=0):\n"
"\tfor p in desired_variables:\n"
"\t\tparameters.setRealValue(p, var_dict[p])\n"
"\treturn pdf.getLogVal()\n".format(stochastic_args))
# Define and start sampling
mcmc = MCMC([combi] + list(var_dict.values()),
db='pickle',
dbmode='w',
dbname='mcmc-{}_combiner.pickle'.format(combination))
mcmc.sample(iter=N, burn=burnout, thin=1)
# Output
data = {v: mcmc.trace(v)[:] for v in desired_variables}
if bins:
with gzip.open('output/bins.dat.gz', 'w') as file:
data_to_save = {}
for v in data:
data_to_save[v] = np.histogram(data[v], bins=edges[v])
pickle.dump({'data': data_to_save}, file, protocol=2)
else:
with gzip.open('output/raw.dat.gz', 'w') as file:
pickle.dump({'data': data}, file, protocol=2)
else: #not MCMC
var_dict = {}
weights = []
data = {v: [] for v in desired_variables}
for i in range(N):
for i, p in enumerate(desired_variables):
rval = np.random.uniform(lower_bounds[i], upper_bounds[i])
data[p].append(rval)
parameters.setRealValue(p, rval)
weights.append(pdf.getVal())
if bins:
with gzip.open('output/bins.dat.gz', 'w') as file:
data_to_save = {}
for v in data:
data_to_save[v] = np.histogram(data[v], bins=edges[v], weights=weights)
pickle.dump({'data': data_to_save}, file, protocol=2)
else:
with gzip.open('output/raw.dat.gz', 'w') as file:
pickle.dump({'data': data, 'weights': weights}, file, protocol=2)
if plot_graph:
for v in desired_variables:
commons.plot(np.array(data[v]), combination, v, weights=weights)
print("Total time: {}".format(time.time() - start_time))