-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_p.py
58 lines (52 loc) · 2.58 KB
/
extract_p.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import sys
import pandas as pd
import json
import argparse
def parse_args(args=None):
    """Parse the command-line options of this script.

    Args:
        args: Sequence of argument strings, typically ``sys.argv[1:]``.
            When omitted or ``None``, argparse reads ``sys.argv[1:]`` itself.

    Returns:
        argparse.Namespace with two attributes: ``input`` (a list of one or
        more json file paths) and ``out`` (the output file prefix).

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            the arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Combine parameters from multiple mix3r runs."
    )
    parser.add_argument(
        "--input",
        required=True,
        nargs="+",
        help="A list of mix3r output json files.",
    )
    parser.add_argument("--out", required=True, help="Output file prefix.")
    return parser.parse_args(args)
# Column layout of the output table. Every row built by _run_to_row() must
# follow exactly this order.
COLUMNS = (
    "run_id "
    "p_1 sb2_1 s02_1 h2_1 success_1 "
    "p_2 sb2_2 s02_2 h2_2 success_2 "
    "p_3 sb2_3 s02_3 h2_3 success_3 "
    "p_12 rho_12 rho0_12 rg_12 success_12 "
    "p_13 rho_13 rho0_13 rg_13 success_13 "
    "p_23 rho_23 rho0_23 rg_23 success_23 "
    "p_123 success_123"
).split()


def _run_to_row(run_id, d):
    """Flatten one parsed mix3r output dict into a list ordered like COLUMNS.

    Args:
        run_id: Value stored in the ``run_id`` column (the input file name).
        d: Dict loaded from one mix3r output json file.

    Returns:
        A flat list of values for one table row.

    Raises:
        KeyError: if an expected ``opt_out_*`` section or field is missing.
        ValueError: if the joint ``opt_out_12_13_23`` parameter vector has
            neither 9 nor 12 entries.
    """
    row = [run_id]

    # Per-component fits: opt_par, then h2, then the optimizer success flag.
    # The column layout assumes opt_par holds (p, sb2, s02) here.
    for tag in ("1", "2", "3"):
        out = d[f"opt_out_{tag}"]
        row += out["opt_par"]
        row.append(out["h2"])
        row.append(out["opt_res"]["success"])

    # Pairwise fits come either as one joint section or as three separate
    # sections. A missing joint key is treated like an explicit null so that
    # files without it fall through to the per-pair sections.
    joint = d.get("opt_out_12_13_23")
    if joint is not None:
        par = joint["opt_par"]
        if len(par) == 9:
            # No rg values in the parameter vector; emit "NA" placeholders.
            p12, p13, p23, rho12, rho13, rho23, rho012, rho013, rho023 = par
            rg12 = rg13 = rg23 = "NA"
        else:
            (p12, p13, p23, rho12, rho13, rho23,
             rho012, rho013, rho023, rg12, rg13, rg23) = par
        # One optimization produced all three pairs, so they share the flag.
        success = joint["opt_res"]["success"]
        row += [
            p12, rho12, rho012, rg12, success,
            p13, rho13, rho013, rg13, success,
            p23, rho23, rho023, rg23, success,
        ]
    else:
        # The column layout assumes opt_par holds (p, rho, rho0) here.
        for tag in ("12", "13", "23"):
            out = d[f"opt_out_{tag}"]
            row += out["opt_par"]
            row.append(out["rg"])
            row.append(out["opt_res"]["success"])

    # Three-way fit: opt_par followed by the optimizer success flag.
    out = d["opt_out_123"]
    row += out["opt_par"]
    row.append(out["opt_res"]["success"])
    return row


args = parse_args(sys.argv[1:])
fnames = args.input
print(f"Loading {fnames}")
outf = f"{args.out}.parameters.csv"  # e.g. "aud_adhd_mig_aug22.parameters.csv"

# Collect all rows first and build the frame once, instead of enlarging an
# empty DataFrame row by row.
rows = []
for fname in fnames:
    with open(fname, encoding="utf-8") as f:
        d = json.load(f)
    row = _run_to_row(fname, d)
    if len(row) != len(COLUMNS):
        # Fail early and name the offending file.
        raise ValueError(
            f"{fname}: expected {len(COLUMNS)} values per run, got {len(row)}"
        )
    rows.append(row)
df = pd.DataFrame(rows, columns=COLUMNS)

# Normalize every p column by the inclusion-exclusion total of the p values,
# then rank runs by their summed absolute deviation from the per-column median.
pcols = "p_1 p_2 p_3 p_12 p_13 p_23 p_123".split()
total = (
    df["p_1"] + df["p_2"] + df["p_3"]
    - df["p_12"] - df["p_13"] - df["p_23"]
    + df["p_123"]
)
df_p_proportion = df[pcols].div(total, axis=0)
deviation = (df_p_proportion - df_p_proportion.median()).abs().sum(axis=1)
i_min = deviation.argmin()  # positional index of the run closest to the median
i_rank = deviation.rank()
df["rank_p_proportion_deviation_from_median"] = i_rank
print(df)
print(f"Minimum deviation from median p proportions in run {i_min}")
# NOTE(review): the file is tab-separated although its name ends in ".csv";
# kept as is because existing consumers may rely on it - confirm.
df.to_csv(outf, sep='\t', index=False)
print(f"{outf} saved.")