-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTest_TMRCA.py
95 lines (82 loc) · 3.56 KB
/
Test_TMRCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import tskit
import numpy as np
tsChr = tskit.load("Chr16.trees")
window_breaks = [10, 100]
focal=None
def pairwise_coalescence_counts(ts, window_breaks, focal=None):
# Work in progress
# Calculate counts of pairwise coalescence events within
# time windows for a set of pairs of tips.
# Also returns the number of uncoalesced "pairs" at the beginning of
# each window (e.g. number of trees for which both members of pair are
# not missing and not coalesced).
#@@ -1597,86 +1598,83 @@ def pairwise_coalescence_counts(ts, window_breaks, focal=None):
if focal is None:
focal = []
for i in range(ts.num_samples - 1):
for j in range(i + 1, ts.num_samples):
focal += [(i, j)]
else:
if not np.all([len(x) == 2 for x in focal]):
raise Exception("'focal' must contain tuples of pairs of sample indices")
window_breaks = np.sort(np.unique(np.append(window_breaks, [0.0])))
if np.any(window_breaks < 0.0):
raise Exception("'window_breaks' must be non-negative")
num_windows = len(window_breaks) - 1
windows = [(window_breaks[i], window_breaks[i + 1]) for i in range(num_windows)]
counts = {}
for pair in focal:
# TMRCAs for each pair
i, j = pair
pairwise_tmrca = np.sort([t.tmrca(i, j) for t in ts.trees()])
not_missing = np.invert(np.isnan(pairwise_tmrca))
nonmissing_trees = np.sum(not_missing)
pairwise_tmrca = pairwise_tmrca[not_missing]
# TODO: How is missing data handled by tree.tmrca()? Does it return np.nan?
# tally coalesced trees during window, and the number of uncoalesced
# trees at the start of the window per pair
coalesced, _ = np.histogram(pairwise_tmrca, window_breaks)
total = nonmissing_trees - np.append([0], np.cumsum(coalesced))[:num_windows]
counts[pair] = {"coalesced": coalesced, "total": total}
return {"counts": counts, "windows": windows}
def pairwise_coalescence_rates(raw_counts):
# Work in progress.
# Calculate effective population size from counts
# of coalescing lineage pairs in time windows
def ne_from_counts(x, k, d):
# estimate haploid Ne from counts
p = np.where(k == 0.0, np.nan, x / k)
q = 1.0 - p
n = np.where(q == 0.0, np.nan, -d / np.log(q))
# standard error assuming binomial(x; p, k)
se = 1.0 / np.sqrt(
np.where(
q == 0.0,
np.nan,
-2.0 * k * np.log(q) / n ** 2
+ x * (1.0 + q / p) * np.log(q) / n ** 2 * (2.0 + q / p * np.log(q)),
)
)
return {"mle": n, "std_err": se}
windows = raw_counts["windows"]
counts = raw_counts["counts"]
window_duration = np.array([x[1] - x[0] for x in windows])
num_windows = len(windows)
# calculate Ne estimates from pairwise counts,
# and from counts summed over pairs
pairwise_estimates = {}
coalesced = np.zeros(num_windows)
total = np.zeros(num_windows)
for pair in counts.keys():
pairwise_estimates[pair] = ne_from_counts(
counts[pair]["coalesced"], counts[pair]["total"], window_duration
)
coalesced += counts[pair]["coalesced"]
total += counts[pair]["total"]
global_estimate = ne_from_counts(coalesced, total, window_duration)
return {
"global_estimate": global_estimate,
"pairwise_estimates": pairwise_estimates,
}
first = pairwise_coalescence_counts(ts = tsChr, window_breaks = [0, 10, 100])
second = pairwise_coalescence_rates(first)