forked from harbi811/genotyping_repeats_tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqc.py
52 lines (46 loc) · 1.35 KB
/
qc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3
import datetime
import sys
import time
from collections import defaultdict
pop = sys.argv[1]
samples = []
ref_samples = defaultdict(int)
non_ref_samples = defaultdict(int)
while True:
line = sys.stdin.readline()
if not line:
break
if line[0] == "#":
if line[1] == "C":
samples = line.split("\t")
samples = samples[9:]
samples = [sample.strip() for sample in samples]
else:
continue
else:
calls = line.split("\t")
alts = calls[4]
ref = calls[3]
pos = calls[1]
period = int(calls[7].split(";")[2].replace("PERIOD=", ""))
if period == 1:
continue
calls = calls[9:]
assert (len(calls) == len(samples))
for i in range(len(calls)):
call = calls[i].split(":")
gt = call[0]
if gt == ".":
continue
gt = gt.split("/")
for al in gt:
if al == "0":
ref_samples[samples[i]] += 1
else:
non_ref_samples[samples[i]] += 1
print(ref_samples)
with open(f"{pop}_no_homo.txt",'w') as f:
# f.write("Sample\tref_chr\tnon_ref_chr\n") # Writing the header
for key in ref_samples:
f.write(f"{key}\t{ref_samples[key]}\t{non_ref_samples[key]}\n")