forked from eatkinson/ancestry_pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclasses.py
50 lines (41 loc) · 1.44 KB
/
classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Takes a sample file output by shapeit2rfmix and writes the corresponding classes file.
import argparse
import os
def read_ref(anc):
    """Return the set of individual IDs listed in a reference file.

    The ID is taken from the second whitespace-delimited column of every
    line in the file.

    Args:
        anc: Path of the reference file to read.

    Returns:
        A set containing the second-column value of each line.

    Raises:
        IOError: If any line (including a blank one) has fewer than two
            columns.
    """
    anc_set = set()
    # Context manager so the handle is released even when a malformed
    # line aborts the read.
    with open(anc) as ref_file:
        for line in ref_file:
            fields = line.split()
            if len(fields) < 2:
                raise IOError('Input misspecified. 2nd column needs to correspond with individual ID.')
            anc_set.add(fields[1])
    return anc_set
def main(args):
    """Write class labels for every individual in the sample file.

    Each reference file named in ``args.ref`` is loaded with ``read_ref``.
    For every line of ``args.sample`` (one individual ID per line) a pair
    of labels is appended to ``args.out``: ``k k`` when the ID is found in
    the k-th reference file (1-based, in ``--ref`` order), or ``0 0`` when
    it is found in none. All pairs are written space-separated on a single
    line that is terminated by one newline.

    Args:
        args: Namespace with ``ref`` (comma-separated reference file
            paths), ``sample`` (path of the individual ID list) and
            ``out`` (path of the file to write).

    Raises:
        IOError: If the sample file contains the shapeit sample-file
            header line, or if a reference file is malformed.
    """
    ref_paths = args.ref.strip().split(',')
    ancs = [read_ref(path) for path in ref_paths]

    # Both handles are managed so they are closed even if the header
    # check below raises part-way through.
    with open(args.out, 'w') as out, open(args.sample) as sample:
        for line in sample:
            ind = line.strip()
            if ind == 'ID_1 ID_2 missing father mother sex plink_pheno':
                raise IOError('sample file must be list of individual IDs output by shapeit2rfmix with order of inds in alleles file, not shapeit sample file.')
            in_ref = False
            # NOTE(review): an ID present in several reference files gets
            # one label pair per matching file -- confirm this is intended.
            for label, anc_set in enumerate(ancs, start=1):
                if ind in anc_set:
                    in_ref = True
                    out.write(str(label) + ' ' + str(label) + ' ')
            if not in_ref:
                out.write('0 0 ')
        out.write('\n')
if __name__ == '__main__':
    # Command-line entry point: parse the three required paths and hand
    # them to main().
    parser = argparse.ArgumentParser(
        description='Write a classes file from a sample file output by '
                    'shapeit2rfmix and one or more reference ID files.')
    parser.add_argument('--ref', required=True,
                        help='comma-separated list of reference files; the '
                             '2nd column of each must be the individual ID')
    parser.add_argument('--sample', required=True,
                        help='list of individual IDs output by shapeit2rfmix '
                             '(not the shapeit sample file)')
    parser.add_argument('--out', required=True,
                        help='path of the classes file to write')
    args = parser.parse_args()
    main(args)