-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmake_otutable_from_readmap.py
74 lines (61 loc) · 1.99 KB
/
make_otutable_from_readmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""
May 2015
@author: Andrew Dopheide
Makes a table of OTU counts from USEARCH readmap .uc file.
"""
import glob
import sys
label = sys.argv[1] # Label from usearch pipeline
#listing = os.listdir("./")
listing1 = glob.glob("*readmap.uc") # Readmap files
for infile in listing1:
file1 = [x for x in listing1 if x.startswith(label)][0]
readmap = open(file1, "r")
print("Processing %s..." % label)
samples = []
otus = []
unmatched = []
# Get samples and otus from readmap
print("Getting samples and otus from readmap...")
for row in readmap:
fields = row.strip().split("\t")
query = fields[8]
#print("query: %s" % query)
sample = query.split("|")[2]
if sample not in samples:
samples.append(sample)
if fields[0] is "H": # OTU hit
otu = fields[9]
if otu not in otus:
otus.append(otu)
# Set up bins and tables for otu counts
bins = [[0 for row in range(len(samples))] for col in range(len(otus))]
otutable = open(("%s_otutable.txt" % label), "w")
# Get counts from readmap
print("Processing readmap...")
readmap = open(file1, "r")
for row in readmap:
fields = row.strip().split("\t")
query = fields[8]
sample = query.split("|")[2]
if fields[0] is "H": # OTU hit
otu = fields[9]
bins[otus.index(otu)][samples.index(sample)] += 1
elif fields[0] is "N": # unmatched
hit = fields[8]
match = fields[8]
sample = hit.split("|")[2]
plot = hit.split("|")[3]
bins[otulist.index(match)][samples.index(sample)] += 1
# Write counts to table
for item in samples:
otutable.write("\t%s" % item)
n = 0
for row in bins:
otutable.write("\n%s" % otus[n])
for item in row:
otutable.write("\t%s" % item)
n += 1
otutable.close()
print("Finished %s" % label)