Skip to content

Commit

Permalink
Fix cluster/flanking annotation of SM genes
Browse files Browse the repository at this point in the history
  • Loading branch information
CFMR NGS authored and CFMR NGS committed Jun 6, 2018
1 parent db7c8e9 commit 617f92d
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions bin/funannotate-functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,9 +654,9 @@ def parseEggNoggMapper(input, output, GeneDict):
RawProductNames = os.path.join(outputdir, 'annotate_misc', 'uniprot_eggnog_raw_names.txt')
#GeneDict[ID] = [{'name': passname, 'product': final_desc}]
with open(RawProductNames, 'w') as uniprottmp:
for k,v in natsorted(GeneProducts.items()):
for x in v: #v is list of dictionaries
uniprottmp.write('{:}\t{:}\t{:}\t{:}\n'.format(k, x['name'], x['product'], x['source']))
for k,v in natsorted(GeneProducts.items()):
for x in v: #v is list of dictionaries
uniprottmp.write('{:}\t{:}\t{:}\t{:}\n'.format(k, x['name'], x['product'], x['source']))

#combine the results from UniProt and Eggnog to parse Gene names and product descriptions
#load curated list
Expand Down Expand Up @@ -1047,10 +1047,17 @@ def parseEggNoggMapper(input, output, GeneDict):
lib.log.info("Cross referencing SM cluster hits with MIBiG database version %s" % versDB.get('mibig'))
#run a BLAST best-hit search against the MIBiG database for cluster annotation, looping through the gene cluster hits
AllProts = []
SMgenes = []
for k, v in lib.dictClusters.items():
for i in v:
if '-T' in i:
ID = i.split('-T')[0]
else:
ID = i
if not i in AllProts:
AllProts.append(i)
if not ID in SMgenes:
SMgenes.append(ID)
AllProts = set(AllProts)
mibig_fasta = os.path.join(AntiSmashFolder, 'smcluster.proteins.fasta')
mibig_blast = os.path.join(AntiSmashFolder, 'smcluster.MIBiG.blast.txt')
Expand All @@ -1071,9 +1078,9 @@ def parseEggNoggMapper(input, output, GeneDict):
for line in input:
cols = line.split('\t')
if '-T' in cols[0]:
ID = cols[0].split('-T')[0]
ID = cols[0].split('-T')[0]
else:
ID = cols[0]
ID = cols[0]
hit = cols[1].split('|')
desc = hit[5]
cluster = hit[0]
Expand Down Expand Up @@ -1151,7 +1158,10 @@ def parseEggNoggMapper(input, output, GeneDict):
pFAM = []
IPR = []
eggnogDesc = 'NA'
location = 'flanking'
if name in SMgenes:
location = 'cluster'
else:
location = 'flanking'
cog = '.'
for k,v in f.qualifiers.items():
if k == 'note':
Expand All @@ -1166,8 +1176,6 @@ def parseEggNoggMapper(input, output, GeneDict):
goTerms.append(goterm)
elif i.startswith('SMCOG'):
cog = i
elif i.startswith('antiSMASH:'):
location = 'cluster'
else:
note.append(i)
if k == 'db_xref':
Expand Down

0 comments on commit 617f92d

Please sign in to comment.