Skip to content

Commit

Permalink
Added functions to get terms
Browse files Browse the repository at this point in the history
  • Loading branch information
spficklin committed Oct 17, 2021
1 parent 8020690 commit 30d29ba
Show file tree
Hide file tree
Showing 22 changed files with 132 additions and 50 deletions.
4 changes: 0 additions & 4 deletions bin/AraCyc.sh

This file was deleted.

3 changes: 0 additions & 3 deletions bin/GO.sh

This file was deleted.

3 changes: 0 additions & 3 deletions bin/IPR.sh

This file was deleted.

16 changes: 0 additions & 16 deletions bin/KEGG.sh

This file was deleted.

2 changes: 0 additions & 2 deletions bin/PO.sh

This file was deleted.

6 changes: 0 additions & 6 deletions bin/Pfam.sh

This file was deleted.

13 changes: 0 additions & 13 deletions bin/README.md

This file was deleted.

1 change: 0 additions & 1 deletion bin/RiceCyc.sh

This file was deleted.

2 changes: 0 additions & 2 deletions func_e/FUNC_E.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,8 +550,6 @@ def _calculateClusterStats(self, clusters, module):
}, ignore_index=True)
return cluster_stats



def doModuleClustering(self, module):
"""
"""
Expand Down
26 changes: 26 additions & 0 deletions func_e/vocabs/GO.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import requests
import re
import pandas as pd

def getTerms():
    """
    Download the Gene Ontology OBO file and return its terms as a table.

    Every ``[Term]`` stanza that supplies a ``GO:`` id, a name and a
    namespace yields one row. The namespace is only checked for presence:
    the ``Vocab`` column is always the literal ``'GO'``.

    Returns:
        pandas.DataFrame: one row per term with the columns ``Vocab``,
        ``Term`` (e.g. ``GO:0000001``) and ``Name``.

    Raises:
        requests.HTTPError: if the download answers with an HTTP error
            status.
    """
    url = 'http://purl.obolibrary.org/obo/go.obo'
    r = requests.get(url, allow_redirects=True)
    # Fail loudly rather than parsing an error page into an empty table.
    r.raise_for_status()

    terms_list = []
    cols = {}
    in_term = False
    for raw in r.content.splitlines():
        line = raw.decode("utf-8")

        if line.startswith('['):
            # A stanza header closes the previous record. Only complete
            # records (id, name and namespace all seen) are kept.
            if len(cols) == 3:
                terms_list.append([cols['Vocab'], cols['Term'], cols['Name']])
            cols = {}
            in_term = line.strip() == '[Term]'
            continue

        # Ignore the file header and non-Term stanzas so that their
        # name:/namespace: lines cannot leak into a pending GO record.
        if not in_term:
            continue

        m = re.match(r'id: (GO:\d+)', line)
        if m:
            cols['Term'] = m.group(1)
            continue
        m = re.match(r'name: (.+)', line)
        if m:
            cols['Name'] = m.group(1)
            continue
        if line.startswith('namespace: '):
            cols['Vocab'] = 'GO'

    # Flush the final record: nothing follows it to trigger the flush above.
    if len(cols) == 3:
        terms_list.append([cols['Vocab'], cols['Term'], cols['Name']])

    terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
    return terms
17 changes: 17 additions & 0 deletions func_e/vocabs/IPR.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import requests
import pandas as pd

def getTerms():
    """
    Fetch the InterPro entry list and return it as a table of terms.

    The first line of the download is treated as a header and skipped.
    Every remaining line is split on tabs; its first field becomes ``Term``
    and its third field becomes ``Name``.

    Returns:
        pandas.DataFrame: columns ``Vocab`` (always ``'IPR'``), ``Term``
        and ``Name``.
    """
    response = requests.get(
        'http://ftp.ebi.ac.uk/pub/databases/interpro/entry.list',
        allow_redirects=True)

    records = []
    # The [1:] slice drops the header line.
    for raw in response.content.splitlines()[1:]:
        fields = raw.decode("utf-8").split("\t")
        records.append(['IPR', fields[0], fields[2]])

    return pd.DataFrame(records, columns=['Vocab', 'Term', 'Name'])
52 changes: 52 additions & 0 deletions func_e/vocabs/KEGG.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import requests
import pandas as pd

def getOrthologs():
    """
    Fetch the KEGG orthology (KO) list and return it as a table of terms.

    Each line of the response is split on tabs into an identifier and a
    name. Any ``ko:`` text in the identifier is removed.

    Returns:
        pandas.DataFrame: columns ``Vocab`` (always ``'KEGG'``), ``Term``
        and ``Name``.
    """
    response = requests.get('http://rest.kegg.jp/list/ko', allow_redirects=True)

    split_lines = (
        raw.decode("utf-8").split("\t")
        for raw in response.content.splitlines()
    )
    rows = [['KEGG', fields[0], fields[1]] for fields in split_lines]

    table = pd.DataFrame(rows, columns=['Vocab', 'Term', 'Name'])
    table['Term'] = table['Term'].str.replace(r'ko:','', regex=True)
    return table

def getPathways():
    """
    Fetch the KEGG pathway list and return it as a table of terms.

    Each line of the response is split on tabs into an identifier and a
    name. Reference pathway identifiers are rewritten to the ``ko`` form,
    e.g. ``path:map00010`` or ``map00010`` becomes ``ko00010``.

    Returns:
        pandas.DataFrame: columns ``Vocab`` (always ``'KEGG'``), ``Term``
        and ``Name``.

    Raises:
        requests.HTTPError: if the request answers with an HTTP error
            status.
    """
    url = 'http://rest.kegg.jp/list/pathway'
    r = requests.get(url, allow_redirects=True)
    # Fail loudly rather than turning an error response into an empty table.
    r.raise_for_status()

    terms_list = []
    for line in r.content.splitlines():
        cols = line.decode("utf-8").split("\t")
        terms_list.append(['KEGG', cols[0], cols[1]])
    terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
    # Anchor at the start of the id and make the 'path:' prefix optional so
    # that prefixed and bare 'map' ids are both converted.
    # NOTE(review): the service is believed to emit bare 'map#####' ids in
    # current responses -- confirm against the live API.
    terms['Term'] = terms['Term'].str.replace(r'^(?:path:)?map', 'ko', regex=True)
    return terms

def getModules():
    """
    Fetch the KEGG module list and return it as a table of terms.

    Each line of the response is split on tabs into an identifier and a
    name. Any ``md:`` text in the identifier is removed.

    Returns:
        pandas.DataFrame: columns ``Vocab`` (always ``'KEGG'``), ``Term``
        and ``Name``.
    """
    response = requests.get('http://rest.kegg.jp/list/md', allow_redirects=True)

    entries = []
    for raw_line in response.content.splitlines():
        fields = raw_line.decode("utf-8").split("\t")
        entries.append(['KEGG', fields[0], fields[1]])

    module_table = pd.DataFrame(entries, columns=['Vocab', 'Term', 'Name'])
    module_table['Term'] = module_table['Term'].str.replace(r'md:','', regex=True)
    return module_table

def getTerms():
    """
    Return every KEGG term (pathways, orthologs and modules) in one table.

    The three lists are fetched in the order modules, orthologs, pathways
    and stacked in the order pathways, orthologs, modules. Row indexes of
    the individual tables are kept as they are.

    Returns:
        pandas.DataFrame: the concatenation of the three term tables.
    """
    module_terms = getModules()
    ortholog_terms = getOrthologs()
    pathway_terms = getPathways()

    frames = [pathway_terms, ortholog_terms, module_terms]
    return pd.concat(frames)
19 changes: 19 additions & 0 deletions func_e/vocabs/Pfam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import requests
import pandas as pd

def getTerms():
    """
    Placeholder for the Pfam vocabulary; Pfam retrieval is not implemented.

    Returns:
        None: always. No request is made and no table is built.
    """
    # TODO: implement Pfam term retrieval.
    # An earlier draft sat, unreachable, after this return. It requested
    # http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.full.gz,
    # skipped the first line, split each remaining line on tabs and used
    # fields 1 and 4 as Term and Name under the vocab 'Pfam'.
    # NOTE(review): that URL names a .gz archive, so the payload presumably
    # has to be decompressed before it can be split as text, and the file
    # may not be tab-separated at all -- confirm before reviving the draft.
    return None
Empty file added func_e/vocabs/__init__.py
Empty file.
Binary file added func_e/vocabs/__pycache__/GO.cpython-38.pyc
Binary file not shown.
Binary file added func_e/vocabs/__pycache__/IPR.cpython-38.pyc
Binary file not shown.
Binary file added func_e/vocabs/__pycache__/KEGG.cpython-38.pyc
Binary file not shown.
Binary file added func_e/vocabs/__pycache__/__init__.cpython-38.pyc
Binary file not shown.
Binary file added func_e/vocabs/__pycache__/all.cpython-38.pyc
Binary file not shown.
16 changes: 16 additions & 0 deletions func_e/vocabs/all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .GO import getTerms as GO_getTerms
from .KEGG import getTerms as KEGG_getTerms
from .IPR import getTerms as IPR_getTerms
import pandas as pd

def getTerms(vocabs=None):
    """
    Collect the terms of the requested vocabularies into one table.

    Args:
        vocabs: collection of vocabulary names. The names ``'GO'``,
            ``'IPR'`` and ``'KEGG'`` are recognised; any other name is
            ignored. ``None`` (the default) or an empty collection selects
            no vocabulary.

    Returns:
        pandas.DataFrame: columns ``Vocab``, ``Term`` and ``Name``. The
        requested vocabularies are stacked in the fixed order GO, IPR,
        KEGG; the table is empty when none was requested.
    """
    # None replaces the former mutable default ([]) and means "nothing".
    if vocabs is None:
        vocabs = ()

    # Start from an empty, correctly labelled frame so that the result has
    # the expected columns even when nothing is selected, then concatenate
    # once at the end instead of after every vocabulary.
    frames = [pd.DataFrame(columns=['Vocab', 'Term', 'Name'])]
    if 'GO' in vocabs:
        frames.append(GO_getTerms())
    if 'IPR' in vocabs:
        frames.append(IPR_getTerms())
    if 'KEGG' in vocabs:
        frames.append(KEGG_getTerms())
    return pd.concat(frames)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ statsmodels
sklearn
progressbar2
scipy
requests
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
sklearn
progressbar2
scipy
requests
""".split()

setup(
Expand Down

0 comments on commit 30d29ba

Please sign in to comment.