Skip to content

Commit

Permalink
Removed last bit of perl stuff and simplified organization
Browse files Browse the repository at this point in the history
  • Loading branch information
spficklin committed Oct 17, 2021
1 parent ad30c38 commit a157df4
Show file tree
Hide file tree
Showing 19 changed files with 40 additions and 54 deletions.
File renamed without changes.
2 changes: 0 additions & 2 deletions docker/README.md

This file was deleted.

15 changes: 0 additions & 15 deletions docker/pl.Dockerfile

This file was deleted.

16 changes: 4 additions & 12 deletions func_e/FUNC_E.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,31 +144,23 @@ def importTerms2FeaturesFiles(self, files):
def importFiles(self, fdict):
    """Import all of the input files needed for a FUNC-E run.

    :param fdict: a dict that must contain the keys 'background',
        'query', 'terms' and 'terms2features'.  The values of 'terms'
        and 'terms2features' may each be a single filename or a list
        of filenames.
    :raises Exception: if any of the four required keys is missing.
    """
    # Membership test on the dict itself is equivalent to (and more
    # idiomatic than) testing fdict.keys().
    if 'background' in fdict:
        self.importBackgroundFile(fdict['background'])
    else:
        raise Exception("A background file is required.")

    if 'query' in fdict:
        self.importQueryFile(fdict['query'])
    else:
        raise Exception("A query file is required.")

    if 'terms' in fdict:
        # Accept either a single filename or a list of filenames.
        if isinstance(fdict['terms'], list):
            self.importTermsFiles(fdict['terms'])
        else:
            self.importTermsFiles([fdict['terms']])
    else:
        raise Exception("At least one term file is required.")

    if 'terms2features' in fdict:
        # Accept either a single filename or a list of filenames.
        if isinstance(fdict['terms2features'], list):
            self.importTerms2FeaturesFiles(fdict['terms2features'])
        else:
            self.importTerms2FeaturesFiles([fdict['terms2features']])
    else:
        raise Exception("At least one term2features file is required.")

def doCounts(self):
"""
Expand Down
40 changes: 25 additions & 15 deletions func_e/cmd.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
import os.path
from os import path
import argparse

from func_e.FUNC_E import FUNC_E
import func_e.vocabs.all as vocabs

def getTerms():
    """Entry point for the FUNC-E-terms console script.

    Parses the command-line arguments, retrieves the terms for the
    requested vocabularies and writes them to a tab-delimited
    '<outprefix.>terms.tsv' file suitable for the --terms argument of
    FUNC-E.
    """
    parser = argparse.ArgumentParser(description="This script generates files compatible with the --terms argument for FUNC-E.")

    # default=[] keeps this consistent with the FUNC-E parser's --vocab
    # argument and ensures vocabs.getTerms() receives an iterable (not
    # None) when --vocab is omitted.
    parser.add_argument("--vocab", dest="vocab", type=str, nargs='*', default=[],
        required=False, help="Optional. Specify the term vocabulary ID to perform enrichment and clustering. Provide as many vocabulary IDs as desired. Vocab IDs may include, for example, GO, IPR, KEGG, TOS, GNAME or whatever vocabularies are provided. Be sure that these vocabularies are present in the terms list or enrichment will not be performed.")
    parser.add_argument("--outprefix", dest="outprefix", type=str,
        default=None, required=False, help="Optional. Provide a prefix for the output file.")

    args = parser.parse_args()
    terms = vocabs.getTerms(args.vocab)

    # Prefix the output filename only when a prefix was given.
    outprefix = args.outprefix + '.' if args.outprefix else ''
    terms.to_csv(outprefix + 'terms.tsv', index=None, sep="\t", header=None)


def func_e():
"""
"""

# Retrieves the arguments provided on the command-line.
parser = argparse.ArgumentParser(description="This script will perform functional enrichment and enriched term clustering on a list of genes.You must provide a background file of gene or transcript names, a network or query file, a set of vocabularies (e.g. GO, InterPro, etc), and a file mapping genes in the network or query file to the terms in the vocabularies. For information on the format of these files see the argument section below.")

parser.add_argument("--background", dest="background", type=str,
Expand All @@ -27,10 +47,10 @@ def parseArgs():
parser.add_argument("--outprefix", dest="outprefix", type=str,
default=None, required=False, help="Optional. Provide a prefix for the output reports.")

parser.add_argument("--module", dest="module", type=str,
parser.add_argument("--module", dest="module", type=str, default=[],
required=False, help="Optional. Specify a module name to limit the counting by module.")

parser.add_argument("--vocab", dest="vocab", type=str, nargs='*',
parser.add_argument("--vocab", dest="vocab", type=str, nargs='*', default=[],
required=False, help="Optional. Specify the term vocabulary ID to perform enrichment and clustering. Provide as many vocabulary IDs as desired. Vocab IDs may include, for example, GO, IPR, KEGG, TOS, GNAME or whatever vocabularies are provided. Be sure that these vocabularies are present in the terms list or enrichment will be not be performed.")

parser.add_argument("--similarity_threshold", dest="similarity_threshold", type=str,
Expand All @@ -54,17 +74,7 @@ def parseArgs():
parser.add_argument("--verbose", dest="verbose", type=float, default="1",
required=False, help="Optional. Set to 1 to print to STDOUT default progress deteails. Setto 2 for debugging logs. Set to 0 to run quietly without anything printed to STDOUT. The default value is 1.")

# TODO: make sure that the either the network or query arguments are
# provided.

return parser.parse_args()


def func_e():
"""
The main subrouting of FUNC-E.
"""
args = parseArgs()
args = parser.parse_args()

fe = FUNC_E()
fe.setVerbosity(args.verbose)
Expand Down
6 changes: 3 additions & 3 deletions func_e/vocabs/GO.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def getTerms():
line = line.decode("utf-8")
if re.search(r'^id: GO', line):
if len(cols.keys()) == 3:
terms_list.append([cols['Vocab'], cols['Term'], cols['Name']])
terms_list.append([cols['Vocabulary'], cols['Term'], cols['Name']])
cols = {}
m = re.search(r'^id: (GO:\d+)', line)
cols['Term'] = m.group(1)
Expand All @@ -21,6 +21,6 @@ def getTerms():
cols['Name'] = m.group(1)
if re.search(r'^namespace: ', line):
m = re.search(r'^namespace: (.+)', line)
cols['Vocab'] = 'GO' #m.group(1)
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
cols['Vocabulary'] = 'GO' #m.group(1)
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
return terms
2 changes: 1 addition & 1 deletion func_e/vocabs/IPR.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ def getTerms():
continue
cols = line.decode("utf-8").split("\t")
terms_list.append(['IPR', cols[0], cols[2]])
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
return terms
6 changes: 3 additions & 3 deletions func_e/vocabs/KEGG.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def getOrthologs():
for line in r.content.splitlines():
cols = line.decode("utf-8").split("\t")
terms_list.append(['KEGG', cols[0], cols[1]])
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
terms['Term'] = terms['Term'].str.replace(r'ko:','', regex=True)

return terms
Expand All @@ -26,7 +26,7 @@ def getPathways():
for line in r.content.splitlines():
cols = line.decode("utf-8").split("\t")
terms_list.append(['KEGG', cols[0], cols[1]])
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
terms['Term'] = terms['Term'].str.replace(r'path:map','ko', regex=True)
return terms

Expand All @@ -40,7 +40,7 @@ def getModules():
for line in r.content.splitlines():
cols = line.decode("utf-8").split("\t")
terms_list.append(['KEGG', cols[0], cols[1]])
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
terms['Term'] = terms['Term'].str.replace(r'md:','', regex=True)
return terms

Expand Down
4 changes: 2 additions & 2 deletions func_e/vocabs/Pfam.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

def getTerms():
return

url = 'http://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.full.gz'
r = requests.get(url, allow_redirects=True)

Expand All @@ -15,5 +15,5 @@ def getTerms():
continue
cols = line.decode("utf-8").split("\t")
terms_list.append(['Pfam', cols[1], cols[4]])
terms = pd.DataFrame(terms_list, columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(terms_list, columns=['Vocabulary', 'Term', 'Name'])
return terms
2 changes: 1 addition & 1 deletion func_e/vocabs/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def getTerms(vocabs = []):
"""
"""
terms = pd.DataFrame(columns=['Vocab', 'Term', 'Name'])
terms = pd.DataFrame(columns=['Vocabulary', 'Term', 'Name'])
if 'GO' in vocabs:
terms = pd.concat([terms, GO_getTerms()])
if 'IPR' in vocabs:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@
tests_require=['pytest'],
entry_points={'console_scripts': [
'FUNC-E = func_e.cmd:func_e',
'FUNC-E-terms = func_e.cmd:getTerms'
]},
)
Empty file added test/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit a157df4

Please sign in to comment.