
Commit

Update analyze schema with comments in previous PR. Added linting workflow

luissian authored and saramonzon committed Jan 30, 2024
1 parent d212ab4 commit 0874569
Showing 13 changed files with 3,628 additions and 1,296 deletions.
25 changes: 0 additions & 25 deletions .github/workflows/dockerhub_push_release.yml

This file was deleted.

35 changes: 35 additions & 0 deletions .github/workflows/python_lint.yml
@@ -0,0 +1,35 @@
name: python_lint

on:
  push:
    paths:
      - '**.py'
  pull_request:
    paths:
      - '**.py'

jobs:
  flake8_py3:
    runs-on: ubuntu-latest
    steps:
      - name: Setup Python
        uses: actions/setup-python@v1
        with:
          python-version: 3.9.x
          architecture: x64
      - name: Checkout PyTorch
        uses: actions/checkout@master
      - name: Install flake8
        run: pip install flake8
      - name: Run flake8
        run: flake8 --ignore E501,W503,E203,W605

  black_lint:
    runs-on: ubuntu-latest
    steps:
      - name: Setup
        uses: actions/checkout@v2
      - name: Install black in jupyter
        run: pip install black[jupyter]
      - name: Check code lints with Black
        uses: psf/black@stable
38 changes: 0 additions & 38 deletions .github/workflows/tests.yml

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@

from setuptools import setup, find_packages

version = "2.2.0"
version = "3.0.0"

with open("README.md") as f:
readme = f.read()
33 changes: 26 additions & 7 deletions taranis/__main__.py
@@ -52,7 +52,7 @@ def run_taranis():
    )

    # stderr.print("[green] `._,._,'\n", highlight=False)
    __version__ = "2.1.0"
    __version__ = "3.0.0"
    stderr.print(
        "\n" "[grey39] Taranis version {}".format(__version__), highlight=False
    )
@@ -166,6 +166,12 @@ def taranis_cli(verbose, log_file):
    default=False,
    help="Remove no CDS alleles from the schema.",
)
@click.option(
    "--output-allele-annot/--no-output-allele-annot",
    required=False,
    default=True,
    help="Get extended annotation for all alleles in the locus.",
)
@click.option(
    "--genus",
    required=False,
@@ -184,29 +190,41 @@ def taranis_cli(verbose, log_file):
default="Genus",
help="Use genus-specific BLAST databases for Prokka schema genes annotation (needs --genus). Default is False.",
)
@click.option(
"--cpus",
required=False,
multiple=False,
type=int,
default=1,
help="Number of cpus used for execution",
)
def analyze_schema(
inputdir,
output,
remove_subset,
remove_duplicated,
remove_no_cds,
output_allele_annot,
genus,
species,
usegenus,
cpus,
):
schema_files = taranis.utils.get_files_in_folder(inputdir, "fasta")

"""
schema_analyze = {}
schema_analyze = []
for schema_file in schema_files:
schema_obj = taranis.analyze_schema.AnalyzeSchema(schema_file, output, remove_subset, remove_duplicated, remove_no_cds, genus, species, usegenus)
schema_analyze.update(schema_obj.analyze_allele_in_schema())
"""
schema_analyze.append(schema_obj.analyze_allele_in_schema())
import pdb; pdb.set_trace()
_ = taranis.analyze_schema.collect_statistics(schema_analyze, output, output_allele_annot)
sys.exit(0)
# for schema_file in schema_files:
"""
results = []
start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor() as executor:
with concurrent.futures.ProcessPoolExecutor(max_workers=cpus) as executor:
futures = [
executor.submit(
taranis.analyze_schema.parallel_execution,
@@ -224,10 +242,11 @@ def analyze_schema(
        # Collect results as they complete
        for future in concurrent.futures.as_completed(futures):
            results.append(future.result())
    _ = taranis.analyze_schema.collect_statistics(results, output)
    _ = taranis.analyze_schema.collect_statistics(results, output, output_allele_annot)
    finish = time.perf_counter()
    print(f"Schema analyze finish in {round((finish-start)/60, 2)} minutes")


# Reference alleles
@taranis_cli.command(help_priority=2)
@click.option(
49 changes: 33 additions & 16 deletions taranis/allele_calling.py
@@ -5,12 +5,14 @@

import taranis.utils
import taranis.blast

# import numpy
import pandas as pd
from pathlib import Path


import pdb

log = logging.getLogger(__name__)
stderr = rich.console.Console(
    stderr=True,
@@ -19,6 +21,7 @@
    force_terminal=taranis.utils.rich_force_colors(),
)


class AlleleCalling:
    def __init__(self, prediction, sample_file, schema, reference_alleles, out_folder):
        self.prediction = prediction
@@ -27,9 +30,25 @@ def __init__(self, prediction, sample_file, schema, reference_alleles, out_folder):
        self.ref_alleles = reference_alleles
        self.out_folder = out_folder
        self.s_name = Path(sample_file).stem
        self.blast_dir = os.path.join(out_folder,"blastdb")
        self.blast_dir = os.path.join(out_folder, "blastdb")
        self.blast_sample = os.path.join(self.blast_dir, self.s_name)
        self.blast_heading = ["qseqid", "sseqid", "pident", "qlen", "length", "mismatch", "gapopen", "evalue", "bitscore", "sstart", "send", "qstart", "qend", "sseq", "qseq"]
        self.blast_heading = [
            "qseqid",
            "sseqid",
            "pident",
            "qlen",
            "length",
            "mismatch",
            "gapopen",
            "evalue",
            "bitscore",
            "sstart",
            "send",
            "qstart",
            "qend",
            "sseq",
            "qseq",
        ]
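
The fifteen names in blast_heading correspond to a custom tabular BLAST output (format 6). The actual call lives in taranis.blast and is not part of this diff; the sketch below only illustrates, under that assumption, how the column list maps onto a blastn -outfmt string:

import subprocess

# Same column order as self.blast_heading above
BLAST_COLUMNS = [
    "qseqid", "sseqid", "pident", "qlen", "length", "mismatch", "gapopen",
    "evalue", "bitscore", "sstart", "send", "qstart", "qend", "sseq", "qseq",
]


def run_blastn(query_file, db_path, perc_identity=90):
    # Build the custom tabular format string, e.g. "6 qseqid sseqid pident ..."
    outfmt = "6 " + " ".join(BLAST_COLUMNS)
    cmd = [
        "blastn",
        "-query", query_file,
        "-db", db_path,
        "-outfmt", outfmt,
        "-perc_identity", str(perc_identity),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    # One tab-separated line per hit, in the column order defined above
    return result.stdout.splitlines()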

    def assign_allele_type(self, query_seq, allele_name, sample_contig, schema_gene):
        """_summary_
@@ -39,38 +58,36 @@ def assign_allele_type(self, query_seq, allele_name, sample_contig, schema_gene):
            allele_name (_type_): _description_
            sample_contig (_type_): _description_
            schema_gene (_type_): _description_
        """
        """
        s_alleles_blast = taranis.blast.Blast("nucl")
        ref_allele_blast_dir = os.path.join(self.blast_dir, "ref_alleles")
        query_path = os.path.join(self.out_folder, "tmp", allele_name)
        # Write to file the sequence to find out the loci name that fully match
        # Write to file the sequence to find out the loci name that fully match
        f_name = taranis.utils.write_fasta_file(query_path, query_seq, allele_name)
        query_file = os.path.join(query_path, f_name)
        _ = s_alleles_blast.create_blastdb(schema_gene, ref_allele_blast_dir)
        # Blast with sample sequence to find the allele in the schema
        # Blast with sample sequence to find the allele in the schema
        seq_blast_match = s_alleles_blast.run_blast(query_file, perc_identity=100)
        pdb.set_trace()
        if len(seq_blast_match) >= 1:
            # allele is named as NIPHEM
            # allele is named as NIPHEM

            # Run a BLAST with this query sequence against the allele database
            # Create blast db with sample file


            pass
        elif len(seq_blast_match) == 1:
            pass
        else:
            pass
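
assign_allele_type is still a skeleton at this point: it writes the query sequence to a FASTA file, builds a BLAST database for the locus and runs a 100% identity search, but every classification branch ends in pass. A hedged sketch of the branching the comments point to; NIPHEM is taken from the comments above, the other tag names are invented for illustration, and the real decision logic is not part of this commit:

def classify_exact_matches(seq_blast_match):
    # Assumption based on the comments above: more than one exact hit -> NIPHEM
    if len(seq_blast_match) > 1:
        return "NIPHEM"
    elif len(seq_blast_match) == 1:
        # Exactly one 100% identity hit: the allele already exists in the schema
        return "EXACT"
    # No exact hit: needs further comparison (new allele, LNF, etc.)
    return "UNRESOLVED"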


    def search_alleles (self, ref_allele):
    def search_alleles(self, ref_allele):
        allele_name = Path(ref_allele).stem
        schema_gene = os.path.join(self.schema, allele_name + ".fasta")
        schema_gene = os.path.join(self.schema, allele_name + ".fasta")
        allele_name = Path(ref_allele).stem
        # run blast with sample as db and reference allele as query
        sample_blast_match = self.sample_blast.run_blast(ref_allele)
        if len(sample_blast_match) > 0 :
        if len(sample_blast_match) > 0:
            pd_lines = pd.DataFrame([item.split("\t") for item in sample_blast_match])
            pd_lines.columns = self.blast_heading
            pd_lines["pident"] = pd_lines["pident"].apply(pd.to_numeric)
@@ -84,16 +101,17 @@ def search_alleles (self, ref_allele):
            # sel_row = np_lines[mask, :] = np_lines[mask, :]
            # query_seq = sel_row[0,14]
            sample_contig = sel_max["sseqid"]
            abbr = self.assign_allele_type(query_seq, allele_name, sample_contig, schema_gene)
            abbr = self.assign_allele_type(
                query_seq, allele_name, sample_contig, schema_gene
            )
        else:
            # Sample does not have a reference allele to be matched
            # Keep LNF info
            # see espe's code
            #lnf_tpr_tag()
            # lnf_tpr_tag()
            pass
        pdb.set_trace()
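
search_alleles loads the tab-separated hits into a DataFrame with blast_heading as the column names, converts pident to a numeric dtype and then works from the best hit (sel_max). The lines that build sel_max and query_seq are hidden behind the fold above, so the sketch below fills that gap with an idxmax-based selection as an assumption:

import pandas as pd

# Same columns as self.blast_heading
BLAST_HEADING = [
    "qseqid", "sseqid", "pident", "qlen", "length", "mismatch", "gapopen",
    "evalue", "bitscore", "sstart", "send", "qstart", "qend", "sseq", "qseq",
]


def best_blast_hit(sample_blast_match):
    # One row per BLAST hit, columns in the custom outfmt 6 order
    pd_lines = pd.DataFrame([item.split("\t") for item in sample_blast_match])
    pd_lines.columns = BLAST_HEADING
    pd_lines["pident"] = pd_lines["pident"].apply(pd.to_numeric)
    # Keep the hit with the highest percent identity (first one on ties)
    sel_max = pd_lines.loc[pd_lines["pident"].idxmax()]
    # The commented-out sel_row[0, 14] suggests the matched sequence column;
    # whether "qseq" or "sseq" is wanted depends on which side is the sample
    query_seq = sel_max["qseq"]
    sample_contig = sel_max["sseqid"]
    return query_seq, sample_contig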


    def analyze_sample(self):
        # Create blast db with sample file
        self.sample_blast = taranis.blast.Blast("nucl")
@@ -107,4 +125,3 @@ def analyze_sample(self):

        pdb.set_trace()
        return
