Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
SilasK committed Aug 3, 2022
1 parent 144a13c commit 1eee69c
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 28 deletions.
7 changes: 3 additions & 4 deletions atlas/workflow/rules/genecatalog.smk
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ if (config["genecatalog"]["clustermethod"] == "linclust") or (
# cluster genes with cd-hit-est



elif config["genecatalog"]["clustermethod"] == "cd-hit-est":

include: "cdhit.smk"
Expand Down Expand Up @@ -280,7 +281,6 @@ rule combine_gene_coverages:
"../scripts/combine_gene_coverages.py"



###########
## EGG NOG
##########
Expand Down Expand Up @@ -437,7 +437,7 @@ checkpoint gene_subsets:
params:
subset_size=config["genecatalog"]["SubsetSize"],
conda:
"../envs/sequence_utils.yaml",
"../envs/sequence_utils.yaml"
log:
"logs/Genecatalog/clustering/split_genecatalog.log",
script:
Expand Down Expand Up @@ -490,8 +490,7 @@ rule gene2genome:
log:
"logs/genomes/annotations/gene2genome.log",
script:
"../scripts/gene2genome.py",

"../scripts/gene2genome.py"


# After combination, the eggNOG header still needs to be added.
Expand Down
28 changes: 14 additions & 14 deletions atlas/workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,20 @@ rule get_read_stats:
tmp_file = os.path.join(subfolder, "read_stats.tmp")
shell(
"""
mkdir -p {subfolder} 2> {log}
reformat.sh {params_in} \
bhist={subfolder}/base_hist.txt \
qhist={subfolder}/quality_by_pos.txt \
lhist={subfolder}/readlength.txt \
gchist={subfolder}/gc_hist.txt \
gcbins=auto \
bqhist={subfolder}/boxplot_quality.txt \
threads={threads} \
overwrite=true \
-Xmx{mem}G \
2> >(tee -a {log} {tmp_file} )
""".format(
mkdir -p {subfolder} 2> {log}
reformat.sh {params_in} \
bhist={subfolder}/base_hist.txt \
qhist={subfolder}/quality_by_pos.txt \
lhist={subfolder}/readlength.txt \
gchist={subfolder}/gc_hist.txt \
gcbins=auto \
bqhist={subfolder}/boxplot_quality.txt \
threads={threads} \
overwrite=true \
-Xmx{mem}G \
2> >(tee -a {log} {tmp_file} )
""".format(
subfolder=subfolder,
params_in=params_in,
log=log,
Expand Down
6 changes: 2 additions & 4 deletions atlas/workflow/scripts/combine_gene_coverages.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,7 @@ def handle_exception(exc_type, exc_value, exc_traceback):

# add gene length to dataframe of counts
if cov_file == snakemake.input.covstats[0]:
combined_N_reads["Length"] = pd.to_numeric(
data.Length, downcast="unsigned"
)
combined_N_reads["Length"] = pd.to_numeric(data.Length, downcast="unsigned")

combined_cov[sample] = pd.to_numeric(data.Avg_fold, downcast="float")
combined_N_reads[sample] = pd.to_numeric(data.Reads, downcast="unsigned")
Expand All @@ -86,4 +84,4 @@ def handle_exception(exc_type, exc_value, exc_traceback):
gc.collect()

combined_cov.reset_index().to_parquet(snakemake.output[0])
del combined_cov
del combined_cov
6 changes: 2 additions & 4 deletions atlas/workflow/scripts/gene2genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ def handle_exception(exc_type, exc_value, exc_traceback):
snakemake.input.old2newID, index_col=0, squeeze=True, sep="\t"
)

contigs2genome = (
contigs2bins.join(old2newID, on="Bin").dropna().drop("Bin", axis=1)
)
contigs2genome = contigs2bins.join(old2newID, on="Bin").dropna().drop("Bin", axis=1)
else:
contigs2genome = pd.read_csv(
snakemake.input.contigs2mags, index_col=0, squeeze=False, sep="\t", header=None
Expand Down Expand Up @@ -76,4 +74,4 @@ def handle_exception(exc_type, exc_value, exc_traceback):
).unstack(fill_value=0)

# save as parquet
gene2genome.reset_index().to_parquet(snakemake.output[0])
gene2genome.reset_index().to_parquet(snakemake.output[0])
4 changes: 3 additions & 1 deletion atlas/workflow/scripts/rename_genecatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ def handle_exception(exc_type, exc_value, exc_traceback):
# from gene Nr to gene name
rep2gene = geneNr_to_string(map_genenr)

logging.info(f"Collect and rename representative genes according to:\n {rep2gene.head()}")
logging.info(
f"Collect and rename representative genes according to:\n {rep2gene.head()}"
)

assert rep2gene.shape[0] > 0

Expand Down
7 changes: 6 additions & 1 deletion atlas/workflow/scripts/split_genecatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,9 @@ def handle_exception(exc_type, exc_value, exc_traceback):

from utils import fasta

fasta.split(snakemake.input[0], snakemake.params.subset_size, snakemake.output[0], simplify_headers=True)
fasta.split(
snakemake.input[0],
snakemake.params.subset_size,
snakemake.output[0],
simplify_headers=True,
)

0 comments on commit 1eee69c

Please sign in to comment.