From ffc554429b8433d6cc37157b6832c864eeb3acf6 Mon Sep 17 00:00:00 2001 From: Leighton Pritchard Date: Tue, 11 Feb 2020 19:01:03 +0000 Subject: [PATCH 1/4] fix issue #178 This was a knotty problem. The user had input files whose names were all interpretable as floating-point numbers. When loading the output CSV files, the names of files were interpreted as float64 datatypes, but only in the dataframe index, not the headers. This meant that the label/class files were not being used, and the sequence names in the graphical output had extra digits where the floats did not have an exact representation. The solution was to read the CSV file without the `index_col=0` argument, but specifying that column zero shoul dbe the str datatype. Once loaded, this column could be specified as a new index for the dataframe. This fix forms the basis for release 0.2.10 --- bin/average_nucleotide_identity.py | 17 ++++++++++++----- pyani/pyani_graphics.py | 6 +++--- pyani/pyani_tools.py | 4 ++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/bin/average_nucleotide_identity.py b/bin/average_nucleotide_identity.py index 227a6560..ebee50e1 100755 --- a/bin/average_nucleotide_identity.py +++ b/bin/average_nucleotide_identity.py @@ -114,7 +114,7 @@ # --makeblastdb_exe=MAKEBLASTDB_EXE # Path to BLAST+ makeblastdb executable # -# (c) The James Hutton Institute 2013-2019 +# (c) The James Hutton Institute 2013-2020 # Author: Leighton Pritchard # # Contact: @@ -132,7 +132,7 @@ # # The MIT License # -# Copyright (c) 2010-2019 The James Hutton Institute +# Copyright (c) 2010-2020 The James Hutton Institute # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -792,12 +792,19 @@ def draw(filestems, gformat): fullstem = os.path.join(args.outdirname, filestem) outfilename = fullstem + ".%s" % gformat infilename = fullstem + ".tab" - df = pd.read_csv(infilename, index_col=0, sep="\t") + # As some people want to provide input files whose names look like + # floating point numbers, we need to guard against pandas interpreting + # them as such, and creating a Float64Index. This requires us to read + #  the .csv as if it had no index, specify column 0 as string datatype, + # then reindex the dataframe with that column. + df = pd.read_csv(infilename, sep="\t", dtype={"Unnamed: 0": str}) + df.set_index("Unnamed: 0", inplace=True) + df = df.reindex(df.index.rename("Genomes")) logger.info("Writing heatmap to %s", outfilename) params = pyani_graphics.Params( params_mpl(df)[filestem], - pyani_tools.get_labels(args.labels), - pyani_tools.get_labels(args.classes), + pyani_tools.get_labels(args.labels, logger=logger), + pyani_tools.get_labels(args.classes, logger=logger), ) if args.gmethod == "mpl": pyani_graphics.heatmap_mpl( diff --git a/pyani/pyani_graphics.py b/pyani/pyani_graphics.py index 3894c5a7..faac9687 100644 --- a/pyani/pyani_graphics.py +++ b/pyani/pyani_graphics.py @@ -1,4 +1,4 @@ -# Copyright 2013-2019, The James Hutton Insitute +# Copyright 2013-2020, The James Hutton Insitute # Author: Leighton Pritchard # # This code is part of the pyani package, and is governed by its licence. @@ -102,8 +102,8 @@ def get_seaborn_colorbar(dfr, classes): def get_safe_seaborn_labels(dfr, labels): """Returns labels guaranteed to correspond to the dataframe.""" if labels is not None: - return [labels.get(i, i) for i in dfr.index] - return [i for i in dfr.index] + return [labels.get(str(i), str(i)) for i in dfr.index] + return [str(i) for i in dfr.index] # Return a clustermap diff --git a/pyani/pyani_tools.py b/pyani/pyani_tools.py index d39eaa83..27e1af57 100644 --- a/pyani/pyani_tools.py +++ b/pyani/pyani_tools.py @@ -1,4 +1,4 @@ -# Copyright 2016-2019, The James Hutton Insitute +# Copyright 2016-2020, The James Hutton Insitute # Author: Leighton Pritchard # # This code is part of the pyani package, and is governed by its licence. @@ -152,5 +152,5 @@ def get_labels(filename, logger=None): logger.warning("(skipping line)") continue else: - labeldict[key] = label + labeldict[str(key)] = str(label) return labeldict From d61ce10d2cdf80589048a4ddfd77423e5d5a5e96 Mon Sep 17 00:00:00 2001 From: Leighton Pritchard Date: Tue, 11 Feb 2020 19:06:00 +0000 Subject: [PATCH 2/4] update CHANGES.md for issue #178 fix --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 7592f838..cfc24bde 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ # CHANGES.md +## v0.2.10 + +- fix Issue #178: input filenames that could be interpreted as floats may break `labels.txt`/`classes.txt` integration, and graphical output + ## v0.2.9 - fix 1ssue #132: TETRA would fail if not all 4-mers present in all input sequences From f85b412ec0224a39ee69d39fa1d1268399d7ae11 Mon Sep 17 00:00:00 2001 From: Leighton Pritchard Date: Wed, 12 Feb 2020 08:11:23 +0000 Subject: [PATCH 3/4] update copyright notices and fix linting on modified files --- CHANGES.md | 3 +- bin/average_nucleotide_identity.py | 299 ++++++++++++++--------------- pyani/pyani_graphics.py | 66 +++++-- pyani/pyani_tools.py | 60 ++++-- 4 files changed, 244 insertions(+), 184 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index cfc24bde..3023d174 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -51,8 +51,8 @@ - Changes to `--noclobber` log behaviour (issue #79) - fixed `--rerender` code (issue #85) - ## v0.2.3 + - fixes a bug in the installed scripts where the shebang (`#!`) in wheel and egg packages pointed to a development Python ## v0.2.2 @@ -65,7 +65,6 @@ - removed requirement for `rpy2` - moved scripts to `bin/` subdirectory - ## v0.2.1 - `pyani` now requires `rpy2` v2.8.0 in order to satisfy running under Anaconda (see issue #26) diff --git a/bin/average_nucleotide_identity.py b/bin/average_nucleotide_identity.py index ebee50e1..b53d9b6a 100755 --- a/bin/average_nucleotide_identity.py +++ b/bin/average_nucleotide_identity.py @@ -1,138 +1,23 @@ #!/usr/bin/env python3 -# -# average_nucleotide_identity.py -# -# This script calculates Average Nucleotide Identity (ANI) according to one of -# a number of alternative methods described in, e.g. -# -# Richter M, Rossello-Mora R (2009) Shifting the genomic gold standard for the -# prokaryotic species definition. Proc Natl Acad Sci USA 106: 19126-19131. -# doi:10.1073/pnas.0906412106. (ANI1020, ANIm, ANIb) -# -# Goris J, Konstantinidis KT, Klappenbach JA, Coenye T, Vandamme P, et al. -# (2007) DNA-DNA hybridization values and their relationship to whole-genome -# sequence similarities. Int J Syst Evol Micr 57: 81-91. -# doi:10.1099/ijs.0.64483-0. -# -# ANI is proposed to be the appropriate in silico substitute for DNA-DNA -# hybridisation (DDH), and so useful for delineating species boundaries. A -# typical percentage threshold for species boundary in the literature is 95% -# ANI (e.g. Richter et al. 2009). -# -# All ANI methods follow the basic algorithm: -# - Align the genome of organism 1 against that of organism 2, and identify -# the matching regions -# - Calculate the percentage nucleotide identity of the matching regions, as -# an average for all matching regions -# Methods differ on: (1) what alignment algorithm is used, and the choice of -# parameters (this affects the aligned region boundaries); (2) what the input -# is for alignment (typically either fragments of fixed size, or the most -# complete assembly available); (3) whether a reciprocal comparison is -# necessary or desirable. -# -# ANIm: uses MUMmer (NUCmer) to align the input sequences. -# ANIb: uses BLASTN to align 1000nt fragments of the input sequences -# TETRA: calculates tetranucleotide frequencies of each input sequence -# -# This script takes as main input a directory containing a set of -# correctly-formatted FASTA multiple sequence files. All sequences for a -# single organism should be contained in only one sequence file. The names of -# these files are used for identification, so it would be advisable to name -# them sensibly. -# -# Output is written to a named directory. The output files differ depending on -# the chosen ANI method. -# -# ANIm: MUMmer/NUCmer .delta files, describing the sequence -# alignment; tab-separated format plain text tables describing total -# alignment lengths, and total alignment percentage identity -# -# ANIb: FASTA sequences describing 1000nt fragments of each input sequence; -# BLAST nucleotide databases - one for each set of fragments; and BLASTN -# output files (tab-separated tabular format plain text) - one for each -# pairwise comparison of input sequences. There are potentially a lot of -# intermediate files. -# -# TETRA: Tab-separated text file describing the Z-scores for each -# tetranucleotide in each input sequence. -# -# In addition, all methods produce a table of output percentage identity (ANIm -# and ANIb) or correlation (TETRA), between each sequence. -# -# If graphical output is chosen, the output directory will also contain PDF -# files representing the similarity between sequences as a heatmap with -# row and column dendrograms. -# -# DEPENDENCIES -# ============ -# -# o Biopython (http://www.biopython.org) -# -# o BLAST+ executable in the $PATH, or available on the command line (ANIb) -# (ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/) -# -# o MUMmer executables in the $PATH, or available on the command line (ANIm) -# (http://mummer.sourceforge.net/) -# -# For graphical output -# -------------------- -# -# o R with shared libraries installed on the system, for graphical output -# (http://cran.r-project.org/) -# -# o Rpy2 (http://rpy.sourceforge.net/rpy2.html) -# -# -# USAGE -# ===== -# -# calculate_ani.py [options] -# -# Options: -# -h, --help show this help message and exit -# -o OUTDIRNAME, --outdir=OUTDIRNAME -# Output directory -# -i INDIRNAME, --indir=INDIRNAME -# Input directory name -# -v, --verbose Give verbose output -# -f, --force Force file overwriting -# -s, --fragsize Sequence fragment size for ANIb -# --skip_nucmer Skip NUCmer runs, for testing (e.g. if output already -# present) -# --skip_blast Skip BLAST runs, for testing (e.g. if output already -# present) -# --noclobber Don't nuke existing files -# -g, --graphics Generate heatmap of ANI -# -m METHOD, --method=METHOD -# ANI method -# --maxmatch Override MUMmer settings and allow all matches in -# NUCmer -# --nucmer_exe=NUCMER_EXE -# Path to NUCmer executable -# --blast_exe=BLAST_EXE -# Path to BLASTN+ executable -# --makeblastdb_exe=MAKEBLASTDB_EXE -# Path to BLAST+ makeblastdb executable -# -# (c) The James Hutton Institute 2013-2020 +# -*- coding: utf-8 -*- +# (c) The James Hutton Institute 2013-2019 +# (c) University of Strathclyde 2019-2020 # Author: Leighton Pritchard # -# Contact: -# leighton.pritchard@hutton.ac.uk +# Contact: leighton.pritchard@strath.ac.uk # # Leighton Pritchard, -# Information and Computing Sciences, -# James Hutton Institute, -# Errol Road, -# Invergowrie, -# Dundee, -# DD6 9LH, +# Strathclyde Institute for Pharmacy and Biomedical Sciences, +# Cathedral Street, +# Glasgow, +# G4 0RE # Scotland, # UK # # The MIT License # -# Copyright (c) 2010-2020 The James Hutton Institute +# Copyright (c) 2013-2019 The James Hutton Institute +# Copyright (c) 2019-2020 University of Strathclyde # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -151,6 +36,120 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +"""Provides average_nucleotide_identity.py script. + +This script calculates Average Nucleotide Identity (ANI) according to one of +a number of alternative methods described in, e.g. + +Richter M, Rossello-Mora R (2009) Shifting the genomic gold standard for the +prokaryotic species definition. Proc Natl Acad Sci USA 106: 19126-19131. +doi:10.1073/pnas.0906412106. (ANI1020, ANIm, ANIb) + +Goris J, Konstantinidis KT, Klappenbach JA, Coenye T, Vandamme P, et al. +(2007) DNA-DNA hybridization values and their relationship to whole-genome +sequence similarities. Int J Syst Evol Micr 57: 81-91. +doi:10.1099/ijs.0.64483-0. + +ANI is proposed to be the appropriate in silico substitute for DNA-DNA +hybridisation (DDH), and so useful for delineating species boundaries. A +typical percentage threshold for species boundary in the literature is 95% +ANI (e.g. Richter et al. 2009). + +All ANI methods follow the basic algorithm: +- Align the genome of organism 1 against that of organism 2, and identify + the matching regions +- Calculate the percentage nucleotide identity of the matching regions, as + an average for all matching regions +Methods differ on: (1) what alignment algorithm is used, and the choice of +parameters (this affects the aligned region boundaries); (2) what the input +is for alignment (typically either fragments of fixed size, or the most +complete assembly available); (3) whether a reciprocal comparison is +necessary or desirable. + +ANIm: uses MUMmer (NUCmer) to align the input sequences. +ANIb: uses BLASTN to align 1000nt fragments of the input sequences +TETRA: calculates tetranucleotide frequencies of each input sequence + +This script takes as main input a directory containing a set of +correctly-formatted FASTA multiple sequence files. All sequences for a +single organism should be contained in only one sequence file. The names of +these files are used for identification, so it would be advisable to name +them sensibly. + +Output is written to a named directory. The output files differ depending on +the chosen ANI method. + +ANIm: MUMmer/NUCmer .delta files, describing the sequence + alignment; tab-separated format plain text tables describing total + alignment lengths, and total alignment percentage identity + +ANIb: FASTA sequences describing 1000nt fragments of each input sequence; + BLAST nucleotide databases - one for each set of fragments; and BLASTN + output files (tab-separated tabular format plain text) - one for each + pairwise comparison of input sequences. There are potentially a lot of + intermediate files. + +TETRA: Tab-separated text file describing the Z-scores for each + tetranucleotide in each input sequence. + +In addition, all methods produce a table of output percentage identity (ANIm +and ANIb) or correlation (TETRA), between each sequence. + +If graphical output is chosen, the output directory will also contain PDF +files representing the similarity between sequences as a heatmap with +row and column dendrograms. + +DEPENDENCIES +============ + +o Biopython (http://www.biopython.org) + +o BLAST+ executable in the $PATH, or available on the command line (ANIb) + (ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/) + +o MUMmer executables in the $PATH, or available on the command line (ANIm) + (http://mummer.sourceforge.net/) + +For graphical output +-------------------- + +o R with shared libraries installed on the system, for graphical output + (http://cran.r-project.org/) + +o Rpy2 (http://rpy.sourceforge.net/rpy2.html) + + +USAGE +===== + +calculate_ani.py [options] + +Options: + -h, --help show this help message and exit + -o OUTDIRNAME, --outdir=OUTDIRNAME + Output directory + -i INDIRNAME, --indir=INDIRNAME + Input directory name + -v, --verbose Give verbose output + -f, --force Force file overwriting + -s, --fragsize Sequence fragment size for ANIb + --skip_nucmer Skip NUCmer runs, for testing (e.g. if output already + present) + --skip_blast Skip BLAST runs, for testing (e.g. if output already + present) + --noclobber Don't nuke existing files + -g, --graphics Generate heatmap of ANI + -m METHOD, --method=METHOD + ANI method + --maxmatch Override MUMmer settings and allow all matches in + NUCmer + --nucmer_exe=NUCMER_EXE + Path to NUCmer executable + --blast_exe=BLAST_EXE + Path to BLASTN+ executable + --makeblastdb_exe=MAKEBLASTDB_EXE + Path to BLAST+ makeblastdb executable +""" import json import logging @@ -437,8 +436,7 @@ def parse_cmdline(): # Report last exception as string def last_exception(): - """ Returns last exception as a string, or use in logging. - """ + """Return last exception as a string, or use in logging.""" exc_type, exc_value, exc_traceback = sys.exc_info() return "".join(traceback.format_exception(exc_type, exc_value, exc_traceback)) @@ -460,7 +458,7 @@ def make_outdir(): if os.path.exists(args.outdirname): if not args.force: logger.error( - "Output directory %s would overwrite existing " + "files (exiting)", + "Output directory %s would overwrite existing files (exiting)", args.outdirname, ) sys.exit(1) @@ -542,7 +540,7 @@ def calculate_anim(infiles, org_lengths): if args.scheduler == "multiprocessing": logger.info("Running jobs with multiprocessing") if args.workers is None: - logger.info("(using maximum number of available " + "worker threads)") + logger.info("(using maximum number of available worker threads)") else: logger.info("(using %d worker threads, if available)", args.workers) cumval = run_mp.run_dependency_graph( @@ -550,9 +548,7 @@ def calculate_anim(infiles, org_lengths): ) logger.info("Cumulative return value: %d", cumval) if 0 < cumval: - logger.warning( - "At least one NUCmer comparison failed. " + "ANIm may fail." - ) + logger.warning("At least one NUCmer comparison failed. ANIm may fail.") else: logger.info("All multiprocessing jobs complete.") else: @@ -575,21 +571,22 @@ def calculate_anim(infiles, org_lengths): if not args.skip_nucmer and args.scheduler == "multiprocessing": if 0 < cumval: logger.error( - "This has possibly been a NUCmer run failure, " - + "please investigate" + "This has possibly been a NUCmer run failure, please investigate" ) logger.error(last_exception()) sys.exit(1) else: logger.error( - "This is possibly due to a NUCmer comparison " - + "being too distant for use. Please consider " - + "using the --maxmatch option." + ( + "This is possibly due to a NUCmer comparison being too " + "distant for use. Please consider using the --maxmatch option." + ) ) logger.error( - "This is alternatively due to NUCmer run " - + "failure, analysis will continue, but please " - + "investigate." + ( + "This is alternatively due to NUCmer run failure, " + "analysis will continue, but please investigate." + ) ) if not args.nocompress: logger.info("Compressing/deleting %s", deltadir) @@ -700,7 +697,7 @@ def unified_anib(infiles, org_lengths): logger.info("Running jobs with multiprocessing") logger.info("Running job dependency graph") if args.workers is None: - logger.info("(using maximum number of available " + "worker threads)") + logger.info("(using maximum number of available worker threads)") else: logger.info("(using %d worker threads, if available)", args.workers) cumval = run_mp.run_dependency_graph( @@ -708,7 +705,7 @@ def unified_anib(infiles, org_lengths): ) if 0 < cumval: logger.warning( - "At least one BLAST run failed. " + "%s may fail.", args.method + "At least one BLAST run failed. %s may fail.", args.method ) else: logger.info("All multiprocessing jobs complete.") @@ -735,13 +732,11 @@ def unified_anib(infiles, org_lengths): if not args.skip_blastn: if 0 < cumval: logger.error( - "This is possibly due to BLASTN run failure, " - + "please investigate" + "This is possibly due to BLASTN run failure, please investigate" ) else: logger.error( - "This is possibly due to a BLASTN comparison " - + "being too distant for use." + "This is possibly due to a BLASTN comparison being too distant for use." ) logger.error(last_exception()) if not args.nocompress: @@ -782,7 +777,7 @@ def write(results): # Draw ANIb/ANIm/TETRA output def draw(filestems, gformat): - """Draw ANIb/ANIm/TETRA results + """Draw ANIb/ANIm/TETRA results. - filestems - filestems for output files - gformat - the format for output graphics @@ -871,7 +866,7 @@ def subsample_input(infiles): err_handler_file.setFormatter(err_formatter) err_handler_file.setLevel(logging.INFO) logger.addHandler(err_handler_file) - except: + except IOError: logger.error("Could not open %s for logging", args.logfile) sys.exit(1) @@ -958,12 +953,10 @@ def subsample_input(infiles): # Get lengths of input sequences logger.info("Processing input sequence lengths") org_lengths = pyani_files.get_sequence_lengths(infiles) - logger.info( - "Sequence lengths:\n" - + os.linesep.join( - ["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())] - ) + logstr = "Sequence lengths:\n" + os.linesep.join( + ["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())] ) + logger.info(logstr) # Run appropriate method on the contents of the input directory, # and write out corresponding results. diff --git a/pyani/pyani_graphics.py b/pyani/pyani_graphics.py index faac9687..feeb4f9d 100644 --- a/pyani/pyani_graphics.py +++ b/pyani/pyani_graphics.py @@ -1,11 +1,41 @@ -# Copyright 2013-2020, The James Hutton Insitute +# -*- coding: utf-8 -*- +# (c) The James Hutton Institute 2013-2019 +# (c) University of Strathclyde 2019-2020 # Author: Leighton Pritchard # -# This code is part of the pyani package, and is governed by its licence. -# Please see the LICENSE file that should have been included as part of -# this package. - -"""Code to implement graphics output for ANI analyses.""" +# Contact: leighton.pritchard@strath.ac.uk +# +# Leighton Pritchard, +# Strathclyde Institute for Pharmacy and Biomedical Sciences, +# Cathedral Street, +# Glasgow, +# G4 0RE +# Scotland, +# UK +# +# The MIT License +# +# Copyright (c) 2013-2019 The James Hutton Institute +# Copyright (c) 2019-2020 University of Strathclyde +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +"""Module to implement graphics output for ANI analyses.""" # Force matplotlib NOT to use an Xwindows backend on *nix, so that # _tkinter.TclError is avoided when there is no $DISPLAY env: this can occur @@ -22,18 +52,18 @@ # Specify matplotlib backend matplotlib.use("Agg") -import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec +import matplotlib.pyplot as plt # noqa: E402 +import matplotlib.gridspec as gridspec # noqa: E402 -import numpy as np +import numpy as np # noqa: E402 -import scipy.cluster.hierarchy as sch -import scipy.spatial.distance as distance +import scipy.cluster.hierarchy as sch # noqa: E402 +import scipy.spatial.distance as distance # noqa: E402 -import seaborn as sns -import pandas as pd +import seaborn as sns # noqa: E402 +import pandas as pd # noqa: E402 -from . import pyani_config +from . import pyani_config # noqa: E402 # Register Matplotlib colourmaps @@ -44,9 +74,11 @@ # Convenience class to hold heatmap graphics parameters class Params(object): # pylint: disable=too-few-public-methods + """Convenience class to hold heatmap rendering parameters.""" def __init__(self, params, labels=None, classes=None): + """Instantiate Params object.""" self.cmap = plt.get_cmap(params[0]) self.vmin = params[1] self.vmax = params[2] @@ -55,13 +87,13 @@ def __init__(self, params, labels=None, classes=None): @property def vdiff(self): - """Returns difference between max and min values for presentation""" + """Returns difference between max and min values for presentation.""" return max(0.01, self.vmax - self.vmin) # helper for cleaning up matplotlib axes by removing ticks etc. def clean_axis(axis): - """Remove ticks, tick labels, and frame from axis""" + """Remove ticks, tick labels, and frame from axis.""" axis.get_xaxis().set_ticks([]) axis.get_yaxis().set_ticks([]) for spine in list(axis.spines.values()): @@ -177,7 +209,7 @@ def heatmap_seaborn(dfr, outfilename=None, title=None, params=None): # Add dendrogram and axes to passed figure def add_mpl_dendrogram(dfr, fig, heatmap_gs, orientation="col"): - """Return a dendrogram and corresponding gridspec, attached to the fig + """Return a dendrogram and corresponding gridspec, attached to the fig. Modifies the fig in-place. Orientation is either 'row' or 'col' and determines location and orientation of the rendered dendrogram. diff --git a/pyani/pyani_tools.py b/pyani/pyani_tools.py index 27e1af57..3addb6f1 100644 --- a/pyani/pyani_tools.py +++ b/pyani/pyani_tools.py @@ -1,11 +1,42 @@ -# Copyright 2016-2020, The James Hutton Insitute +# -*- coding: utf-8 -*- +# (c) The James Hutton Institute 2016-2019 +# (c) University of Strathclyde 2019-2020 # Author: Leighton Pritchard # -# This code is part of the pyani package, and is governed by its licence. -# Please see the LICENSE file that should have been included as part of -# this package. +# Contact: leighton.pritchard@strath.ac.uk +# +# Leighton Pritchard, +# Strathclyde Institute for Pharmacy and Biomedical Sciences, +# Cathedral Street, +# Glasgow, +# G4 0RE +# Scotland, +# UK +# +# The MIT License +# +# Copyright (c) 2016-2019 The James Hutton Institute +# Copyright (c) 2019-2020 University of Strathclyde +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. -"""Code to support pyani.""" +"""Code to support pyani with miscellaneous functions.""" import pandas as pd from . import pyani_config @@ -13,6 +44,7 @@ # Class to hold ANI dataframe results class ANIResults(object): + """Holds ANI dataframe results.""" def __init__(self, labels, mode): @@ -86,50 +118,54 @@ def data(self): # Class to hold BLAST functions class BLASTfunctions(object): + """Class to hold BLAST functions.""" def __init__(self, db_func, blastn_func): + """Initialise struct to hold BLAST functions.""" self.db_func = db_func self.blastn_func = blastn_func # Class to hold BLAST executables class BLASTexes(object): + """Class to hold BLAST functions.""" def __init__(self, format_exe, blast_exe): + """Initialise struct to hold BLAST functions.""" self.format_exe = format_exe self.blast_exe = blast_exe # Class to hold/build BLAST commands class BLASTcmds(object): - """Class to hold BLAST command data for construction of BLASTN and - database formatting commands. - """ + + """Holds BLAST command/database formatting commands.""" def __init__(self, funcs, exes, prefix, outdir): + """Initialise BLASTcmds.""" self.funcs = funcs self.exes = exes self.prefix = prefix self.outdir = outdir def build_db_cmd(self, fname): - """Return database format/build command""" + """Return database format/build command.""" return self.funcs.db_func(fname, self.outdir, self.exes.format_exe)[0] def get_db_name(self, fname): - """Return database filename""" + """Return database filename.""" return self.funcs.db_func(fname, self.outdir, self.exes.format_exe)[1] def build_blast_cmd(self, fname, dbname): - """Return BLASTN command""" + """Return BLASTN command.""" return self.funcs.blastn_func(fname, dbname, self.outdir, self.exes.blast_exe) # Read sequence annotations in from file def get_labels(filename, logger=None): - """Returns a dictionary of alternative sequence labels, or None + r"""Return dictionary of alternative sequence labels, or None. - filename - path to file containing tab-separated table of labels From a6b47a81a4c57e0a5fdf97d0b52a1a3aa199d79f Mon Sep 17 00:00:00 2001 From: Leighton Pritchard Date: Wed, 12 Feb 2020 08:13:59 +0000 Subject: [PATCH 4/4] bump version number --- pyani/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyani/__init__.py b/pyani/__init__.py index 188078d1..7279b564 100644 --- a/pyani/__init__.py +++ b/pyani/__init__.py @@ -1,3 +1,3 @@ # python package version # should match r"^__version__ = '(?P[^']+)'$" for setup.py -__version__ = "0.2.9" +__version__ = "0.2.10"