From a10de6e6605fb238c5e51ca92a385d4f2b32c3b6 Mon Sep 17 00:00:00 2001 From: Faisal Dosani Date: Thu, 25 Jul 2019 21:43:33 -0400 Subject: [PATCH 1/3] adding .idea to ignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5841546..d11cf99 100644 --- a/.gitignore +++ b/.gitignore @@ -62,6 +62,7 @@ target/ # Editor .vscode/ +.idea/ *.swp # other @@ -69,4 +70,3 @@ data/ wheelhouse/ *.zip *.csv - From 5b6bdf56b0ae447d2e228495379c712776053160 Mon Sep 17 00:00:00 2001 From: Faisal Dosani Date: Thu, 25 Jul 2019 21:43:58 -0400 Subject: [PATCH 2/3] updating badges --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a26ac6..68383dd 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ Engarde ======= -[![Build Status](https://travis-ci.org/TomAugspurger/engarde.svg)](https://travis-ci.org/TomAugspurger/engarde) +[![Build Status](https://travis-ci.org/engarde-dev/engarde.svg)](https://travis-ci.org/engarde-dev/engarde) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + A python package for defensive data analysis. Documentation is at [readthedocs](http://engarde.readthedocs.org/en/latest/). From 1018b86a67684aae25a8048345bcee91d47ac17e Mon Sep 17 00:00:00 2001 From: Faisal Dosani Date: Thu, 25 Jul 2019 21:44:56 -0400 Subject: [PATCH 3/3] Blackened engarde. Closes #58 --- docs/conf.py | 156 +++++++++++++++++++++--------------------- engarde/__init__.py | 4 +- engarde/_version.py | 121 +++++++++++++++++++------------- engarde/checks.py | 56 ++++++++++----- engarde/decorators.py | 70 ++++++++++++++++--- engarde/generic.py | 11 ++- setup.py | 47 ++++++------- 7 files changed, 279 insertions(+), 186 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 358e724..6444256 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,42 +20,42 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('./sphinxext')) +sys.path.insert(0, os.path.abspath("./sphinxext")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.mathjax', + "sphinx.ext.autodoc", + "sphinx.ext.mathjax", # 'IPython.sphinxext.ipython_directive', # 'IPython.sphinxext.ipython_console_highlighting', - 'numpydoc' + "numpydoc", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'engarde' -copyright = '2015, Tom Augspurger' -author = 'Tom Augspurger' +project = "engarde" +copyright = "2015, Tom Augspurger" +author = "Tom Augspurger" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -63,6 +63,7 @@ # # The short X.Y version. from engarde import __version__ as version + # The full version, including alpha/beta/rc tags. release = version @@ -75,37 +76,37 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -115,156 +116,149 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'engardedoc' +htmlhelp_basename = "engardedoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'engarde.tex', 'engarde Documentation', - 'Tom Augspurger', 'manual'), + (master_doc, "engarde.tex", "engarde Documentation", "Tom Augspurger", "manual") ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'engarde', 'engarde Documentation', - [author], 1) -] +man_pages = [(master_doc, "engarde", "engarde Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -273,19 +267,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'engarde', 'engarde Documentation', - author, 'engarde', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "engarde", + "engarde Documentation", + author, + "engarde", + "One line description of project.", + "Miscellaneous", + ) ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/engarde/__init__.py b/engarde/__init__.py index 896bddb..80edaf0 100644 --- a/engarde/__init__.py +++ b/engarde/__init__.py @@ -1,4 +1,4 @@ from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions +__version__ = get_versions()["version"] +del get_versions diff --git a/engarde/_version.py b/engarde/_version.py index ba56d02..cb8a3b0 100644 --- a/engarde/_version.py +++ b/engarde/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -57,6 +56,7 @@ def decorate(f): HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate @@ -67,9 +67,12 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -99,12 +102,17 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) + print( + "guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + } @register_vcs_handler("git", "get_keywords") @@ -144,7 +152,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -153,27 +161,32 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: - print("discarding '%s', no digits" % ",".join(refs-tags)) + print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None - } + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags"} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + } @register_vcs_handler("git", "pieces_from_vcs") @@ -193,9 +206,9 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): GITS = ["git.cmd", "git.exe"] # if there is a tag, this yields TAG-NUM-gHEX[-dirty] # if there are no tags, this yields HEX[-dirty] (no NUM) - describe_out = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long"], - cwd=root) + describe_out = run_command( + GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -218,17 +231,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -237,10 +249,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -251,8 +265,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits return pieces @@ -281,8 +294,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -389,10 +401,12 @@ def render_git_describe_long(pieces): def render(pieces, style): if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"]} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + } if not style or style == "default": style = "pep440" # the default @@ -412,8 +426,12 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + } def get_versions(): @@ -426,8 +444,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -436,12 +453,15 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree"} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -455,6 +475,9 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version"} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + } diff --git a/engarde/checks.py b/engarde/checks.py index bed8281..3ca988f 100644 --- a/engarde/checks.py +++ b/engarde/checks.py @@ -43,6 +43,7 @@ def none_missing(df, columns=None): raise return df + def is_monotonic(df, items=None, increasing=None, strict=False): """ Asserts that the DataFrame is monotonic. @@ -67,23 +68,26 @@ def is_monotonic(df, items=None, increasing=None, strict=False): for col, (increasing, strict) in items.items(): s = pd.Index(df[col]) if increasing: - good = getattr(s, 'is_monotonic_increasing') + good = getattr(s, "is_monotonic_increasing") elif increasing is None: - good = getattr(s, 'is_monotonic') | getattr(s, 'is_monotonic_decreasing') + good = getattr(s, "is_monotonic") | getattr(s, "is_monotonic_decreasing") else: - good = getattr(s, 'is_monotonic_decreasing') + good = getattr(s, "is_monotonic_decreasing") if strict: if increasing: good = good & (s.to_series().diff().dropna() > 0).all() elif increasing is None: - good = good & ((s.to_series().diff().dropna() > 0).all() | - (s.to_series().diff().dropna() < 0).all()) + good = good & ( + (s.to_series().diff().dropna() > 0).all() + | (s.to_series().diff().dropna() < 0).all() + ) else: good = good & (s.to_series().diff().dropna() < 0).all() if not good: raise AssertionError return df + def is_shape(df, shape): """ Asserts that the DataFrame is of a known shape. @@ -101,12 +105,13 @@ def is_shape(df, shape): df : DataFrame """ try: - check = np.all(np.equal(df.shape, shape) | (np.equal(shape, [-1, -1]) | - np.equal(shape, [None, None]))) + check = np.all( + np.equal(df.shape, shape) + | (np.equal(shape, [-1, -1]) | np.equal(shape, [None, None])) + ) assert check except AssertionError as e: - msg = ("Expected shape: {}\n" - "\t\tActual shape: {}".format(shape, df.shape)) + msg = "Expected shape: {}\n" "\t\tActual shape: {}".format(shape, df.shape) e.args = (msg,) raise return df @@ -173,9 +178,10 @@ def within_set(df, items=None): for k, v in items.items(): if not df[k].isin(v).all(): bad = df.loc[~df[k].isin(v), k] - raise AssertionError('Not in set', bad) + raise AssertionError("Not in set", bad) return df + def within_range(df, items=None): """ Assert that a DataFrame is within a range. @@ -197,6 +203,7 @@ def within_range(df, items=None): raise AssertionError("Outside range", bad) return df + def within_n_std(df, n=3): """ Assert that every value is within ``n`` standard @@ -214,12 +221,13 @@ def within_n_std(df, n=3): """ means = df.mean() stds = df.std() - inliers = (np.abs(df[means.index] - means) < n * stds) + inliers = np.abs(df[means.index] - means) < n * stds if not np.all(inliers): msg = generic.bad_locations(~inliers) raise AssertionError(msg) return df + def has_dtypes(df, items): """ Assert that a DataFrame has ``dtypes`` @@ -237,7 +245,11 @@ def has_dtypes(df, items): dtypes = df.dtypes for k, v in items.items(): if not dtypes[k] == v: - raise AssertionError("{} has the wrong dtype. Should be ({}), is ({})".format(k, v,dtypes[k])) + raise AssertionError( + "{} has the wrong dtype. Should be ({}), is ({})".format( + k, v, dtypes[k] + ) + ) return df @@ -295,7 +307,19 @@ def is_same_as(df, df_to_compare, **kwargs): return df -__all__ = ['is_monotonic', 'is_same_as', 'is_shape', 'none_missing', - 'unique_index', 'within_n_std', 'within_range', 'within_set', - 'has_dtypes', 'verify', 'verify_all', 'verify_any', - 'one_to_many','is_same_as',] +__all__ = [ + "is_monotonic", + "is_same_as", + "is_shape", + "none_missing", + "unique_index", + "within_n_std", + "within_range", + "within_set", + "has_dtypes", + "verify", + "verify_all", + "verify_any", + "one_to_many", + "is_same_as", +] diff --git a/engarde/decorators.py b/engarde/decorators.py index 07cc885..ac7b7cf 100644 --- a/engarde/decorators.py +++ b/engarde/decorators.py @@ -1,19 +1,23 @@ # -*- coding: utf-8 -*- -from __future__ import (unicode_literals, absolute_import, division) +from __future__ import unicode_literals, absolute_import, division from functools import wraps import engarde.checks as ck + def none_missing(columns=None): """Asserts that no missing values (NaN) are found""" + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.none_missing(result, columns=columns) return result + return wrapper + return decorate @@ -24,7 +28,9 @@ def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.is_shape(result, shape) return result + return wrapper + return decorate @@ -32,13 +38,16 @@ def unique(columns=None): """ Asserts that columns in the DataFrame only have unique values. """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.unique(result, columns=columns) return result + return wrapper + return decorate @@ -49,20 +58,25 @@ def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.unique_index(result) return result + return wrapper + return decorate + def is_monotonic(items=None, increasing=None, strict=False): def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) - ck.is_monotonic(result, items=items, increasing=increasing, - strict=strict) + ck.is_monotonic(result, items=items, increasing=increasing, strict=strict) return result + return wrapper + return decorate + def within_set(items): """ Check that DataFrame values are within set. @@ -71,13 +85,16 @@ def within_set(items): >>> def f(df): return df """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.within_set(result, items) return result + return wrapper + return decorate @@ -92,13 +109,16 @@ def within_range(items): array-like checks the same (lower, upper) for each column """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.within_range(result, items) return result + return wrapper + return decorate @@ -107,26 +127,33 @@ def within_n_std(n=3): Tests that all values are within 3 standard deviations of their mean. """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.within_n_std(result, n=n) return result + return wrapper + return decorate + def has_dtypes(items): """ Tests that the dtypes are as specified in items. """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.has_dtypes(result, items) return result + return wrapper + return decorate @@ -134,13 +161,16 @@ def one_to_many(unitcol, manycol): """ Tests that each value in ``manycol`` only is associated with just a single value in ``unitcol``. """ + def decorate(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.one_to_many(result, unitcol, manycol) return result + return wrapper + return decorate @@ -150,20 +180,23 @@ def verify(func, *args, **kwargs): """ return _verify(func, None, *args, **kwargs) + def verify_all(func, *args, **kwargs): """ Assert that all of `func(*args, **kwargs)` are true. """ - return _verify(func, 'all', *args, **kwargs) + return _verify(func, "all", *args, **kwargs) + def verify_any(func, *args, **kwargs): """ Assert that any of `func(*args, **kwargs)` are true. """ - return _verify(func, 'any', *args, **kwargs) + return _verify(func, "any", *args, **kwargs) + def _verify(func, _kind, *args, **kwargs): - d = {None: ck.verify, 'all': ck.verify_all, 'any': ck.verify_any} + d = {None: ck.verify, "all": ck.verify_all, "any": ck.verify_any} vfunc = d[_kind] def decorate(operation_func): @@ -172,7 +205,9 @@ def wrapper(*operation_args, **operation_kwargs): result = operation_func(*operation_args, **operation_kwargs) vfunc(result, func, *args, **kwargs) return result + return wrapper + return decorate @@ -183,12 +218,25 @@ def wrapper(*args, **kwargs): result = func(*args, **kwargs) ck.is_same_as(result, df_to_compare, **assert_kwargs) return result + return wrapper - return decorate + return decorate -__all__ = ['is_monotonic', 'is_same_as', 'is_shape', 'none_missing', - 'unique_index', 'within_range', 'within_set', 'has_dtypes', - 'verify', 'verify_all', 'verify_any', 'within_n_std', - 'one_to_many','is_same_as',] +__all__ = [ + "is_monotonic", + "is_same_as", + "is_shape", + "none_missing", + "unique_index", + "within_range", + "within_set", + "has_dtypes", + "verify", + "verify_all", + "verify_any", + "within_n_std", + "one_to_many", + "is_same_as", +] diff --git a/engarde/generic.py b/engarde/generic.py index 0265924..4ecc2b8 100644 --- a/engarde/generic.py +++ b/engarde/generic.py @@ -12,6 +12,7 @@ # Generic verify # -------------- + def verify(df, check, *args, **kwargs): """ Generic verify. Assert that ``check(df, *args, **kwargs)`` is @@ -32,11 +33,12 @@ def verify(df, check, *args, **kwargs): try: assert result except AssertionError as e: - msg = '{} is not true'.format(check.__name__) + msg = "{} is not true".format(check.__name__) e.args = (msg, df) raise return df + def verify_all(df, check, *args, **kwargs): """ Verify that all the entries in ``check(df, *args, **kwargs)`` @@ -51,6 +53,7 @@ def verify_all(df, check, *args, **kwargs): raise return df + def verify_any(df, check, *args, **kwargs): """ Verify that any of the entries in ``check(df, *args, **kwargs)`` @@ -60,15 +63,17 @@ def verify_any(df, check, *args, **kwargs): try: assert np.any(result) except AssertionError as e: - msg = '{} not true for any'.format(check.__name__) + msg = "{} not true for any".format(check.__name__) e.args = (msg, df) raise return df + # --------------- # Error reporting # --------------- + def bad_locations(df): columns = df.columns all_locs = chain.from_iterable(zip(df.index, cycle([col])) for col in columns) @@ -76,5 +81,5 @@ def bad_locations(df): msg = bad.values return msg -__all__ = ['verify', 'verify_all', 'verify_any', 'bad_locations'] +__all__ = ["verify", "verify_all", "verify_any", "bad_locations"] diff --git a/setup.py b/setup.py index 36bce9d..0181215 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ from setuptools import setup, find_packages + # To use a consistent encoding from os import path import versioneer @@ -6,43 +7,35 @@ here = path.abspath(path.dirname(__file__)) setup( - name='engarde', + name="engarde", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), - - description='A python package for defensive data analysis.', - long_description='A python package for defensive data analysis.', - - url='https://github.com/tomaugspurger/engarde', - + description="A python package for defensive data analysis.", + long_description="A python package for defensive data analysis.", + url="https://github.com/tomaugspurger/engarde", # Author details - author='Tom Augspurger', - author_email='tom.w.augspurger@gmail.com', - + author="Tom Augspurger", + author_email="tom.w.augspurger@gmail.com", # Choose your license - license='MIT', - + license="MIT", classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.4", ], - - keywords='data analysis', - packages=find_packages(exclude=['tests']), - install_requires=['numpy', 'pandas', 'six'], - + keywords="data analysis", + packages=find_packages(exclude=["tests"]), + install_requires=["numpy", "pandas", "six"], # List additional groups of dependencies here (e.g. development # dependencies). You can install these using the following syntax, # for example: # $ pip install -e .[dev,test] extras_require={ - 'dev': [''], - 'test': ['coverage', 'pytest', 'ipython', 'traitlets', 'numpydoc'], + "dev": ["black"], + "test": ["coverage", "pytest", "ipython", "traitlets", "numpydoc"], }, - )