From ca4292f1d61d3058d9293b048cfbac7bf37f7416 Mon Sep 17 00:00:00 2001 From: Geoffrey Spear Date: Tue, 6 Oct 2015 11:39:19 -0400 Subject: [PATCH] Initial commit --- .gitignore | 108 ++++++++++++ .travis.yml | 32 ++++ docs/source/conf.py | 296 ++++++++++++++++++++++++++++++++ docs/source/index.rst | 30 ++++ docs/source/marcholdings.rst | 11 ++ marcholdings/__init__.py | 5 + marcholdings/holding.py | 62 +++++++ marcholdings/test/__init__.py | 0 marcholdings/test/test_parse.py | 51 ++++++ marcholdings/version.py | 2 + setup.py | 34 ++++ 11 files changed, 631 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/marcholdings.rst create mode 100644 marcholdings/__init__.py create mode 100644 marcholdings/holding.py create mode 100644 marcholdings/test/__init__.py create mode 100644 marcholdings/test/test_parse.py create mode 100644 marcholdings/version.py create mode 100644 setup.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..edd6ef7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,108 @@ +# Created by .ignore support plugin (hsz.mobi) +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit tests / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio + +*.iml + +## Directory-based project format: +.idea/ +# if you remove the above rule, at least ignore the following: + +# User-specific stuff: +# .idea/workspace.xml +# .idea/tasks.xml +# .idea/dictionaries + +# Sensitive or high-churn files: +# .idea/dataSources.ids +# .idea/dataSources.xml +# .idea/sqlDataSources.xml +# .idea/dynamic.xml +# .idea/uiDesigner.xml + +# Gradle: +# .idea/gradle.xml +# .idea/libraries + +# Mongo Explorer plugin: +# .idea/mongoSettings.xml + +## File-based project format: +*.ipr +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..78a3a8a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,32 @@ +sudo: false + +language: python +python: + - "2.6" + - "2.7" + - "3.3" + - "3.4" + - "3.5" + - "pypy" + - "pypy3" + - "nightly" + +install: + - pip install . 
+ - pip install git+https://github.com/PyCQA/pep8.git + - pip install pytest + - pip install pytest-cov + - pip install coveralls + - pip install flake8 + +before_script: + - "flake8 marcholdings" + +script: + py.test --cov=marcholdings marcholdings/test + +after_success: + coveralls + +notifications: + irc: "chat.freenode.net#pycounter" diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..d843b90 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# marcholdings documentation build configuration file, created by +# sphinx-quickstart on Tue Oct 6 09:47:36 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +import shlex + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. 
+# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'marcholdings' +copyright = '2015, Health Sciences Library System, University of Pittsburgh' +author = 'Health Sciences Library System, University of Pittsburgh' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0.2' +# The full version, including alpha/beta/rc tags. +release = '0.0.2' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. 
+#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. 
+htmlhelp_basename = 'marcholdingsdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'marcholdings.tex', 'marcholdings Documentation', + 'Health Sciences Library System, University of Pittsburgh', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'marcholdings', 'marcholdings Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'marcholdings', 'marcholdings Documentation', + author, 'marcholdings', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..b880acb --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +.. marcholdings documentation master file, created by + sphinx-quickstart on Tue Oct 6 09:47:36 2015. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to marcholdings's documentation! +======================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + marcholdings + +API Docs +======== + +.. autosummary:: + + marcholdings.holding + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/source/marcholdings.rst b/docs/source/marcholdings.rst new file mode 100644 index 0000000..21b2f87 --- /dev/null +++ b/docs/source/marcholdings.rst @@ -0,0 +1,11 @@ +marcholdings API Doc +==================== + +marcholdings.holding module +--------------------------- + +.. automodule:: marcholdings.holding + +.. autoclass:: Holding + +.. 
autofunction:: parse_date
diff --git a/marcholdings/__init__.py b/marcholdings/__init__.py
new file mode 100644
index 0000000..9c858e8
--- /dev/null
+++ b/marcholdings/__init__.py
@@ -0,0 +1,5 @@
+"""parse Z39.71 textual holdings"""
+from marcholdings.version import __version__
+from marcholdings.holding import Holding
+
+__all__ = ['__version__', 'Holding']
diff --git a/marcholdings/holding.py b/marcholdings/holding.py
new file mode 100644
index 0000000..c24e519
--- /dev/null
+++ b/marcholdings/holding.py
@@ -0,0 +1,80 @@
+"""MARC holdings"""
+
+import datetime
+import calendar
+import re
+
+
+class Holding(object):
+    """Holdings information from a MARC record
+
+    The dates are read from the first parenthesized group of the
+    statement, or from the whole statement when it has no parentheses.
+    A statement that ends with ``-`` is open-ended: ``end_date`` is None.
+
+    :param text_holding: text of a non-gap holding
+    :raises ValueError: if the chronology cannot be parsed
+    """
+    def __init__(self, text_holding):
+        # Stray surrounding whitespace must not hide a trailing '-'.
+        text_holding = text_holding.strip()
+        if '(' in text_holding:
+            date_part = text_holding.split('(')[1].split(')')[0]
+        else:
+            date_part = text_holding
+        if text_holding.endswith('-'):
+            self.end_date = None
+            self.start_date = parse_date(date_part)
+        else:
+            parts = date_part.split('-')
+            start = parts[0]
+            end = parts[1] if len(parts) > 1 else parts[0]
+            if (':' not in end and not end.isdigit() and
+                    not all(x.isdigit() for x in end.split('/'))):
+                # The end names no year of its own (e.g. "Apr."), so it
+                # borrows the start's.  Use everything before the first
+                # ':' rather than a fixed five characters, so a combined
+                # year such as "1990/1991" is carried over whole.
+                end = start.split(':')[0] + ':' + end
+            self.start_date = parse_date(start)
+            self.end_date = parse_date(end, True)
+
+
+def parse_date(date_string, end=False):
+    """Parse a date string in Z39.71 format, return a datetime.date
+
+    Parts left out of the string default to the earliest possible value
+    (January, the 1st), or to the latest when ``end`` is true.  For a
+    combined year or month ("1840/1842", "May/June") the first element
+    is used, or the second when ``end`` is true.
+
+    :param date_string: date in Z39.71 format
+    :param end: Boolean; whether date represents the end of a range
+    :raises ValueError: if the year, month or day is not recognized
+    """
+    months = [None, 'Jan.', 'Feb.', 'Mar.', 'Apr.', 'May', 'June', 'July',
+              'Aug.', 'Sept.', 'Oct.', 'Nov.', 'Dec.']
+    parts = re.split(r'[: ]', date_string)
+    text_year = parts[0]
+    if '/' in text_year:
+        text_year = text_year.split('/')[1 if end else 0]
+    year = int(text_year)
+    month = 1
+    if len(parts) > 1:
+        month_text = parts[1]
+        if '/' in month_text:
+            month_text = month_text.split('/')[1 if end else 0]
+        if month_text not in months:
+            raise ValueError('unrecognized month %r in %r'
+                             % (month_text, date_string))
+        month = months.index(month_text)
+    elif end:
+        month = 12
+
+    day = 1
+    if len(parts) == 3:
+        day = int(parts[2])
+    elif end:
+        day = calendar.monthrange(year, month)[1]
+
+    return datetime.date(year, month, day)
diff --git a/marcholdings/test/__init__.py b/marcholdings/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/marcholdings/test/test_parse.py b/marcholdings/test/test_parse.py
new file mode 100644
index 0000000..5121be2
--- /dev/null
+++ b/marcholdings/test/test_parse.py
@@ -0,0 +1,56 @@
+import datetime
+import marcholdings
+import unittest
+
+
+class TestDateParsing(unittest.TestCase):
+    def test_simple_start_date(self):
+        holding = marcholdings.Holding("v.1(2010)-")
+        self.assertEqual(holding.start_date, datetime.date(2010, 1, 1))
+        self.assertIsNone(holding.end_date)
+
+    def test_single_year(self):
+        holding = marcholdings.Holding("v.1(1990)")
+        self.assertEqual(holding.start_date, datetime.date(1990, 1, 1))
+        self.assertEqual(holding.end_date, datetime.date(1990, 12, 31))
+
+    def test_partial_year(self):
+        holding = marcholdings.Holding("v.1:no.2-4(1990:Feb.-Apr.)")
+        self.assertEqual(holding.start_date, datetime.date(1990, 2, 1))
+        self.assertEqual(holding.end_date, datetime.date(1990, 4, 30))
+
+    def test_complex_dates(self):
+        holding = marcholdings.Holding("v.2:no.3-v.6:no.5(2002:Mar.-2006:May")
+        self.assertEqual(holding.start_date, datetime.date(2002, 3, 1))
+        self.assertEqual(holding.end_date, datetime.date(2006, 5, 31))
+
+    def test_with_days(self):
+        holding = marcholdings.Holding("v.2:no.3-v.6:no.5(2002:Mar. 2-2006:May 6")
+        self.assertEqual(holding.start_date, datetime.date(2002, 3, 2))
+        self.assertEqual(holding.end_date, datetime.date(2006, 5, 6))
+
+    def test_open_partial(self):
+        holding = marcholdings.Holding("v.1:no.2(1990:Feb.)-")
+        self.assertEqual(holding.start_date, datetime.date(1990, 2, 1))
+        self.assertIsNone(holding.end_date)
+
+    def test_multi_year_open(self):
+        holding = marcholdings.Holding("1992/1996-")
+        self.assertEqual(holding.start_date, datetime.date(1992, 1, 1))
+        self.assertIsNone(holding.end_date)
+
+    def test_multi_year_single_volume(self):
+        holding = marcholdings.Holding("v.1(1840/1842)")
+        self.assertEqual(holding.start_date, datetime.date(1840, 1, 1))
+        self.assertEqual(holding.end_date, datetime.date(1842, 12, 31))
+
+    def test_ugly_comma(self):
+        holding = marcholdings.Holding(
+            "v.1:no.3,5-6(1982:May/June,Sept./Oct.-Nov./Dec.)-")
+        self.assertEqual(holding.start_date, datetime.date(1982, 5, 1))
+        self.assertIsNone(holding.end_date)
+
+    def test_multi_year_partial(self):
+        holding = marcholdings.Holding("v.5(1990/1991:Nov.-Feb.)")
+        self.assertEqual(holding.start_date, datetime.date(1990, 11, 1))
+        self.assertEqual(holding.end_date, datetime.date(1991, 2, 28))
diff --git a/marcholdings/version.py b/marcholdings/version.py
new file mode 100644
index 0000000..07cb342
--- /dev/null
+++ b/marcholdings/version.py
@@ -0,0 +1,2 @@
+"""version information"""
+__version__ = '0.0.2'
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..fa77465
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,34 @@
+from __future__ import with_statement
+from setuptools import setup, find_packages
+
+version = {}  # will be set by exec below
+
+with open('marcholdings/version.py', 'rb') as fp:
+    exec(fp.read(), version)
+
+with open('README.rst') as readmefile:
+    readme = readmefile.read()
+
+setup(
+    name='marcholdings',
+    version=version['__version__'],
+    packages=find_packages(),
+    author='Health Sciences Library System, University of Pittsburgh',
+    author_email='speargh@pitt.edu',
+    maintainer='Geoffrey Spear',
+    maintainer_email='speargh@pitt.edu',
+    url='http://www.github.com/pitthsls/pycounter',
+    description='Parse NISO Z39.71 textual holdings',
+    long_description=readme,
+    keywords='library MARC holdings Z39.71',
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'License :: OSI Approved :: MIT License',
+        'Intended Audience :: Developers',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+    ],
+    )