If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = 'marcholdings' +copyright = '2015, Health Sciences Library System, University of Pittsburgh' +author = 'Health Sciences Library System, University of Pittsburgh' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0.2' +# The full version, including alpha/beta/rc tags. +release = '0.0.2' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. 
+#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. 
The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'marcholdingsdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'marcholdings.tex', 'marcholdings Documentation', + 'Health Sciences Library System, University of Pittsburgh', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. 
+#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'marcholdings', 'marcholdings Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'marcholdings', 'marcholdings Documentation', + author, 'marcholdings', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'': None} diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..b880acb --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,30 @@ +.. marcholdings documentation master file, created by + sphinx-quickstart on Tue Oct 6 09:47:36 2015. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to marcholdings's documentation! +======================================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + marcholdings + +API Docs +======== + +.. 
"""MARC holdings"""

import calendar
import datetime
import re

# Month names exactly as they are spelled in the holdings statements this
# module parses, mapped to their calendar month numbers.
_MONTH_NUMBERS = {
    'Jan.': 1, 'Feb.': 2, 'Mar.': 3, 'Apr.': 4, 'May': 5, 'June': 6,
    'July': 7, 'Aug.': 8, 'Sept.': 9, 'Oct.': 10, 'Nov.': 11, 'Dec.': 12,
}

# A chronology such as "2002:Mar. 2" is split on colons and spaces into
# its year, month and day fields.
_FIELD_SEPARATOR = re.compile(r'[: ]')


class Holding(object):
    """Holdings information from a MARC record

    After construction the instance exposes:

    * ``start_date`` -- ``datetime.date`` of the first day covered.
    * ``end_date`` -- ``datetime.date`` of the last day covered, or ``None``
      when the statement is open-ended (its text ends with ``-``).

    :param text_holding: text of a non-gap holding
    :raises ValueError: if a year, month or day field cannot be parsed
    """
    def __init__(self, text_holding):
        # The chronology is whatever sits inside the first pair of
        # parentheses; a missing ")" is tolerated, and a statement with no
        # parentheses at all is treated as chronology in its entirety.
        if '(' in text_holding:
            date_part = text_holding.split('(')[1].split(')')[0]
        else:
            date_part = text_holding

        if text_holding.endswith('-'):
            # Open-ended holding: only a start date exists.
            self.end_date = None
            self.start_date = parse_date(date_part)
            return

        parts = date_part.split('-')
        start = parts[0]
        # A statement without a range ("v.1(1990)") starts and ends in the
        # same period.
        end = parts[1] if len(parts) > 1 else start

        # An end such as "Apr." in "1990:Feb.-Apr." carries no year of its
        # own, so it inherits the year field of the start.  Taking
        # everything before the first ":" (instead of a fixed five
        # characters) also handles combined years such as "1990/1991".
        end_has_year = (
            ':' in end or all(piece.isdigit() for piece in end.split('/'))
        )
        if not end_has_year:
            end = start.partition(':')[0] + ':' + end

        self.start_date = parse_date(start)
        self.end_date = parse_date(end, True)


def parse_date(date_string, end=False):
    """Parse a date string in Z39.71 format, return a datetime.date

    Missing fields are filled in from the boundary being parsed: a start
    date defaults to January and to day 1, an end date defaults to December
    and to the last day of its month.  For combined values such as
    ``1840/1842`` or ``May/June`` the first alternative is used for a start
    date and the second for an end date.

    :param date_string: date in Z39.71 format
    :param end: Boolean; whether date represents the end of a range
    :returns: the parsed ``datetime.date``
    :raises ValueError: if the year or day is not an integer, or the month
        is not one of the recognised month names
    """
    pick = 1 if end else 0
    parts = _FIELD_SEPARATOR.split(date_string)

    text_year = parts[0]
    if '/' in text_year:
        text_year = text_year.split('/')[pick]
    year = int(text_year)

    month = 1
    if len(parts) > 1:
        month_text = parts[1]
        if '/' in month_text:
            month_text = month_text.split('/')[pick]
        try:
            month = _MONTH_NUMBERS[month_text]
        except KeyError:
            # Same exception type the list lookup used to raise, with a
            # message that names the offending input.
            raise ValueError(
                'unrecognised month %r in %r' % (month_text, date_string))
    elif end:
        month = 12

    day = 1
    if len(parts) == 3:
        day = int(parts[2])
    elif end:
        # monthrange returns (weekday of the 1st, number of days in month).
        day = calendar.monthrange(year, month)[1]

    return datetime.date(year, month, day)
"""Packaging script for marcholdings."""
from __future__ import with_statement

from setuptools import find_packages, setup

version = {}  # filled in by the exec below

# Run the version module's source in a scratch namespace so that
# __version__ can be read from the ``version`` dict.
with open('marcholdings/version.py', 'rb') as fp:
    exec(fp.read(), version)

with open('README.rst') as readmefile:
    readme = readmefile.read()

# NOTE(review): author_email, maintainer_email and url are empty strings in
# this copy of the script -- confirm the intended values before publishing.
setup(
    name='marcholdings',
    version=version['__version__'],
    packages=find_packages(),
    author='Health Sciences Library System, University of Pittsburgh',
    author_email='',
    maintainer='Geoffrey Spear',
    maintainer_email='',
    url='',
    description='Parse NISO Z39.71 textual holdings',
    long_description=readme,
    keywords='library MARC holdings Z39.71',
    classifiers=[
        'Development Status :: 3 - Alpha',
        'License :: OSI Approved :: MIT License',
        'Intended Audience :: Developers',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
    ],
)