Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conforms to Spec v0.2 #11

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[run]
omit = gb2260/data/*
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ __pycache__
/build
/dist
/htmlcov
/gb2260/data.py

.pytest_cache/

gb2260/data/
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ python:
- "3.4"
- "pypy"
install:
- "pip install pytest pytest-cov pytest-pep8 coveralls"
- "pip install pytest pytest-cov pytest-pep8 pytest-mock coveralls"
script: "make clean test"
after_success: "coveralls"
branches:
Expand Down
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,20 @@ TOX := tox

all: build

build: gb2260/data.py
build: gb2260/data/__init__.py
$(PYTHON) setup.py sdist bdist_wheel

test: gb2260/data.py
test: gb2260/data/__init__.py
$(PYTEST)

test-all: gb2260/data.py
test-all: gb2260/data/__init__.py
$(TOX)

clean:
rm -rf dist build gb2260/data.py
rm -rf dist build gb2260/data/

gb2260/data.py: data/GB2260*.txt
$(PYTHON) generate.py $? $@
gb2260/data/__init__.py: data/revisions.json
$(PYTHON) generate.py $?

data/GB2260*.txt:
git submodule init
Expand Down
2 changes: 1 addition & 1 deletion data
Submodule data updated 118 files
22 changes: 17 additions & 5 deletions gb2260/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from .division import Division
from __future__ import absolute_import
from __future__ import unicode_literals

__version__ = '0.4.1'
__all__ = ['Division', 'get', 'search']
from gb2260.gb2260 import GB2260
from gb2260.exceptions import (
GB2260Exception,
InvalidCode,
RevisionNotFound,
SourceNotFound,
)

get = Division.get
search = Division.search

__all__ = [
'GB2260',
'GB2260Exception',
'InvalidCode',
'RevisionNotFound',
'SourceNotFound',
]
43 changes: 27 additions & 16 deletions gb2260/_compat.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,36 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import sys


__all__ = ['PY2', 'unicode_type', 'unicode_compatible']
PY2 = sys.version_info[0] == 2


PY2 = sys.version_info[0] == 2
if PY2:
text_type = unicode
binary_type = str

def iteritems(d):
return d.iteritems()
else:
text_type = str
binary_type = bytes

def iteritems(d):
return d.items()


def unicode_compatible(cls):
if PY2: # pragma: no cover
__str__ = getattr(cls, '__str__', None)
__repr__ = getattr(cls, '__repr__', None)
if __str__ is not None:
cls.__unicode__ = __str__
cls.__str__ = lambda self: __str__(self).encode('utf-8')
if __repr__ is not None:
cls.__repr__ = lambda self: __repr__(self).encode('utf-8')
return cls
def ensure_text(value, encoding):
    """Return ``value`` as text.

    A value that is already an instance of ``text_type`` is returned
    unchanged; anything else is decoded with ``encoding``.
    """
    already_text = isinstance(value, text_type)
    return value if already_text else value.decode(encoding)


if PY2: # pragma: no cover
unicode_type = unicode
else: # pragma: no cover
unicode_type = str
def ensure_str(value, encoding):
    """Return ``value`` as the interpreter's native ``str`` type.

    A ``str`` instance is returned unchanged. Otherwise the value is
    encoded with ``encoding`` when ``PY2`` is true and decoded with
    ``encoding`` when it is false.
    """
    if not isinstance(value, str):
        # Only the conversion method for the running interpreter is looked up.
        convert = value.encode if PY2 else value.decode
        return convert(encoding)
    return value
101 changes: 101 additions & 0 deletions gb2260/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import re

from gb2260.exceptions import InvalidCode


# GB/T 2260 conformant code pattern: exactly six digits.
# Every two digits form a province / prefecture / county layer subcode,
# captured by the named groups ``province``, ``prefecture`` and ``county``.
# NOTE: Whether a code has a corresponding division has to be considered along
# with a specific revision; this pattern only checks the shape of the code.
CODE_PATTERN = re.compile(
r'^(?P<province>\d\d)(?P<prefecture>\d\d)(?P<county>\d\d)$')


# The following province / prefecture / county code patterns are from the Spec.
# Subcode 00 is considered special, denoting the upper level division.
# When used as an argument, trailing 00s can be omitted.
# In each pattern the ``code`` group captures only the significant (non-00)
# subcodes, i.e. the code with its trailing 00s stripped.

# Province code: one non-00 subcode followed by zero, one or two 00 subcodes.
# e.g. 320000 / 3200 / 32
# NOTE: Allowing 320000 is an extension to the Spec.
PROVINCE_CODE_PATTERN = re.compile(r'^(?P<code>(?!00)\d\d)(?:00){0,2}$')

# Prefecture code: two non-00 subcodes followed by an optional 00 subcode.
# e.g. 320200 / 3202
PREFECTURE_CODE_PATTERN = re.compile(r'^(?P<code>(?:(?!00)\d\d){2})(?:00)?$')

# County code: three non-00 subcodes, no abbreviation possible.
# e.g. 320203
COUNTY_CODE_PATTERN = re.compile(r'^(?P<code>(?:(?!00)\d\d){3})$')


def to_province(code):
    """Map a code to the code of its province level division.

    The province subcode (the first two digits) is kept and the remaining
    four digits are replaced by ``0000``.

    :raises InvalidCode: if the code does not match ``CODE_PATTERN``
    :raises ValueError: if the province level subcode is 00
    """
    parsed = CODE_PATTERN.match(code)
    if parsed is None:
        raise InvalidCode(code)
    subcode = parsed.group('province')
    if subcode != '00':
        return '{0}0000'.format(subcode)
    # A 00 province subcode has no province to point at.
    raise ValueError(code)


def to_prefecture(code):
    """Map a code to the code of its prefecture level division.

    The province and prefecture subcodes (the first four digits) are kept
    and the county subcode is replaced by ``00``.

    :raises InvalidCode: if the code does not match ``CODE_PATTERN``
    :raises ValueError: if the province or prefecture level subcode is 00
    """
    parsed = CODE_PATTERN.match(code)
    if parsed is None:
        raise InvalidCode(code)
    province, prefecture = parsed.group('province', 'prefecture')
    # Either upper-level subcode being 00 means there is no prefecture.
    if '00' in (province, prefecture):
        raise ValueError(code)
    return '{0}{1}00'.format(province, prefecture)


def make_prefecture_pattern(province_code):
    """Build a compiled pattern for the prefecture codes of a province.

    The pattern is the two-digit province subcode, followed by a non-00
    prefecture subcode, followed by ``00``.

    :raises InvalidCode: if province_code does not match
        ``PROVINCE_CODE_PATTERN``
    """
    parsed = PROVINCE_CODE_PATTERN.match(province_code)
    if parsed is None:
        raise InvalidCode(province_code)
    # ``code`` is the province subcode with any trailing 00s stripped.
    prefix = parsed.group('code')
    return re.compile(r'{0}(?!00)\d\d00'.format(prefix))


def make_county_pattern(prefecture_code):
    """Build a compiled pattern for the county codes of a prefecture.

    The pattern is the four-digit province + prefecture prefix followed by
    a non-00 county subcode.

    :raises InvalidCode: if prefecture_code does not match
        ``PREFECTURE_CODE_PATTERN``
    """
    parsed = PREFECTURE_CODE_PATTERN.match(prefecture_code)
    if parsed is None:
        raise InvalidCode(prefecture_code)
    # ``code`` is the prefecture code with an optional trailing 00 stripped.
    prefix = parsed.group('code')
    return re.compile(r'{0}(?!00)\d\d'.format(prefix))


def split(code):
    """Split a code into the codes of its three division levels.

    Returns a list ``[province, prefecture, county]``. Each entry is the
    full six-digit code for that level, or ``None`` when that level's own
    subcode is 00.

    :raises InvalidCode: if the code does not match ``CODE_PATTERN``
    """
    parsed = CODE_PATTERN.match(code)
    if parsed is None:
        raise InvalidCode(code)
    subcodes = parsed.groups()

    # One template per level, in province / prefecture / county order.
    templates = ('{0}0000', '{0}{1}00', '{0}{1}{2}')
    return [
        None if subcode == '00' else template.format(*subcodes)
        for subcode, template in zip(subcodes, templates)
    ]
134 changes: 34 additions & 100 deletions gb2260/division.py
Original file line number Diff line number Diff line change
@@ -1,128 +1,62 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import weakref
import gb2260.code as dcode
from gb2260._compat import ensure_str

from .data import data
from ._compat import unicode_compatible, unicode_type

class Division(object):

LATEST_YEAR = 2014
__slots__ = ['_code', '_name', '_revision']

def __init__(self, code, name, revision):
self._code = code
self._name = name
self._revision = revision

@unicode_compatible
class Division(object):
"""The administrative division."""
@property
def code(self):
return self._code

_identity_map = dict(
(year, weakref.WeakValueDictionary()) for year in data)
@property
def name(self):
return self._name

def __init__(self, code, name, year=None):
self.code = unicode_type(code)
self.name = unicode_type(name)
self.year = year
@property
def revision(self):
return self._revision.name

def __repr__(self):
if self.year is None:
return 'gb2260.get(%r)' % self.code
else:
return 'gb2260.get(%r, %r)' % (self.code, self.year)
message = '<Division {0} {1} rev={2}>'.format(
self.code, self.name, self.revision)
return ensure_str(message, 'utf-8')

def __str__(self):
name = 'GB2260' if self.year is None else 'GB2260-%d' % self.year
humanize_name = '/'.join(x.name for x in self.stack())
return '<%s %s %s>' % (name, self.code, humanize_name)
def __eq__(self, other):
return (self.code, self.revision) == (other.code, other.revision)

def __hash__(self):
return hash((self.__class__, self.code, self.year))

def __eq__(self, other):
if not isinstance(other, self.__class__):
return NotImplemented
return self.code == other.code and self.year == other.year

@classmethod
def get(cls, code, year=None):
"""Gets an administrative division by its code.

:param code: The division code.
:param year: The year of revision.
:returns: A :class:`gb2260.Division` object.
"""
key = int(code)
if year and year not in data:
raise ValueError('year must be in %r' % list(data))

cache = cls._identity_map[year]
store = data[year]

if key in cache:
return cache[key]
if key in store:
instance = cls(code, store[key], year)
cache[key] = instance
return instance

raise ValueError('%r is not valid division code' % code)

@classmethod
def search(cls, code):
"""Searches administrative division by its code in all revision.

:param code: The division code.
:returns: A :class:`gb2260.Division` object or ``None``.
"""
# sorts from latest to oldest, and ``None`` means latest
key = int(code)
pairs = sorted(
data.items(), reverse=True,
key=lambda pair: make_year_key(pair[0]))
for year, store in pairs:
if key in store:
return cls.get(key, year=year)
return hash((self.__class__, self._code, self._revision.name))

@property
def province(self):
return self.get(self.code[:2] + '0000', self.year)
return self._revision.get_province(self._code)

@property
def is_province(self):
return self.province == self
def prefecture(self):
return self._revision.get_prefecture(self._code)

@property
def prefecture(self):
if self.is_province:
return
return self.get(self.code[:4] + '00', self.year)
def description(self):
return self._revision.describe(self._code)

@property
def is_prefecture(self):
return self.prefecture == self
def is_province(self):
return bool(dcode.PROVINCE_CODE_PATTERN.match(self._code))

@property
def county(self):
if self.is_province or self.is_prefecture:
return
return self
def is_prefecture(self):
return bool(dcode.PREFECTURE_CODE_PATTERN.match(self._code))

@property
def is_county(self):
return self.county is not None

def stack(self):
yield self.province
if self.is_prefecture or self.is_county:
yield self.prefecture
if self.is_county:
yield self


def make_year_key(year):
"""A key generator for sorting years."""
if year is None:
return (LATEST_YEAR, 12)
year = str(year)
if len(year) == 4:
return (int(year), 12)
if len(year) == 6:
return (int(year[:4]), int(year[4:]))
raise ValueError('invalid year %s' % year)
return bool(dcode.COUNTY_CODE_PATTERN.match(self._code))
Loading