Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Make flanker compatible with python 3 #106

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
009f351
Add six to required setup packages
salehe Nov 2, 2015
628c497
Add setup classifiers to indicate python 3 compatibility
salehe Nov 3, 2015
61db717
Make EmailAddress.full_spec() always return str
salehe Nov 13, 2015
cf9d6f2
Use bytes delimiter for EmailAddress.full_spec()
salehe Nov 13, 2015
5574019
Make EmailAddress constructor always convert spec to ascii bytes
salehe Nov 13, 2015
fb8956d
Add TODO for RFC 6530 compliancy
salehe Nov 13, 2015
2ede07c
Make EmailAddress constructor always convert display_name to unicode
salehe Nov 13, 2015
501fde0
Make sure EmailAddress.full_spec() always returns bytes
salehe Nov 13, 2015
7c7afef
Make sure EmailAddress.to_unicode() properly converts to unicode
salehe Nov 13, 2015
5eebaeb
Make UrlAddress.full_spec() always return bytes and UrlAddress.to_uni…
salehe Nov 13, 2015
316675c
Use explicit bytes separator for joining full_specs
salehe Nov 13, 2015
7c954d8
Add TODO for IDN compliancy
salehe Nov 13, 2015
e648538
Make result of _normalize_address_list always of unicode type
salehe Nov 15, 2015
a26cd64
Add TODO for better handling of non-ascii urls
salehe Nov 15, 2015
0502448
Set bytes type explicitly in corrector
salehe Nov 15, 2015
dd585d9
Make smart_unquote honor input type
salehe Nov 15, 2015
dea7a11
Type smart_quote for working with bytes explicitly
salehe Nov 15, 2015
4a797cf
Make tokenizer eq checks and regex match checks str type safe
salehe Nov 17, 2015
2022ec0
Add missed ASCII_FLAG to regex
salehe Nov 17, 2015
71a8e04
Assume all strings as bytes in _mailbox_post_processing_checks
salehe Nov 17, 2015
c54f34f
Handle both bytes and unicode in _addr_spec
salehe Nov 17, 2015
30d9df0
Make cleanup_display_name and cleanup_email honor input type
salehe Nov 17, 2015
efd2008
Always convert contains_control_chars input to unicode
salehe Nov 17, 2015
a958dde
Make more adjustments to tokenizer
salehe Nov 17, 2015
24212e0
Convert domains lookup table items to bytes type
salehe Nov 17, 2015
6caa41d
Use appropriate concatenate method for display name
salehe Nov 17, 2015
c64dc7a
Use explicit bytes in preparse_address
salehe Nov 17, 2015
733150a
Use explicit bytes regex for patterns
salehe Nov 17, 2015
222e57c
Use explicit bytes in suggest_alternate
salehe Nov 17, 2015
42ba420
Convert query key to native str
salehe Nov 17, 2015
51bdba6
Use explicit bytes
salehe Nov 17, 2015
12b75f0
Use explicit bytes regex for non-unicode regex
salehe Nov 17, 2015
dbdfb65
Use bytes for all custom email validations
salehe Nov 17, 2015
5f78a6e
Modify .travis.yml to run tests on travis
salehe Nov 18, 2015
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
language: python
python: 2.7

env:
- TOX_ENV=py26
- TOX_ENV=py27
- TOX_ENV=pypy

install:
- pip install tox

script:
- tox -e $TOX_ENV
python:
- "2.6"
- "2.7"
- "3.4"
- "3.5"
install: "python setup.py install"
script: "nosetests -w tests"
19 changes: 12 additions & 7 deletions flanker/addresslib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
'''
import re

# Resolve the regex ASCII flag once at import time. When the `re` module has
# no ASCII attribute the lookup raises AttributeError, and ASCII_FLAG falls
# back to 0, i.e. "no extra flag", so it can still be passed as a flags value.
try:
ASCII_FLAG = re.ASCII
except AttributeError:
ASCII_FLAG = 0


from flanker.addresslib.drivers.redis_driver import RedisCache
from flanker.addresslib.drivers.dns_lookup import DNSLookup

Expand All @@ -21,16 +27,15 @@
from flanker.addresslib.plugins import hotmail
from flanker.addresslib.plugins import google


mx_cache = RedisCache()
dns_lookup = DNSLookup()

YAHOO_PATTERN = re.compile(r'''mta[0-9]+\.am[0-9]+\.yahoodns\.net$''')
GMAIL_PATTERN = re.compile(r'''.*gmail-smtp-in\.l\.google.com$''')
AOL_PATTERN = re.compile(r'''.*\.mx\.aol\.com$''')
ICLOUD_PATTERN = re.compile(r'''.*\.mail\.icloud\.com$''')
HOTMAIL_PATTERN = re.compile(r'''mx[0-9]\.hotmail\.com''')
GOOGLE_PATTERN = re.compile(r'''(.*aspmx\.l\.google\.com$)|(aspmx.*\.googlemail.com$)''', re.IGNORECASE)
YAHOO_PATTERN = re.compile(br'''mta[0-9]+\.am[0-9]+\.yahoodns\.net$''')
GMAIL_PATTERN = re.compile(br'''.*gmail-smtp-in\.l\.google.com$''')
AOL_PATTERN = re.compile(br'''.*\.mx\.aol\.com$''')
ICLOUD_PATTERN = re.compile(br'''.*\.mail\.icloud\.com$''')
HOTMAIL_PATTERN = re.compile(br'''mx[0-9]\.hotmail\.com''')
GOOGLE_PATTERN = re.compile(br'''(.*aspmx\.l\.google\.com$)|(aspmx.*\.googlemail.com$)''', re.IGNORECASE)

CUSTOM_GRAMMAR_LIST = [
(YAHOO_PATTERN, yahoo),
Expand Down
36 changes: 22 additions & 14 deletions flanker/addresslib/address.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def parse_list(address_list, strict=False, as_tuple=False, metrics=False):

# if we have a list, transform it into a string first
if isinstance(address_list, list):
address_list = ', '.join(_normalize_address_list(address_list))
address_list = u', '.join(_normalize_address_list(address_list))

# parse
try:
Expand Down Expand Up @@ -187,7 +187,7 @@ def validate_address(addr_spec, metrics=False):

# run parser against address
bstart = time.time()
paddr = parse('@'.join(addr_parts), addr_spec_only=True)
paddr = parse(b'@'.join(addr_parts), addr_spec_only=True)
mtimes['parsing'] = time.time() - bstart
if paddr is None:
return None, mtimes
Expand Down Expand Up @@ -319,6 +319,7 @@ class Type(object):
Url = 'url'


# TODO: RFC 6530 (Internationalization of address, etc) compliancy
class EmailAddress(Address):
"""
Represents a fully parsed email address with built-in support for MIME
Expand Down Expand Up @@ -354,17 +355,21 @@ def __init__(self, display_name, spec=None, parsed_name=None):

assert(spec)

spec = spec if isinstance(spec, str) else spec.encode('ascii')

if parsed_name:
self.display_name = smart_unquote(mime_to_unicode(parsed_name))
elif display_name:
self.display_name = display_name
else:
self.display_name = u''

parts = spec.rsplit('@', 1)
self.display_name = self.display_name if isinstance(self.display_name, unicode) else self.display_name.decode('ascii')

parts = spec.rsplit(b'@', 1)
self.mailbox = parts[0]
self.hostname = parts[1].lower()
self.address = self.mailbox + "@" + self.hostname
self.address = self.mailbox + b"@" + self.hostname
self.addr_type = self.Type.Email

def __repr__(self):
Expand Down Expand Up @@ -401,16 +406,16 @@ def full_spec(self):
if self.display_name:
encoded_display_name = smart_quote(encode_string(
None, self.display_name, maxlinelen=MAX_ADDRESS_LENGTH))
return '{0} <{1}>'.format(encoded_display_name, self.address)
return u'{0}'.format(self.address)
return b'{0} <{1}>'.format(encoded_display_name, self.address)
return b'{0}'.format(self.address)

def to_unicode(self):
"""
Converts to unicode.
"""
if self.display_name:
return u'{0} <{1}>'.format(self.display_name, self.address)
return u'{0}'.format(self.address)
return u'{0} <{1}>'.format(self.display_name, self.address.decode('ascii'))
return u'{0}'.format(self.address.decode('ascii'))

def __cmp__(self, other):
return True
Expand Down Expand Up @@ -452,6 +457,7 @@ def __hash__(self):
return hash(self.address.lower())


# TODO: Non-ASCII addresses compliancy
class UrlAddress(Address):
"""
Represents a parsed URL:
Expand Down Expand Up @@ -498,10 +504,10 @@ def __str__(self):
return self.address

def full_spec(self):
return self.address
return self.address if isinstance(self.address, bytes) else self.address.encode('idna')

def to_unicode(self):
return self.address
return self.address if isinstance(self.address, unicode) else self.address.decode('idna')

def __repr__(self):
return self.address
Expand Down Expand Up @@ -561,7 +567,7 @@ def __eq__(self, other):
return set(self.container) == set(other.container)

def __repr__(self):
return ''.join(['[', self.full_spec(), ']'])
return b''.join([b'[', self.full_spec(), b']'])

def __add__(self, other):
"""
Expand All @@ -573,12 +579,12 @@ def __add__(self, other):
result = self.container + other.container
return AddressList(result)

def full_spec(self, delimiter=", "):
def full_spec(self, delimiter=b", "):
"""
Returns a full string which looks pretty much what the original was
like
>>> adl = AddressList("Foo <[email protected]>, Bar <[email protected]>")
>>> adl.full_spec(delimiter='; ')
>>> adl.full_spec(delimiter=b'; ')
'Foo <[email protected]; Bar <[email protected]>'
"""
return delimiter.join(addr.full_spec() for addr in self.container)
Expand Down Expand Up @@ -623,7 +629,9 @@ def _normalize_address_list(address_list):
for addr in address_list:
if isinstance(addr, Address):
parts.append(addr.to_unicode())
if isinstance(addr, basestring):
elif isinstance(addr, unicode):
parts.append(addr)
elif isinstance(addr, str):
parts.append(addr.decode('ascii'))

return parts
106 changes: 53 additions & 53 deletions flanker/addresslib/corrector.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,63 +33,63 @@ def suggest(word, cutoff=0.77):

MOST_COMMON_DOMAINS = [
# mailgun :)
'mailgun.net',
b'mailgun.net',
# big esps
'yahoo.com',
'yahoo.ca',
'yahoo.co.jp',
'yahoo.co.uk',
'yahoo.com.br',
'ymail.com',
'hotmail.com',
'hotmail.ca',
'hotmail.co.uk',
'windowslive.com',
'live.com',
'outlook.com',
'msn.com',
'gmail.com',
'googlemail.com',
'aol.com',
'aim.com',
'icloud.com',
'me.com',
'mac.com',
'facebook.com',
b'yahoo.com',
b'yahoo.ca',
b'yahoo.co.jp',
b'yahoo.co.uk',
b'yahoo.com.br',
b'ymail.com',
b'hotmail.com',
b'hotmail.ca',
b'hotmail.co.uk',
b'windowslive.com',
b'live.com',
b'outlook.com',
b'msn.com',
b'gmail.com',
b'googlemail.com',
b'aol.com',
b'aim.com',
b'icloud.com',
b'me.com',
b'mac.com',
b'facebook.com',
# big isps
'comcast.net',
'sbcglobal.net',
'bellsouth.net',
'verizon.net',
'earthlink.net',
'cox.net',
'charter.net',
'shaw.ca',
'bell.net'
b'comcast.net',
b'sbcglobal.net',
b'bellsouth.net',
b'verizon.net',
b'earthlink.net',
b'cox.net',
b'charter.net',
b'shaw.ca',
b'bell.net'
]

# domains that the corrector doesn't fix that we should fix
LOOKUP_TABLE = {
u'yahoo': u'yahoo.com',
u'gmail': u'gmail.com',
u'hotmail': u'hotmail.com',
u'live': u'live.com',
u'outlook': u'outlook.com',
u'msn': u'msn.com',
u'googlemail': u'googlemail.com',
u'aol': u'aol.com',
u'aim': u'aim.com',
u'icloud': u'icloud.com',
u'me': u'me.com',
u'mac': u'mac.com',
u'facebook': u'facebook.com',
u'comcast': u'comcast.net',
u'sbcglobal': u'sbcglobal.net',
u'bellsouth': u'bellsouth.net',
u'verizon': u'verizon.net',
u'earthlink': u'earthlink.net',
u'cox': u'cox.net',
u'charter': u'charter.net',
u'shaw': u'shaw.ca',
u'bell': u'bell.net'
b'yahoo': b'yahoo.com',
b'gmail': b'gmail.com',
b'hotmail': b'hotmail.com',
b'live': b'live.com',
b'outlook': b'outlook.com',
b'msn': b'msn.com',
b'googlemail': b'googlemail.com',
b'aol': b'aol.com',
b'aim': b'aim.com',
b'icloud': b'icloud.com',
b'me': b'me.com',
b'mac': b'mac.com',
b'facebook': b'facebook.com',
b'comcast': b'comcast.net',
b'sbcglobal': b'sbcglobal.net',
b'bellsouth': b'bellsouth.net',
b'verizon': b'verizon.net',
b'earthlink': b'earthlink.net',
b'cox': b'cox.net',
b'charter': b'charter.net',
b'shaw': b'shaw.ca',
b'bell': b'bell.net'
}
3 changes: 3 additions & 0 deletions flanker/addresslib/drivers/dns_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ def __init__(self):

def __getitem__(self, key):
    """
    Look up ``key`` through ``dnsq.mx_hosts_for`` and return its result.

    dnsq accepts native python strs (bytes in python 2, unicode in
    python 3), so a bytes key is decoded to text only on interpreters
    where ``str`` is the text type. Any lookup failure yields an empty
    list instead of propagating.
    """
    # On python 2 ``str is bytes`` and the key is passed through untouched.
    # The previous guard (``isinstance('', unicode) and isinstance(key, str)``)
    # sat inside the try block: on python 3 it either hit a NameError on
    # ``unicode`` or called ``.decode`` on a text key, and the bare except
    # turned that into an unconditional ``[]``.
    # iso-8859-1 maps every byte value, so this decode cannot fail.
    if str is not bytes and isinstance(key, bytes):
        key = key.decode('iso-8859-1')

    try:
        return dnsq.mx_hosts_for(key)
    except Exception:
        # Deliberately best-effort: a failed lookup means "no hosts".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return []
Expand Down
Loading