Skip to content

Commit

Permalink
v1.20.11 Performance improvements
Browse files: browse the repository at this point in the history
  • Loading branch information
akariv committed Apr 1, 2024
1 parent b669f59 commit a25db31
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 23 deletions.
2 changes: 1 addition & 1 deletion tableschema/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-1.20.10
+1.20.11
3 changes: 2 additions & 1 deletion tableschema/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, descriptor, missing_values=config.DEFAULT_MISSING_VALUES,
self.__schema = schema
self.__cast_function = self.__get_cast_function()
self.__check_functions = self.__get_check_functions()
self.__preserve_missing_values = os.environ.get('TABLESCHEMA_PRESERVE_MISSING_VALUES')

@cached_property
def schema(self):
Expand Down Expand Up @@ -155,7 +156,7 @@ def cast_value(self, value, constraints=True):
# Null value
if value in self.__missing_values:
# Whether missing_values should be preserved without being cast
if os.environ.get('TABLESCHEMA_PRESERVE_MISSING_VALUES'):
if self.__preserve_missing_values:
return value
value = None

Expand Down
4 changes: 2 additions & 2 deletions tableschema/types/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def cast_integer(format, value, **options):

elif isinstance(value, six.string_types):
if not options.get('bareNumber', _DEFAULT_BARE_NUMBER):
value = re.sub(r'((^\D*)|(\D*$))', '', value)
value = _RE_BARE_NUMBER.sub('', value)

try:
value = int(value)
Expand All @@ -41,5 +41,5 @@ def cast_integer(format, value, **options):


# Internal

_RE_BARE_NUMBER = re.compile(r'((^\D*)|(\D*$))')
_DEFAULT_BARE_NUMBER = True
49 changes: 30 additions & 19 deletions tableschema/types/number.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,42 @@
# Module API

def cast_number(format, value, **options):
group_char = options.get('groupChar', _DEFAULT_GROUP_CHAR)
decimal_char = options.get('decimalChar', _DEFAULT_DECIMAL_CHAR)
if not isinstance(value, Decimal):
if isinstance(value, six.string_types):
value = re.sub(r'\s', '', value)
value = value.replace(decimal_char, '__decimal_char__')
if isinstance(value, six.string_types):
group_char = options.get('groupChar', _DEFAULT_GROUP_CHAR)
decimal_char = options.get('decimalChar', _DEFAULT_DECIMAL_CHAR)
value = _RE_WHITESPACE.sub('', value)
if decimal_char != '.':
if group_char:
value = value.replace(decimal_char, '__decimal_char__')
value = value.replace(group_char, '')
value = value.replace('__decimal_char__', '.')
else:
value = value.replace(decimal_char, '__decimal_char__')
value = value.replace('__decimal_char__', '.')
elif group_char:
value = value.replace(group_char, '')
value = value.replace('__decimal_char__', '.')
if not options.get('bareNumber', _DEFAULT_BARE_NUMBER):
value = re.sub(r'((^\D*)|(\D*$))', '', value)
elif not isinstance(value, six.integer_types + (float,)):
return ERROR
elif value is True or value is False:
return ERROR
try:
if isinstance(value, float):
value = str(value)
value = Decimal(value)
except Exception:
return ERROR

if not options.get('bareNumber', _DEFAULT_BARE_NUMBER):
value = _RE_BARE_NUMBER.sub('', value)
elif isinstance(value, Decimal):
return value
elif not isinstance(value, six.integer_types + (float,)):
return ERROR
elif value is True or value is False:
return ERROR
else:
value = str(value)
try:
value = Decimal(value)
except Exception:
return ERROR
return value


# Internal

_RE_WHITESPACE = re.compile(r'\s')
_RE_BARE_NUMBER = re.compile(r'((^\D*)|(\D*$))')
_DEFAULT_GROUP_CHAR = ''
_DEFAULT_DECIMAL_CHAR = '.'
_DEFAULT_BARE_NUMBER = True
3 changes: 3 additions & 0 deletions tableschema/types/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
def cast_string(format, value, **options):
if not isinstance(value, six.string_types):
return ERROR
if format in _SIMPLE_FORMATS:
return value
if format == 'uri':
uri = _uri_from_string(value)
try:
Expand All @@ -43,6 +45,7 @@ def cast_string(format, value, **options):

# Internal

_SIMPLE_FORMATS = {'default', None}
_EMAIL_PATTERN = re.compile(r'[^@]+@[^@]+\.[^@]+')
_uri_from_string = rfc3986.uri.URIReference.from_string
_uri_validator = rfc3986.validators.Validator().require_presence_of('scheme')

0 comments on commit a25db31

Please sign in to comment.