Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle unspecified + qualified EDTF strings #58

Merged
merged 3 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions edtf/appsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@
# Padding amounts, one per precision. Each is looked up in EDTF by name and
# falls back to the relativedelta shown when the key is absent.
# Unspecified._get_fuzzy_padding / _years_padding add these (scaled by the
# qualifier's multiplier) around the strict bounds to get the fuzzy bounds.
PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1))
PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1))
PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12))
# Paddings for years with one, two or three unspecified digits
# (e.g. "156X", "16XX", "1XXX").
PADDING_DECADE_PRECISION = EDTF.get("PADDING_DECADE_PRECISION", relativedelta(years=10))
PADDING_CENTURY_PRECISION = EDTF.get(
"PADDING_CENTURY_PRECISION", relativedelta(years=100)
)
PADDING_MILLENNIUM_PRECISION = EDTF.get(
"PADDING_MILLENNIUM_PRECISION", relativedelta(years=1000)
)
# Scaling factors per qualifier kind, each overridable through EDTF.
# NOTE(review): presumably these are what the qualifier's _get_multiplier()
# returns -- confirm against the UA class.
MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0)
MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0)
MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0)
Expand Down
8 changes: 5 additions & 3 deletions edtf/parser/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,17 +161,19 @@ def f(toks):
Level1Interval.set_parser(level1Interval)

# (* *** unspecified *** *)
# Year made of two `digit`s, then a `digit` or "X", then a final "X"
# (e.g. "156X", "16XX"); the combined text is named "year".
yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year")
# Year made of an optional leading "-", one `digit`, two positions that are
# each a `digit` or "X", then a final "X" (e.g. "156X", "16XX", "1XXX",
# "-01XX"); the combined text is named "year".
yearWithOneOrTwoOrThreeUnspecifedDigits = Combine(
Optional("-") + digit + (digit ^ "X") + (digit ^ "X") + "X"
)("year")
# `year` followed by "-XX": the month is unspecified.
monthUnspecified = year + "-" + L("XX")("month")
# `yearMonth` followed by "-XX": the day is unspecified.
dayUnspecified = yearMonth + "-" + L("XX")("day")
# `year` followed by "-XX-XX": both month and day are unspecified.
dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day")

# Any one of the unspecified forms above, optionally followed by a UASymbol
# that is captured under the name "ua"; registered as the parser for the
# Unspecified class.
unspecified = (
yearWithOneOrTwoUnspecifedDigits
yearWithOneOrTwoOrThreeUnspecifedDigits
^ monthUnspecified
^ dayUnspecified
^ dayAndMonthUnspecified
)
) + Optional(UASymbol)("ua")
Unspecified.set_parser(unspecified)

# (* *** uncertainOrApproxDate *** *)
Expand Down
143 changes: 142 additions & 1 deletion edtf/parser/parser_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,148 @@ def precision(self):


class Unspecified(Date):
    """A date with unspecified ("X") parts and an optional qualifier.

    Handles strings such as ``156X``, ``16XX``, ``1XXX``, ``-01XX``,
    ``1999-XX`` and ``1999-01-XX``, optionally followed by a qualifier
    (for example ``16XX~``). The strict bounds span every date the
    unspecified digits could stand for; when a qualifier is present the
    fuzzy bounds are widened further by a precision-dependent padding.
    """

    def __init__(
        self,
        year=None,
        month=None,
        day=None,
        significant_digits=None,
        ua=None,
        **kwargs,
    ):
        """Store the date parts and the optional qualifier.

        ``year``, ``month``, ``day``, ``significant_digits`` and any extra
        keyword arguments are forwarded unchanged to ``Date.__init__``.
        ``ua`` is the parsed qualifier, or ``None`` when there is none.
        """
        super().__init__(
            year=year,
            month=month,
            day=day,
            significant_digits=significant_digits,
            **kwargs,
        )
        self.ua = ua
        # A leading "-" marks a negative year; the strict bounds need
        # special handling in that case (see lower_strict/upper_strict).
        self.negative = self.year.startswith("-")

    def __str__(self):
        """Return the base date string with the qualifier appended, if any."""
        text = super().__str__()
        if self.ua:
            text += str(self.ua)
        return text

    def _get_fuzzy_padding(self, lean):
        """Return the padding to apply around the strict bounds.

        Without a qualifier the padding is empty. Otherwise the padding
        for each present part (year, month, day) is scaled by the
        qualifier's multiplier and summed. ``lean`` is accepted for
        signature compatibility but does not influence the result.
        """
        if not self.ua:
            return relativedelta()

        multiplier = self.ua._get_multiplier()
        padding = relativedelta()
        if self.year:
            padding += self._years_padding(multiplier)
        # Scale the whole configured relativedelta rather than a single
        # field, so that a padding configured in other units (e.g. a month
        # padding given in weeks) is honoured instead of silently dropped.
        if self.month:
            padding += appsettings.PADDING_MONTH_PRECISION * multiplier
        if self.day:
            padding += appsettings.PADDING_DAY_PRECISION * multiplier
        return padding

    def _years_padding(self, multiplier):
        """Return the year-level padding for this date's precision.

        Millennium, century, decade and year precision each map to their
        configured padding, scaled by ``multiplier``. Any other precision
        (month or day) contributes no year-level padding.
        """
        padding_by_precision = {
            PRECISION_MILLENIUM: appsettings.PADDING_MILLENNIUM_PRECISION,
            PRECISION_CENTURY: appsettings.PADDING_CENTURY_PRECISION,
            PRECISION_DECADE: appsettings.PADDING_DECADE_PRECISION,
            PRECISION_YEAR: appsettings.PADDING_YEAR_PRECISION,
        }
        base = padding_by_precision.get(self.precision)
        if base is None:
            return relativedelta()
        # relativedelta * number scales every relative field and truncates
        # each to an int, matching the previous int(multiplier * years).
        return base * multiplier

    def lower_fuzzy(self):
        """Return the strict lower bound moved earlier by the fuzzy padding."""
        # Negative years are already handled by the lower_strict() override.
        return apply_delta(
            sub, self.lower_strict(), self._get_fuzzy_padding(EARLIEST)
        )

    def upper_fuzzy(self):
        """Return the strict upper bound moved later by the fuzzy padding."""
        # Negative years are already handled by the upper_strict() override.
        return apply_delta(
            add, self.upper_strict(), self._get_fuzzy_padding(LATEST)
        )

    def _has_year_scale_precision(self):
        """Return True when the precision is year, decade, century or millennium."""
        return self.precision in (
            PRECISION_YEAR,
            PRECISION_DECADE,
            PRECISION_CENTURY,
            PRECISION_MILLENIUM,
        )

    @staticmethod
    def _date_struct(year, month, day):
        """Build a ``struct_time`` for the date with empty time/extras fields."""
        return struct_time(
            (year, month, day) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS)
        )

    def lower_strict(self):
        """Return the earliest date this value can stand for.

        For a negative year the year is taken from the LATEST-lean date
        (e.g. ``-01XX`` starts in -0199), and the month/day are then reset
        to the start of the year or month as the precision requires.
        """
        if not self.negative:
            return self._strict_date(lean=EARLIEST)

        # Gets the year right, but the month and day need adjusting.
        strict_val = self._strict_date(lean=LATEST)
        if self._has_year_scale_precision():
            return self._date_struct(strict_val.tm_year, 1, 1)
        if self.precision == PRECISION_MONTH:
            # NOTE(review): when the month itself is "XX" the month used
            # here comes from the LATEST-lean date -- confirm that this is
            # the intended earliest month for negative years.
            return self._date_struct(strict_val.tm_year, strict_val.tm_mon, 1)
        # NOTE(review): day precision returns the LATEST-lean date as is;
        # confirm this is intended when month or day is "XX".
        return strict_val

    def upper_strict(self):
        """Return the latest date this value can stand for.

        For a negative year the year is taken from the EARLIEST-lean date
        (e.g. ``-01XX`` ends in -0100), and the month/day are then reset
        to the end of the year or month as the precision requires.
        """
        if not self.negative:
            return self._strict_date(lean=LATEST)

        strict_val = self._strict_date(lean=EARLIEST)
        if self._has_year_scale_precision():
            return self._date_struct(strict_val.tm_year, 12, 31)
        if self.precision == PRECISION_MONTH:
            # NOTE(review): when the month itself is "XX" the month used
            # here comes from the EARLIEST-lean date -- confirm that this
            # is the intended latest month for negative years.
            last_day = calendar.monthrange(strict_val.tm_year, strict_val.tm_mon)[1]
            return self._date_struct(
                strict_val.tm_year, strict_val.tm_mon, last_day
            )
        # NOTE(review): day precision returns the EARLIEST-lean date as
        # is; confirm this is intended when month or day is "XX".
        return strict_val

    @property
    def precision(self):
        """Return the precision constant implied by the parts present.

        A day or month part gives day or month precision. Otherwise the
        year decides: all digits means year precision, and a four-character
        year ending in ``XXX``, ``XX`` or ``X`` means millennium, century
        or decade precision respectively.

        Raises:
            ValueError: if no precision can be derived from the parts.
        """
        if self.day:
            return PRECISION_DAY
        if self.month:
            return PRECISION_MONTH
        if self.year:
            year_digits = self.year.lstrip("-")
            if year_digits.isdigit():
                return PRECISION_YEAR
            if len(year_digits) == 4:
                # Check the longest run of X first so "1XXX" is not
                # mistaken for a century or a decade.
                if year_digits.endswith("XXX"):
                    return PRECISION_MILLENIUM
                if year_digits.endswith("XX"):
                    return PRECISION_CENTURY
                if year_digits.endswith("X"):
                    return PRECISION_DECADE
        raise ValueError(f"Unspecified date {self} has no precision")


class Level1Interval(Interval):
Expand Down
10 changes: 10 additions & 0 deletions edtf/parser/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,20 @@
("1999-01-XX", ("1999-01-01", "1999-01-31")),
# some day in 1999
("1999-XX-XX", ("1999-01-01", "1999-12-31")),
# negative unspecified year
("-01XX", ("-0199-01-01", "-0100-12-31")),
# Uncertain/Approximate lower boundary dates (BCE)
("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")),
("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")),
("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")),
# Unspecified and qualified
# "circa 17th century"
("16XX~", ("1600-01-01", "1699-12-31", "1500-01-01", "1799-12-31")),
("16XX%", ("1600-01-01", "1699-12-31", "1400-01-01", "1899-12-31")),
("1XXX", ("1000-01-01", "1999-12-31")),
("1XXX~", ("1000-01-01", "1999-12-31", "0000-01-01", "2999-12-31")),
("156X~", ("1560-01-01", "1569-12-31", "1550-01-01", "1579-12-31")),
("-01XX~", ("-0199-01-01", "-0100-12-31", "-0299-01-01", "0000-12-31")),
# L1 Extended Interval
# beginning unknown, end 2006
# for intervals with an unknown beginning or end, the unknown bound is calculated with the constant DELTA_IF_UNKNOWN (10 years)
Expand Down
Loading