Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make datetime separate datatype #407

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion guardrails/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import logging
import re
import warnings
from dataclasses import dataclass
from types import SimpleNamespace
# Hunk @@ -267,6 +268,9 @@ of guardrails/datatypes.py
def from_str(self, s: str) -> Optional[datetime.date]:
    """Create a Date from a string.

    Args:
        s: The text to convert, or None.

    Returns:
        None when `s` is None, otherwise a `datetime.date`. ISO 8601 week
        dates (e.g. "2023-W01-1") are handled first; any other string is
        parsed with `self.date_format` when one is set, or with the generic
        `parse` helper otherwise.

    Raises:
        ValueError: If the string cannot be parsed.
    """
    if s is None:
        return None
    # Handle basic ISO 8601 week dates. fullmatch keeps strings with trailing
    # text (e.g. a time part) out of this branch so they reach the parsers below.
    if re.fullmatch(r"\d{4}-W\d{2}-\d", s):
        # strptime yields a datetime; narrow it to a date like the other branches.
        return datetime.datetime.strptime(s, "%G-W%V-%u").date()
    if not self.date_format:
        return parse(s).date()
    return datetime.datetime.strptime(s, self.date_format).date()
@@ -285,7 +289,7 @@ def from_xml(cls, element: ET._Element, strict: bool = False) -> "Date":
class Time(ScalarType):
"""Element tag: `<time>`

To configure the date format, create a date-format attribute on the
To configure the time format, create a time-format attribute on the
element. E.g. `<time name="..." ... time-format="%H:%M:%S" />`
"""

@@ -319,6 +323,68 @@ def from_xml(cls, element: ET._Element, strict: bool = False) -> "Time":
return datatype


@register_type("datetime")
class DateTime(ScalarType):
    """Element tag: `<datetime>`

    To configure the datetime format, create a datetime-format attribute on the
    element. E.g. `<datetime name="..." ... datetime-format="%Y-%m-%d %H:%M:%S.%f" />`

    DateTime represents a specific moment in time, combining a date (year,
    month, day) and precise time (hours, minutes, seconds, microseconds) into a
    single timestamp. It is generally preferred over using a separate Date or
    Time datatype due to its comprehensiveness.

    This datatype supports a wide variety of datetime formats, including ISO 8601,
    RFC 2822, American and European styles, verbose formats, and also Unix Epoch
    timestamps with seconds and milliseconds. However, some ambiguous formats may
    not be parsed correctly (see dateutil and datetime documentation for supported
    formats).

    Note that when no datetime-format is configured, any string that converts
    to a float is read as a Unix Epoch timestamp, so an all-digit compact date
    such as "20230312" is interpreted as epoch seconds rather than as a date.
    """

    tag = "datetime"

    def __init__(
        self,
        children: Dict[str, Any],
        format_attr: "FormatAttr",
        optional: bool,
        name: Optional[str],
        description: Optional[str],
    ) -> None:
        super().__init__(children, format_attr, optional, name, description)
        # strptime pattern; stays None unless from_xml finds a format attribute.
        self.datetime_format: Optional[str] = None

    @staticmethod
    def _from_epoch(s: str) -> Optional[datetime.datetime]:
        """Read `s` as a Unix Epoch timestamp, or return None if it is not one.

        The value is tried as seconds first and, if that is out of range, as
        milliseconds. The result is a naive datetime expressed in UTC.
        """
        try:
            timestamp = float(s)
        except ValueError:
            return None
        for divisor in (1.0, 1000.0):
            try:
                moment = datetime.datetime.fromtimestamp(
                    timestamp / divisor, tz=datetime.timezone.utc
                )
            except (ValueError, OverflowError, OSError):
                # Out-of-range values raise different exception types depending
                # on the platform; all of them mean "try the next unit".
                continue
            # Drop tzinfo to keep returning naive UTC datetimes, matching the
            # deprecated utcfromtimestamp this replaces.
            return moment.replace(tzinfo=None)
        return None

    def from_str(self, s: str) -> Optional[datetime.datetime]:
        """Create a DateTime from a string.

        Args:
            s: The text to convert, or None.

        Returns:
            None when `s` is None, otherwise a `datetime.datetime`. When a
            datetime format is configured it is tried first; a value that does
            not match it may still be accepted as a Unix Epoch timestamp.
            Without a configured format, epoch timestamps are tried before the
            generic `parse` helper.

        Raises:
            ValueError: If the string cannot be parsed.
        """
        if s is None:
            return None
        if self.datetime_format:
            # An explicitly configured format wins, so that all-digit formats
            # such as "%Y%m%d" are not mistaken for epoch timestamps.
            try:
                return datetime.datetime.strptime(s, self.datetime_format)
            except (TypeError, ValueError):
                from_epoch = self._from_epoch(s)
                if from_epoch is None:
                    raise
                return from_epoch
        from_epoch = self._from_epoch(s)
        if from_epoch is not None:
            return from_epoch
        return parse(s)

    @classmethod
    def from_xml(cls, element: ET._Element, strict: bool = False) -> "DateTime":
        """Build the datatype from its XML element.

        Reads the format from the `datetime-format` attribute, falling back to
        the `datetime_format` spelling when only that one is present.
        """
        datatype = super().from_xml(element, strict)

        configured_format = element.attrib.get("datetime-format")
        if configured_format is None:
            # Previously this spelling passed the membership check but then
            # raised KeyError on the "datetime-format" lookup.
            configured_format = element.attrib.get("datetime_format")
        if configured_format is not None:
            datatype.datetime_format = configured_format

        return datatype


@deprecate_type
@register_type("email")
class Email(ScalarType):
2 changes: 2 additions & 0 deletions guardrails/utils/json_utils.py
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
Choice,
DataType,
Date,
DateTime,
Email,
Float,
Integer,
@@ -52,6 +53,7 @@ def verify(
ListDataType: list,
Date: str,
Time: str,
DateTime: str,
}

ignore_types = [
1 change: 1 addition & 0 deletions tests/integration_tests/test_datatypes.py
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ def test_passed_date_format():
("2021-01-01"), # standard date
("2021-01-01T11:10:00+01:00"), # Cohere-style
("2023-10-03T14:18:38.476Z"), # ISO
("2023-W01-1"), # ISO 8601 week date (start of year)
],
)
def test_defaulted_date_parser(date_string: str):
126 changes: 126 additions & 0 deletions tests/integration_tests/test_datatypes_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pytest

from guardrails.guard import Guard


def test_passed_datetime_format():
    """Parse a value that matches an explicitly configured ``datetime-format``.

    There are no assertions; the test passes when parsing does not raise.
    """
    spec = """
<rail version="0.1">
<output>
<string name="name"/>
<datetime name="dob" datetime-format="%Y-%m-%d %H:%M:%S.%f"/>
</output>
<prompt>
Dummy prompt.
</prompt>
</rail>
"""
    llm_output = '{"name": "John Doe", "dob": "2023-10-23 11:06:32.498099"}'
    Guard.from_rail_string(spec).parse(llm_output=llm_output, num_reasks=0)


@pytest.mark.parametrize(
    "datetime_string",
    [
        # ISO 8601 and similar formats
        "2023-10-20T15:30:00+05:00",  # ISO format with UTC offset
        "2023-10-20T15:30:00.123456",  # ISO format with microseconds
        "2023-10-20T15:30:59.123",  # ISO format with milliseconds
        "2023-10-20T15:30:59.999999",  # ISO format with microseconds
        "2023-03-01T13:45:30",  # ISO format without timezone
        "2023-03-01 13:45:30+02:00",  # Space separator with UTC offset
        "2023-03-01 13:45:30 UTC+2",  # Offset written as "UTC+2"
        "2023-03-01 13:45:30 EST",  # Timezone abbreviation
        "2023-03-12T01:45:00+05:00",  # ISO 8601 combined date and time with offset
        "20230312T014500+0500",  # Compact ISO 8601 without dashes or colons
        # Formats with various separators
        "2023-10-20 15:30:00.123456",  # Space separator with microseconds
        "2023-10-20 15:30:59.123",  # Space separator with milliseconds
        "2023 March 12 01:45:00 +05:00",  # Full month name with time and offset
        "2023-03-12 01:45:00 +05:00",  # Standard datetime with offset
        # Comma used as the decimal point
        "2023-10-20T15:30:59,123",  # ISO format with milliseconds
        "2023-10-20 15:30:59,123",  # Space separator with milliseconds
        "2023-10-20T15:30:00,999",  # ISO format with milliseconds
        "2023-10-20 15:30:00,123",  # Space separator with milliseconds
        "2023-10-20T15:30:59,999999",  # ISO format with microseconds
        # Formats with day names, ordinals, or era markers
        "Sun, 12 Mar 2023 01:45:00 +0000",  # RFC 822/2822 format with day name
        "2023, Oct 20th 15:30",  # Year, abbreviated month, ordinal day, time
        "2023 March 12 Sunday 01:45:00 +05:00",  # Wordy format with day name
        "2023 AD March 12 Sunday 01:45:00 +05:00",  # With era AD
        "2023 AD Mar 12 Sun 01:45:00 +05:00",  # Short month and day names, era AD
        "2023 AD 03 12 Sun 01:45:00 +05:00",  # Numeric month, day name, era AD
        # Wordy formats and special cases
        "12th of March, 2023 01:45:00",  # 'of' and 'th' suffix, with time
        "12th December 2022 14:15:29.123456",  # Ordinal suffix, full month name
        "12-Dec-2022 14:15:29.999999",  # Day-MonthAbbreviation-Year, microseconds
        "12 December 2022 14:15:29,999",  # Full date, comma before milliseconds
        "12/Dec/2022 14:15:29.123",  # Day/MonthAbbreviation/Year
        "12-Dec-2023 13:45:00",  # Day-MonthAbbreviation-Year with time
        "20230312",  # Compact date; all digits, so also a valid epoch-seconds value
        "2023 AD 03 12th Sun 01:45:00 +05:00",  # Numeric month, 'th', day, era AD
        # Unix/Epoch strings
        "1696343743",  # Unix timestamp in seconds
        "1677649200",  # Epoch timestamp for a specific date
        "1672531199.5",  # Epoch timestamp with fractional seconds
        "1609459200123",  # Epoch timestamp in milliseconds
        "1672531199.123456",  # Epoch timestamp with microsecond fraction
        "0",  # Epoch timestamp (start of Unix time)
    ],
)
def test_defaulted_datetime_parser(datetime_string: str):
    """Parse each sample with no ``datetime-format`` configured.

    There are no assertions; a case passes when parsing does not raise.
    """
    spec = """
<rail version="0.1">
<output>
<string name="name"/>
<datetime name="dob"/>
</output>
<prompt>
Dummy prompt.
</prompt>
</rail>
"""
    llm_output = f'{{"name": "John Doe", "dob": "{datetime_string}"}}'
    # Parsing must complete without raising.
    Guard.from_rail_string(spec).parse(llm_output=llm_output, num_reasks=0)


@pytest.mark.parametrize(
    "datetime_string",
    [
        "3rd Thursday in November 2023",  # Informal, relative description
        "2023T03T12T01T45T00+05:00",  # Malformed ISO 8601 with extra 'T' separators
        "2023 CE 03 12th Sun 01:45:00 +05:00",  # CE era, numeric month
        "2023 CE March 12th Sun 01:45:00 +05:00",  # CE era, month name
        "12 März 2023",  # German month name
    ],
)
def test_defaulted_datetime_parser_unsupported_values(datetime_string: str):
    """Check that unsupported inputs are rejected when no format is configured."""
    spec = """
<rail version="0.1">
<output>
<string name="name"/>
<datetime name="dob"/>
</output>
<prompt>
Dummy prompt.
</prompt>
</rail>
"""
    rejecting_guard = Guard.from_rail_string(spec)
    llm_output = f'{{"name": "John Doe", "dob": "{datetime_string}"}}'
    # Every sample must fail with a ValueError or an OverflowError.
    with pytest.raises((ValueError, OverflowError)):
        rejecting_guard.parse(llm_output=llm_output, num_reasks=0)