Skip to content

Commit

Permalink
tmp - move to_regex back to DSL.py
Browse files Browse the repository at this point in the history
  • Loading branch information
rlouf committed Feb 18, 2025
1 parent bff8058 commit 029e917
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 57 deletions.
2 changes: 1 addition & 1 deletion docs/reference/regex_dsl.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ literal = String("hello") # Internally represents "hello"
digit = Regex(r"[0-9]+") # Internally represents the pattern [0-9]+

# Converting to standard regex strings:
from outlines.types.regex import to_regex
from outlines.types.dsl import to_regex

print(to_regex(literal)) # Output: hello
print(to_regex(digit)) # Output: [0-9]+
Expand Down
2 changes: 1 addition & 1 deletion outlines/types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import Enum

from . import airports, countries, locale
from .dsl import Regex, json_schema, one_or_more, optional, regex, repeat, zero_or_more, times
from outlines.types.dsl import Regex, json_schema, one_or_more, optional, regex, repeat, zero_or_more, times

# Python types
integer = Regex(r"[+-]?(0|[1-9][0-9]*)")
Expand Down
46 changes: 41 additions & 5 deletions outlines/types/dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pydantic import BaseModel, GetCoreSchemaHandler, GetJsonSchemaHandler
from pydantic.json_schema import JsonSchemaValue
from pydantic_core import core_schema as cs
from outlines_core.fsm.json_schema import build_regex_from_schema


class Term:
Expand Down Expand Up @@ -68,13 +69,10 @@ def __get_pydantic_core_schema__(
def __get_pydantic_json_schema__(
self, core_schema: cs.CoreSchema, handler: GetJsonSchemaHandler
) -> JsonSchemaValue:
from outlines.types.regex import to_regex

return {"type": "string", "pattern": to_regex(self)}

def validate(self, value: str) -> str:
from outlines.types.regex import to_regex

pattern = to_regex(self)
compiled = re.compile(pattern)
if not compiled.fullmatch(str(value)):
Expand All @@ -90,8 +88,6 @@ def matches(self, value: str) -> bool:
be defined with a regular expression.
"""
from outlines.types.regex import to_regex

pattern = to_regex(self)
compiled = re.compile(pattern)
if compiled.fullmatch(str(value)):
Expand Down Expand Up @@ -357,3 +353,43 @@ def regex(pattern: str):

def json_schema(schema: Union[str, dict, type[BaseModel]]):
    """Build a ``JsonSchema`` term from ``schema``.

    ``schema`` is forwarded to the ``JsonSchema`` constructor unchanged; per
    the annotation it may be a string, a dict, or a Pydantic ``BaseModel``
    subclass.
    """
    schema_term = JsonSchema(schema)
    return schema_term


def to_regex(term: Term) -> str:
    """Translate a DSL term into the regular-expression string it denotes.

    Only self-contained terms are handled, i.e. terms that do not refer to
    another rule. Composite terms are translated recursively.

    Parameters
    ----------
    term
        The DSL term to translate.

    Returns
    -------
    str
        The regular expression for ``term``. A ``String`` is returned
        ``re.escape``-d with no surrounding group; a ``Sequence`` is the plain
        concatenation of its parts; every other supported term is wrapped in
        (or has its operand wrapped in) a parenthesised group.

    Raises
    ------
    TypeError
        If ``term`` is not one of the supported term classes.
    """
    # The checks run in the same order as the original dispatch so that the
    # first matching class wins.
    if isinstance(term, String):
        return re.escape(term.value)
    if isinstance(term, Regex):
        return "({})".format(term.pattern)
    if isinstance(term, JsonSchema):
        schema_pattern = build_regex_from_schema(term.schema)
        return "({})".format(schema_pattern)

    # Unary operators: group the operand, then append the quantifier.
    if isinstance(term, KleeneStar):
        return "({})*".format(to_regex(term.term))
    if isinstance(term, KleenePlus):
        return "({})+".format(to_regex(term.term))
    if isinstance(term, Optional):
        return "({})?".format(to_regex(term.term))

    # N-ary combinators.
    if isinstance(term, Alternatives):
        branches = [to_regex(child) for child in term.terms]
        return "({})".format("|".join(branches))
    if isinstance(term, Sequence):
        pieces = [to_regex(child) for child in term.terms]
        return "".join(pieces)

    # Counted repetitions: `{{` / `}}` are literal braces in the template.
    if isinstance(term, QuantifyExact):
        operand = to_regex(term.term)
        return "({}){{{}}}".format(operand, term.count)
    if isinstance(term, QuantifyMinimum):
        operand = to_regex(term.term)
        return "({}){{{},}}".format(operand, term.min_count)
    if isinstance(term, QuantifyMaximum):
        operand = to_regex(term.term)
        return "({}){{,{}}}".format(operand, term.max_count)
    if isinstance(term, QuantifyBetween):
        operand = to_regex(term.term)
        return "({}){{{},{}}}".format(operand, term.min_count, term.max_count)

    raise TypeError(f"Cannot convert object {term!r} to a regular expression.")
48 changes: 0 additions & 48 deletions outlines/types/regex.py

This file was deleted.

3 changes: 1 addition & 2 deletions tests/types/test_to_regex.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import pytest


from outlines.types.regex import to_regex
from outlines.types.dsl import String, Regex, JsonSchema, KleeneStar, KleenePlus, QuantifyBetween, QuantifyExact, QuantifyMaximum, QuantifyMinimum, Sequence, Alternatives, Optional, Term
from outlines.types.dsl import String, Regex, JsonSchema, KleeneStar, KleenePlus, QuantifyBetween, QuantifyExact, QuantifyMaximum, QuantifyMinimum, Sequence, Alternatives, Optional, Term, to_regex


def test_to_regex_simple():
Expand Down

0 comments on commit 029e917

Please sign in to comment.