Add 100 Samples Per Regex / JSON Schema #35

Merged · 3 commits · Oct 21, 2024
8 changes: 7 additions & 1 deletion .github/workflows/asv_benchmarks_pr.yml
@@ -42,7 +42,7 @@ jobs:
git fetch upstream main

# Run benchmarks, allow errors, they will be caught in the next step
- asv continuous upstream/main HEAD \
+ asv continuous --quick upstream/main HEAD \
--no-stats --interleave-rounds -a repeat=3 || true

- name: BENCHMARK RESULTS
@@ -52,3 +52,9 @@ jobs:
echo "Performance degradation detected!"
exit 1
fi

+ - name: Upload Benchmark Results Folder
+ uses: actions/upload-artifact@v3
+ with:
+ name: benchmark-results
+ path: ${{ env.BENCHMARKS_OUTPUT }}
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -5,9 +5,9 @@ authors = [{"name" = "The Outlines developers"}]
description = "A benchmarking suite for structured generation libraries."
requires-python = ">=3.10"
dependencies = [
"lm-format-enforcer==0.10.6",
"lm-format-enforcer==0.10.7",
"outlines==0.0.46",
"outlines-core==0.1.0",
"outlines-core",
"torch==2.4.0",
"transformers==4.44.0",
]
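
Since outlines-core is left unpinned above, the exact version benchmarked depends on what pip resolves at install time. A small sketch (not part of the PR) for logging the resolved versions alongside benchmark results, using only the standard library:

import importlib.metadata

# Print the versions that were actually installed for this benchmark run,
# so results can be tied back to concrete library releases.
for package in ("lm-format-enforcer", "outlines", "outlines-core", "torch", "transformers"):
    print(f"{package}=={importlib.metadata.version(package)}")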
131 changes: 92 additions & 39 deletions src/benchmark_lfe.py
@@ -8,57 +8,110 @@
from .data import json_cases, models, regex_cases


- class LMFormatEnforcerRegex:
- params = [models, regex_cases]
- param_names = ["model", "regex"]
- timeout = 600

- def setup(self, model, _):
- """Set up the benchmark.

- We convert the tokenizer during set up as this only
- needs to be done once for a given model.

- """
+ class LMFormatEnforcerBenchmark:
+ def do_setup(self, model, samples):
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+ self.all_tokenized_samples = [
+ self.tokenizer.encode(sample) for sample in samples
+ ]

+ def _exhaust_samples(self, token_enforcer):
+ for sample_tokens in self.all_tokenized_samples:
+ for i in range(len(sample_tokens)):
+ _ = token_enforcer.get_allowed_tokens(sample_tokens[: i + 1])

+ def _get_first_token(self, token_enforcer):
+ """Get first token to verify lazy index is fully warmed up"""
+ _ = token_enforcer.get_allowed_tokens(self.all_tokenized_samples[0][:1])


+ class LMFormatEnforcerRegex(LMFormatEnforcerBenchmark):
+ params = [models, regex_cases.keys()]
+ param_names = ["model", "regex_name"]
+ timeout = 1200

+ def setup(self, model, regex_name):
+ samples = regex_cases[regex_name]["samples"]
+ self.do_setup(model, samples)

+ def _get_enforcer(self, regex_name):
+ pattern = regex_cases[regex_name]["regex"]
+ parser = RegexParser(pattern)
+ tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+ return TokenEnforcer(tokenizer_data, parser)

+ def time_lfe_total(self, _, regex_name):
+ enforcer = self._get_enforcer(regex_name)
+ self._exhaust_samples(enforcer)

+ def time_lfe_first_token(self, _, regex_name):
+ enforcer = self._get_enforcer(regex_name)
+ self._get_first_token(enforcer)


- def time_lfe(self, _, regex):
- regex_string, regex_example = regex["regex"], regex["example"]
- regex_example_tokens = self.tokenizer.encode(regex_example)
+ class LMFormatEnforcerRegexRunTime(LMFormatEnforcerBenchmark):
+ """Class which warms-up enforcer in setup steps"""

- parser = RegexParser(regex_string)
- token_enforcer = TokenEnforcer(self.tokenizer_data, parser)
+ _get_enforcer = LMFormatEnforcerRegex._get_enforcer

- for i in range(len(regex_example_tokens)):
- _ = token_enforcer.get_allowed_tokens(regex_example_tokens[: i + 1])
+ params = [models, regex_cases.keys()]
+ param_names = ["model", "regex_name"]
+ timeout = 1200

+ def setup(self, model, regex_name):
+ samples = regex_cases[regex_name]["samples"]
+ self.do_setup(model, samples)

- class LMFormatEnforcerJsonSchema:
- params = [models, json_cases]
- param_names = ["model", "json"]
+ # ensure warmed up so we're only measuring runtime
+ self.enforcer = self._get_enforcer(regex_name)
+ self._get_first_token(self.enforcer)

+ def time_lfe_runtime(self, *args):
+ self._exhaust_samples(self.enforcer)


+ class LMFormatEnforcerJsonSchema(LMFormatEnforcerBenchmark):
+ params = [models, json_cases.keys()]
+ param_names = ["model", "json_schema_name"]
timeout = 600

- def setup(self, model, _):
- """Set up the benchmark.
+ def setup(self, model, json_schema_name):
+ samples = json_cases[json_schema_name]["samples"]
+ self.do_setup(model, samples)

- We convert the tokenizer during set up as this only
- needs to be done once for a given model.
+ def _get_enforcer(self, json_schema_name):
+ schema = json_cases[json_schema_name]["schema"]
+ parser = JsonSchemaParser(schema)
+ tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+ return TokenEnforcer(tokenizer_data, parser)

"""
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
def time_lfe_total(self, _, json_schema_name):
enforcer = self._get_enforcer(json_schema_name)
self._exhaust_samples(enforcer)

def time_lfe_first_token(self, _, json_schema_name):
enforcer = self._get_enforcer(json_schema_name)
self._get_first_token(enforcer)


+ class LMFormatEnforcerJsonSchemaRunTime(LMFormatEnforcerBenchmark):
+ """Class which warms-up enforcer in setup steps"""

+ _get_enforcer = LMFormatEnforcerJsonSchema._get_enforcer

+ params = [models, json_cases.keys()]
+ param_names = ["model", "json_schema_name"]
+ timeout = 600

- def time_lfe(self, _, json):
- json_string, json_example = json["schema"], json["example"]
- json_example_tokens = self.tokenizer.encode(json_example)
+ def setup(self, model, json_schema_name):
+ samples = json_cases[json_schema_name]["samples"]
+ self.do_setup(model, samples)

- parser = JsonSchemaParser(json_string)
- token_enforcer = TokenEnforcer(self.tokenizer_data, parser)
+ # ensure warmed up so we're only measuring runtime
+ self.enforcer = self._get_enforcer(json_schema_name)
+ self._get_first_token(self.enforcer)

- for i in range(len(json_example_tokens)):
- _ = token_enforcer.get_allowed_tokens(json_example_tokens[: i + 1])
+ def time_lfe_runtime(self, *args):
+ self._exhaust_samples(self.enforcer)
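
For reference, the benchmarks above read each case from src/data.py (not shown in this diff) as a mapping from a case name to its pattern or schema plus a list of sample strings; per the PR title, each case now carries 100 samples. A hypothetical sketch of that layout, where only the "regex", "schema", and "samples" keys are implied by the diff and the case names and values are invented:

# Hypothetical illustration of the case layout assumed by the setup() methods above.
regex_cases = {
    "phone_number": {  # invented case name
        "regex": r"\d{3}-\d{3}-\d{4}",
        "samples": ["555-123-4567", "800-555-0199"],  # 100 matching strings per case
    },
}

json_cases = {
    "person": {  # invented case name
        "schema": {"type": "object", "properties": {"name": {"type": "string"}}},
        "samples": ['{"name": "Ada"}', '{"name": "Grace"}'],  # 100 conforming documents per case
    },
}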
144 changes: 99 additions & 45 deletions src/benchmark_outlines.py
@@ -2,6 +2,7 @@
import json

import outlines.caching as caching
+ import torch
from outlines.fsm.guide import RegexGuide
from outlines.fsm.json_schema import build_regex_from_schema
from outlines.models.transformers import TransformerTokenizer
@@ -10,12 +11,10 @@
from .data import json_cases, models, regex_cases


- class OutlinesRegex:
- params = [models, regex_cases]
- param_names = ["model", "regex"]
- timeout = 1200
+ class OutlinesBenchmark:
+ guide_class = RegexGuide

- def setup(self, model, _):
+ def do_setup(self, model, samples):
"""Set up the benchmark.

We JIT-compile Numba functions and convert the vocabulary
@@ -26,59 +25,114 @@ def setup(self, model, _):
model, clean_up_tokenization_spaces=True
)
self.tokenizer = TransformerTokenizer(self.tokenizer)
- RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary
+ self.guide_class("a", self.tokenizer) # JIT-compile and convert the vocabulary

+ self.all_tokenized_samples = [
+ self.tokenizer.encode(sample)[0][0] for sample in samples
+ ]

+ def _exhaust_samples(self, guide):
+ state = guide.initial_state
+ for sample_tokens in self.all_tokenized_samples:
+ for token in sample_tokens:
+ if isinstance(token, torch.Tensor):
+ token = token.item()
+ state = guide.get_next_state(state, token)
+ _ = guide.get_next_instruction(state)

+ def _get_first_token(self, guide):
+ """Get first token to verify lazy index is fully warmed up"""
+ state = guide.get_next_state(
+ guide.initial_state, self.all_tokenized_samples[0][0]
+ )
+ _ = guide.get_next_instruction(state)

- def time_outlines(self, _, regex):
- """Measure generation time with Outlines.
+ def teardown(self, *args):
+ caching.clear_cache()

- Outlines' generation time is split between compiling an index for each
- regular expression, and walking this index while generating tokens.

- """
- caching.clear_cache()
+ class OutlinesRegex(OutlinesBenchmark):
+ params = [models, regex_cases.keys()]
+ param_names = ["model", "regex_name"]
+ timeout = 1200

+ def setup(self, model, regex_name):
+ samples = regex_cases[regex_name]["samples"]
+ self.do_setup(model, samples)

- regex_string, regex_example = regex["regex"], regex["example"]
- regex_example_tokens = self.tokenizer.encode(regex_example)[0][0]
- guide = RegexGuide(regex_string, self.tokenizer)
+ def time_outlines_total(self, _, regex_name):
+ regex_string = regex_cases[regex_name]["regex"]
+ guide = self.guide_class(regex_string, self.tokenizer)
+ self._exhaust_samples(guide)

- state = 0
- for token in regex_example_tokens:
- _ = guide.get_next_instruction(state)
- state = guide.get_next_state(state, token)
+ def time_outlines_first_token(self, _, regex_name):
+ regex_string = regex_cases[regex_name]["regex"]
+ guide = self.guide_class(regex_string, self.tokenizer)
+ self._get_first_token(guide)


- class OutlinesJsonSchema:
- params = [models, json_cases]
- param_names = ["model", "json"]
+ class OutlinesRegexRunTime(OutlinesBenchmark):
+ """Class which warms-up Guide in setup steps"""

+ params = [models, regex_cases.keys()]
+ param_names = ["model", "regex_name"]
timeout = 1200

- def setup(self, model, _):
- """Set up the benchmark.
+ def setup(self, model, regex_name):
+ samples = regex_cases[regex_name]["samples"]
+ self.do_setup(model, samples)

- We JIT-compile Numba functions and convert the vocabulary
- during set up as this only need to be ever done once.
+ # ensure warmed up so we're only measuring runtime
+ regex_string = regex_cases[regex_name]["regex"]
+ self.guide = self.guide_class(regex_string, self.tokenizer)
+ self._get_first_token(self.guide)

"""
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
self.tokenizer = TransformerTokenizer(self.tokenizer)
RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary
def time_outlines_runtime(self, *args):
self._exhaust_samples(self.guide)

- def time_outlines(self, _, json_case):
- """Measure generation time with Outlines.

- Outlines' generation time is split between compiling an index for each
- regular expression, and walking this index while generating tokens.
+ class OutlinesJsonSchema(OutlinesBenchmark):
+ json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema)

"""
json_string, json_example = json_case["schema"], json_case["example"]
json_example_tokens = self.tokenizer.encode(json_example)[0][0]
params = [models, json_cases.keys()]
param_names = ["model", "json_schema_name"]
timeout = 1200

+ def setup(self, model, json_schema_name):
+ samples = json_cases[json_schema_name]["samples"]
+ self.do_setup(model, samples)

+ def time_outlines_total(self, _, json_schema_name):
+ json_string = json_cases[json_schema_name]["schema"]
+ regex_string = self.json_from_regex_fn(json.dumps(json_string))
+ guide = self.guide_class(regex_string, self.tokenizer)
+ self._exhaust_samples(guide)

+ def time_outlines_first_token(self, _, json_schema_name):
+ json_string = json_cases[json_schema_name]["schema"]
+ regex_string = self.json_from_regex_fn(json.dumps(json_string))
+ guide = self.guide_class(regex_string, self.tokenizer)
+ self._get_first_token(guide)


+ class OutlinesJsonSchemaRunTime(OutlinesBenchmark):
+ """Class which warms-up Guide in setup steps"""

+ json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema)

+ params = [models, json_cases.keys()]
+ param_names = ["model", "json_schema_name"]
+ timeout = 1200

+ def setup(self, model, json_schema_name):
+ samples = json_cases[json_schema_name]["samples"]
+ self.do_setup(model, samples)

- regex_string = build_regex_from_schema(json.dumps(json_string))
- guide = RegexGuide(regex_string, self.tokenizer)
+ # ensure warmed up so we're only measuring runtime
+ json_string = json_cases[json_schema_name]["schema"]
+ regex_string = self.json_from_regex_fn(json.dumps(json_string))
+ self.guide = self.guide_class(regex_string, self.tokenizer)
+ self._get_first_token(self.guide)

- state = 0
- for token in json_example_tokens:
- _ = guide.get_next_instruction(state)
- state = guide.get_next_state(state, token)
+ def time_outlines_runtime(self, *args):
+ self._exhaust_samples(self.guide)
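
Taken together, the new classes split each measurement three ways: the *_total methods build the guide or enforcer and walk every sample, the *_first_token methods build it and stop once the first step's allowed tokens are available, and the *_runtime methods warm it up in setup() so only the per-token walk is timed. A rough sketch of driving the Outlines variants by hand outside asv (it assumes src is importable as a package and that models and regex_cases each expose at least one entry; the timing harness is illustrative only, not part of the PR):

import time

from src.benchmark_outlines import OutlinesRegex, OutlinesRegexRunTime
from src.data import models, regex_cases

model = models[0]
regex_name = next(iter(regex_cases))

# "total": index compilation plus walking every sample through the guide.
total = OutlinesRegex()
total.setup(model, regex_name)
t0 = time.perf_counter()
total.time_outlines_total(model, regex_name)
print("total:", time.perf_counter() - t0)
total.teardown()

# "runtime": the guide is already compiled and warmed up in setup(),
# so only the per-token walk through the samples is timed.
runtime = OutlinesRegexRunTime()
runtime.setup(model, regex_name)
t0 = time.perf_counter()
runtime.time_outlines_runtime(model, regex_name)
print("runtime:", time.perf_counter() - t0)
runtime.teardown()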