Skip to content

Commit

Permalink
refactor benchmarks, split into warmup/runtime/total, tokenize sample…
Browse files Browse the repository at this point in the history
…s before running
  • Loading branch information
lapp0 committed Oct 17, 2024
1 parent 77fdf8f commit a5adbe4
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 163 deletions.
117 changes: 78 additions & 39 deletions src/benchmark_lfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,61 +8,100 @@
from .data import json_cases, models, regex_cases


class LMFormatEnforcerRegex:
class LMFormatEnforcerBenchmark:
    """Shared state and helpers for the lm-format-enforcer benchmarks.

    The tokenizer is loaded, converted to enforcer tokenizer data and used to
    encode the samples in ``do_setup``, so subclasses only have to build the
    parser-specific ``TokenEnforcer`` and call one of the walking helpers.
    """

    def do_setup(self, model, samples):
        """Load the tokenizer for ``model`` and tokenize ``samples`` up front.

        Args:
            model: Identifier passed to ``AutoTokenizer.from_pretrained``.
            samples: Iterable of sample strings; each one is encoded once and
                stored in ``self.all_tokenized_samples``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(
            model, clean_up_tokenization_spaces=True
        )
        self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
        self.all_tokenized_samples = [
            self.tokenizer.encode(sample) for sample in samples
        ]

    def _exhaust_samples(self, token_enforcer):
        """Request the allowed tokens for every non-empty prefix of every sample."""
        for sample_tokens in self.all_tokenized_samples:
            # Prefixes of length 1..len(sample); an empty sample yields no calls.
            for prefix_end in range(1, len(sample_tokens) + 1):
                token_enforcer.get_allowed_tokens(sample_tokens[:prefix_end])

    def _get_first_token(self, token_enforcer):
        """Get first token to verify lazy index is fully warmed up"""
        token_enforcer.get_allowed_tokens(self.all_tokenized_samples[0][:1])

    def teardown(self, *args):
        """Drop the converted tokenizer data built by ``do_setup``.

        Safe to call when ``tokenizer_data`` was never set or has already been
        removed, so a repeated teardown does not raise.
        """
        if hasattr(self, "tokenizer_data"):
            del self.tokenizer_data


class LMFormatEnforcerRegex(LMFormatEnforcerBenchmark):
params = [models, regex_cases.keys()]
param_names = ["model", "regex_name"]
timeout = 1200

def setup(self, model, _):
"""Set up the benchmark.
def setup(self, model, regex_name):
samples = regex_cases[regex_name]["samples"]
self.do_setup(model, samples)

We convert the tokenizer during set up as this only
needs to be done once for a given model.
def _get_enforcer(self, regex_name):
pattern = regex_cases[regex_name]["regex"]
parser = RegexParser(pattern)
return TokenEnforcer(self.tokenizer_data, parser)

"""
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
def time_lfe_total(self, _, regex_name):
enforcer = self._get_enforcer(regex_name)
self._exhaust_samples(enforcer)

def time_lfe(self, _, regex_name):
regex_string = regex_cases[regex_name]["regex"]
regex_samples = regex_cases[regex_name]["samples"]
def time_lfe_first_token(self, _, regex_name):
enforcer = self._get_enforcer(regex_name)
self._get_first_token(enforcer)

parser = RegexParser(regex_string)
tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
token_enforcer = TokenEnforcer(tokenizer_data, parser)

for regex_sample in regex_samples:
regex_sample_tokens = self.tokenizer.encode(regex_sample)
for i in range(len(regex_sample_tokens)):
_ = token_enforcer.get_allowed_tokens(regex_sample_tokens[: i + 1])
class LMFormatEnforcerRegexRunTime(LMFormatEnforcerRegex):
"""Class which warms-up enforcer in setup steps"""

def setup(self, model, regex_name):
samples = regex_cases[regex_name]["samples"]
self.do_setup(model, samples)

class LMFormatEnforcerJsonSchema:
# ensure warmed up so we're only measuring runtime
self.enforcer = self._get_enforcer(regex_name)
self._get_first_token(self.enforcer)

def time_lfe_runtime(self, *args):
self._exhaust_samples(self.enforcer)


class LMFormatEnforcerJsonSchema(LMFormatEnforcerBenchmark):
params = [models, json_cases.keys()]
param_names = ["model", "json_schema_name"]
timeout = 1200
timeout = 600

def setup(self, model, _):
"""Set up the benchmark.
def setup(self, model, json_schema_name):
samples = json_cases[json_schema_name]["samples"]
self.do_setup(model, samples)

We convert the tokenizer during set up as this only
needs to be done once for a given model.
def _get_enforcer(self, json_schema_name):
schema = json_cases[json_schema_name]["schema"]
parser = JsonSchemaParser(schema)
return TokenEnforcer(self.tokenizer_data, parser)

"""
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
def time_lfe_total(self, _, json_schema_name):
enforcer = self._get_enforcer(json_schema_name)
self._exhaust_samples(enforcer)

def time_lfe_first_token(self, _, json_schema_name):
enforcer = self._get_enforcer(json_schema_name)
self._get_first_token(enforcer)


class LMFormatEnforcerJsonSchemaRunTime(LMFormatEnforcerJsonSchema):
"""Class which warms-up enforcer in setup steps"""

def time_lfe(self, _, json_schema_name):
json_string = json_cases[json_schema_name]["schema"]
json_samples = json_cases[json_schema_name]["samples"]
def setup(self, model, json_schema_name):
samples = json_cases[json_schema_name]["samples"]
self.do_setup(model, samples)

parser = JsonSchemaParser(json_string)
tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
token_enforcer = TokenEnforcer(tokenizer_data, parser)
# ensure warmed up so we're only measuring runtime
self.enforcer = self._get_enforcer(json_schema_name)
self._get_first_token(self.enforcer)

for json_sample in json_samples:
json_sample_tokens = self.tokenizer.encode(json_sample)
for i in range(len(json_sample_tokens)):
_ = token_enforcer.get_allowed_tokens(json_sample_tokens[: i + 1])
def time_lfe_runtime(self, *args):
self._exhaust_samples(self.enforcer)
136 changes: 84 additions & 52 deletions src/benchmark_outlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json

import outlines.caching as caching
import torch
from outlines.fsm.guide import RegexGuide
from outlines.fsm.json_schema import build_regex_from_schema
from outlines.models.transformers import TransformerTokenizer
Expand All @@ -10,12 +11,10 @@
from .data import json_cases, models, regex_cases


class OutlinesRegex:
params = [models, regex_cases.keys()]
param_names = ["model", "regex_name"]
timeout = 1200
class OutlinesBenchmark:
guide_class = RegexGuide

def setup(self, model, _):
def do_setup(self, model, samples):
"""Set up the benchmark.
We JIT-compile Numba functions and convert the vocabulary
Expand All @@ -26,71 +25,104 @@ def setup(self, model, _):
model, clean_up_tokenization_spaces=True
)
self.tokenizer = TransformerTokenizer(self.tokenizer)
RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary

def time_outlines(self, _, regex_name):
"""Measure generation time with Outlines.
self.guide_class("a", self.tokenizer) # JIT-compile and convert the vocabulary

self.all_tokenized_samples = [
self.tokenizer.encode(sample)[0][0] for sample in samples
]

def _exhaust_samples(self, guide):
state = guide.initial_state
for sample_tokens in self.all_tokenized_samples:
for token in sample_tokens:
if isinstance(token, torch.Tensor):
token = token.item()
state = guide.get_next_state(state, token)
_ = guide.get_next_instruction(state)

Outlines' generation time is split between compiling an index for each
regular expression, and walking this index while generating tokens.
def _get_first_token(self, guide):
"""Get first token to verify lazy index is fully warmed up"""
state = guide.get_next_state(
guide.initial_state, self.all_tokenized_samples[0][0]
)
_ = guide.get_next_instruction(state)

"""
def teardown(self, *args):
caching.clear_cache()


class OutlinesRegex(OutlinesBenchmark):
params = [models, regex_cases.keys()]
param_names = ["model", "regex_name"]
timeout = 1200

def setup(self, model, regex_name):
samples = regex_cases[regex_name]["samples"]
self.do_setup(model, samples)

def time_outlines_total(self, _, regex_name):
regex_string = regex_cases[regex_name]["regex"]
regex_samples = regex_cases[regex_name]["samples"]
guide = self.guide_class(regex_string, self.tokenizer)
self._exhaust_samples(guide)

guide = RegexGuide(regex_string, self.tokenizer)
def time_outlines_first_token(self, _, regex_name):
regex_string = regex_cases[regex_name]["regex"]
guide = self.guide_class(regex_string, self.tokenizer)
self._get_first_token(guide)

for regex_sample in regex_samples:
regex_sample_tokens = self.tokenizer.encode(regex_sample)[0][0]
state = guide.initial_state
for token in regex_sample_tokens:
_ = guide.get_next_instruction(state)
state = guide.get_next_state(state, token)

def teardown(self, *args):
caching.clear_cache()
class OutlinesRegexRunTime(OutlinesRegex):
"""Class which warms-up Guide in setup steps"""

def setup(self, model, regex_name):
samples = regex_cases[regex_name]["samples"]
self.do_setup(model, samples)

class OutlinesJsonSchema:
params = [models, json_cases.keys()]
param_names = ["model", "json_schema_name"]
# ensure warmed up so we're only measuring runtime
regex_string = regex_cases[regex_name]["regex"]
self.guide = self.guide_class(regex_string, self.tokenizer)
self._get_first_token(self.guide)

timeout = 1200
def time_outlines_runtime(self, *args):
self._exhaust_samples(self.guide)

def setup(self, model, _):
"""Set up the benchmark.

We JIT-compile Numba functions and convert the vocabulary
during set up as this only need to be ever done once.
class OutlinesJsonSchema(OutlinesBenchmark):
json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema)

"""
self.tokenizer = AutoTokenizer.from_pretrained(
model, clean_up_tokenization_spaces=True
)
self.tokenizer = TransformerTokenizer(self.tokenizer)
RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary
params = [models, json_cases.keys()]
param_names = ["model", "json_schema_name"]
timeout = 1200

def time_outlines(self, _, json_schema_name):
"""Measure generation time with Outlines.
def setup(self, model, json_schema_name):
samples = json_cases[json_schema_name]["samples"]
self.do_setup(model, samples)

Outlines' generation time is split between compiling an index for each
regular expression, and walking this index while generating tokens.
def time_outlines_total(self, _, json_schema_name):
json_string = json_cases[json_schema_name]["schema"]
regex_string = self.json_from_regex_fn(json.dumps(json_string))
guide = self.guide_class(regex_string, self.tokenizer)
self._exhaust_samples(guide)

"""
def time_outlines_first_token(self, _, json_schema_name):
json_string = json_cases[json_schema_name]["schema"]
json_samples = json_cases[json_schema_name]["samples"]
regex_string = self.json_from_regex_fn(json.dumps(json_string))
guide = self.guide_class(regex_string, self.tokenizer)
self._get_first_token(guide)

regex_string = build_regex_from_schema(json.dumps(json_string))
guide = RegexGuide(regex_string, self.tokenizer)

for json_sample in json_samples:
json_sample_tokens = self.tokenizer.encode(json_sample)[0][0]
state = guide.initial_state
for token in json_sample_tokens:
_ = guide.get_next_instruction(state)
state = guide.get_next_state(state, token)
class OutlinesJsonSchemaRunTime(OutlinesJsonSchema):
"""Class which warms-up Guide in setup steps"""

def teardown(self, *args):
caching.clear_cache()
def setup(self, model, json_schema_name):
samples = json_cases[json_schema_name]["samples"]
self.do_setup(model, samples)

# ensure warmed up so we're only measuring runtime
json_string = json_cases[json_schema_name]["schema"]
regex_string = self.json_from_regex_fn(json.dumps(json_string))
self.guide = self.guide_class(regex_string, self.tokenizer)
self._get_first_token(self.guide)

def time_outlines_runtime(self, *args):
self._exhaust_samples(self.guide)
Loading

0 comments on commit a5adbe4

Please sign in to comment.