From e40c7fd362dbc029841d98e3d60bb5231c3555cd Mon Sep 17 00:00:00 2001 From: Andrew Lapp Date: Mon, 22 Jul 2024 16:24:16 -0500 Subject: [PATCH] Restructure documentation, use phi-3-mini, include structured generation example, add feature matrix to models --- .../images/logits_processing_diagram.svg | 157 +++++++++++++ docs/quickstart.md | 214 +++--------------- docs/reference/{ => generation}/cfg.md | 0 docs/reference/{ => generation}/choices.md | 0 .../{ => generation}/custom_fsm_ops.md | 0 docs/reference/{ => generation}/format.md | 0 docs/reference/generation/generation.md | 210 +++++++++++++++++ docs/reference/{ => generation}/json.md | 3 +- docs/reference/{ => generation}/regex.md | 0 .../structured_generation_explanation.md | 55 +++++ docs/reference/{ => generation}/types.md | 0 docs/reference/json_mode.md | 17 -- docs/reference/models/models.md | 93 ++++++++ docs/reference/models/transformers_vision.md | 1 + mkdocs.yml | 25 +- requirements-doc.txt | 1 + 16 files changed, 564 insertions(+), 212 deletions(-) create mode 100644 docs/assets/images/logits_processing_diagram.svg rename docs/reference/{ => generation}/cfg.md (100%) rename docs/reference/{ => generation}/choices.md (100%) rename docs/reference/{ => generation}/custom_fsm_ops.md (100%) rename docs/reference/{ => generation}/format.md (100%) create mode 100644 docs/reference/generation/generation.md rename docs/reference/{ => generation}/json.md (98%) rename docs/reference/{ => generation}/regex.md (100%) create mode 100644 docs/reference/generation/structured_generation_explanation.md rename docs/reference/{ => generation}/types.md (100%) delete mode 100644 docs/reference/json_mode.md create mode 100644 docs/reference/models/models.md diff --git a/docs/assets/images/logits_processing_diagram.svg b/docs/assets/images/logits_processing_diagram.svg new file mode 100644 index 000000000..49d4d3af8 --- /dev/null +++ b/docs/assets/images/logits_processing_diagram.svg @@ -0,0 +1,157 @@ + + + + + + +%3 + + + 
+inputTokens + +Input Tokens + +7 + +4 + +8 + + + +TransformerDecoder + +Transformer Decoder Pass + + + +inputTokens->TransformerDecoder + + + + + +logitsTable + +Model Output Logits + +Token + +Probability + ++ + +3% + +foo + +4% + +. + +7% + +1 + +11% + +2 + +13% + +3 + +17% + + + +TransformerDecoder->logitsTable + + + + + +OutlinesLogitsProcessor + +Outlines Regex Logits Processor + + + +logitsTable->OutlinesLogitsProcessor + + + + + +logitsProcessorTable + +Processed Logits + +Token + +Probability + ++ + +0% + +number + +0% + +. + +7% + +1 + +11% + +2 + +13% + +3 + +17% + + + +OutlinesLogitsProcessor->logitsProcessorTable + + + + + +sampler + +Sampler + + + +logitsProcessorTable->sampler + + + + + +sampledTokenTable + +Sampled Token + +Token + +3 + + + +sampler->sampledTokenTable + + + + + diff --git a/docs/quickstart.md b/docs/quickstart.md index 7d4e86fbc..fcb524d8a 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -6,6 +6,7 @@ title: Quickstart After [installing Outlines](installation.md), the fastest way to get to up to speed with the library is to get acquainted with its few core elements. We advise you to take a quick look at this page to see everything Outlines has to offer before diving in the [documentation](reference/index.md). + ## Core elements ### Models @@ -15,88 +16,57 @@ The first step when writing a program with Outlines is to initialize a model. We ```python import outlines -model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") +model = outlines.models.transformers( + "microsoft/Phi-3-mini-4k-instruct", + device="cuda" # optional device argument, default is cpu +) ``` -### Text generator - -Once the model is initialized you can build a text generator. This generator can be called with a prompt directly, or you can use the `stream` method to generate text token by token: - -=== "Generate" - - ```python - import outlines +Outlines supports a wide variety of inference engines and model weight types. 
More details on different models can be found in the [Outlines Models](./reference/models/models.md) documentation page. - model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") - generator = outlines.generate.text(model) +### Generation - result = generator("What's 2+2?", max_tokens=100) +Once the model is initialized you can build an `outlines.generate` generator. This generator can be called with a prompt directly. - print(result) - # That's right, it's 4! But remember, a delicious and nutrient dense 4, - # according to YEARS BUILT ON SOLID SCIENCE. This column presents additional - # findings from the fifteen-year study that produced the 2+2=4 conclusion. - ``` +([Outlines Structured Generation Full Documentation](./reference/generation/generation.md)) -=== "Stream" +=== "Text" ```python - import outlines - - model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") generator = outlines.generate.text(model) + result = generator("Question: What's 2+2? Answer:", max_tokens=100) + print(result) + # The answer is 4 + + # Outlines also supports streaming output stream = generator.stream("What's 2+2?", max_tokens=4) for i in range(5): token = next(stream) - print(token) - # ['Is'] - # [' this'] - # [' even'] - # [' a'] - # [' question'] + print(repr(token)) + # '2' + # '+' + # '2' + # ' equals' + # '4' ``` -### Multi-label classification - -Outlines allows you to do multi-label classification by guiding the model so it can only output either of the specified choices: - -```python -import outlines - -model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") -generator = outlines.generate.choice(model, ["Blue", "Red", "Yellow"]) - -color = generator("What is the closest color to Indigo? 
") -print(color) -# Blue -``` +=== "Structured" -### JSON-structured generation + Along with typical language model generation behavior via, `outlines.generate.text`, Outlines supports structured generation, which guarantees the tokens generated by the model will follow a predefined structure. Structures can be defined by a regex pattern, JSON schema, python object type, or a Lark grammar defining a parsable language such as SQL or Python. -Outlines can guide models so that they output valid JSON **100%** of the time. You can either specify the structure using [Pydantic][pydantic]{:target="_blank"} or a string that contains a [JSON Schema][jsonschema]{:target="_blank"}: - -=== "Pydantic" + Example: using pydantic to enforce a JSON schema ```python from enum import Enum from pydantic import BaseModel, constr, conint - import outlines - - class Armor(str, Enum): - leather = "leather" - chainmail = "chainmail" - plate = "plate" - - class Character(BaseModel): name: constr(max_length=10) age: conint(gt=18, lt=99) - armor: Armor + armor: (Enum('Armor', {'leather': 'leather', 'chainmail': 'chainmail', 'plate': 'plate'})) strength: conint(gt=1, lt=100) - model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") generator = outlines.generate.json(model, Character) character = generator( @@ -104,135 +74,10 @@ Outlines can guide models so that they output valid JSON **100%** of the time. Y + "name, age (between 1 and 99), armor and strength. 
" ) print(character) - # name='Orla' age=21 armor= strength=8 - ``` - -=== "JSON Schema" - - ```python - import outlines - - schema = """{ - "$defs": { - "Armor": { - "enum": ["leather", "chainmail", "plate"], - "title": "Armor", - "type": "string" - } - }, - "properties": { - "name": {"maxLength": 10, "title": "Name", "type": "string"}, - "age": {"title": "Age", "type": "integer"}, - "armor": {"$ref": "#/$defs/Armor"}, - "strength": {"title": "Strength", "type": "integer"}\ - }, - "required": ["name", "age", "armor", "strength"], - "title": "Character", - "type": "object" - }""" - - model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2") - generator = outlines.generate.json(model, schema) - character = generator( - "Generate a new character for my awesome game: " - + "name, age (between 1 and 99), armor and strength. " - ) - print(character) - # {'name': 'Yuki', 'age': 24, 'armor': 'plate', 'strength': 3} - ``` - -!!! Note - - We advise you to constrain the length of the strings fields when first testing your schema, especially with small models. + # Character(name='Zara', age=25, armor=, strength=85) + ``` -### Grammar-structured generation - -Outlines also allows to generate text that is valid to any [context-free grammar][cfg]{:target="_blank"} (CFG) in the [EBNF format][ebnf]{:target="_blank"}. Grammars can be intimidating, but they are a very powerful tool! Indeed, they determine the syntax of every programming language, valid chess moves, molecule structure, can help with procedural graphics generation, etc. 
- -Here we show a simple example of a grammar that defines arithmetic operations: - -```python -from outlines import models, generate - -arithmetic_grammar = """ - ?start: sum - - ?sum: product - | sum "+" product -> add - | sum "-" product -> sub - - ?product: atom - | product "*" atom -> mul - | product "/" atom -> div - - ?atom: NUMBER -> number - | "-" atom -> neg - | "(" sum ")" - - %import common.NUMBER - %import common.WS_INLINE - - %ignore WS_INLINE -""" - -model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2") -generator = generate.cfg(model, arithmetic_grammar, max_tokens=100) - -result = generator("Question: How can you write 5*5 using addition?\nAnswer:") -print(result) -# 5+5+5+5+5 -``` - - -EBNF grammars can be cumbersome to write. This is why Outlines provides grammar definitions in the `outlines.grammars.` module - -```python -from outlines import models, generate, grammars - -model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2") -generator = generate.cfg(model, grammars.arithmetic, max_tokens=100) - -result = generator("Question: How can you write 5*5 using addition?\nAnswer:") -print(result) -# 5+5+5+5+5 -``` - -The available grammars are listed [here](https://github.com/outlines-dev/outlines/tree/main/outlines/grammars). - - -### Regex-structured generation - -Slightly simpler, but no less useful, Outlines can generate text that is in the language of a [regular expression](https://www.regular-expressions.info/tutorial.html). 
For instance to force the model to generate IP addresses: - -```python -from outlines import models, generate - -model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2") - -regex_str = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)" -generator = generate.regex(model, regex_str) - -result = generator("What is the IP address of localhost?\nIP: ") -print(result) -# 127.0.0.100 -``` - -### Generate a given Python type - -We provide a shortcut to regex-structured generation for simple use cases. Pass a Python type to the `outlines.generate.format` function and the LLM will output text that matches this type: - -```python -from outlines import models, generate - -model = models.transformers("mistralai/Mistral-7B-Instruct-v0.2") -generator = generate.format(model, int) - -result = generator("What is 2+2?") -print(result) -# 4 -``` - -## Deploy using vLLM and FastAPI +## [Deploy using vLLM and FastAPI](./reference/serve/vllm.md) Outlines can be deployed as a LLM service using [vLLM][vllm]{:target="_blank"} and [FastAPI][fastapi]{:target="_blank"}. The server supports asynchronous processing of incoming requests, and benefits from the performance of vLLM. @@ -265,7 +110,7 @@ Or use the [requests][requests]{:target="_blank"} library from another python pr ## Utilities -### Prompt templates +### [Prompt templates](./reference/prompting.md) Prompting can lead to messy code. Outlines' prompt functions are python functions that contain a template for the prompt in their docstring. We use a powerful templating language to allow you to loop over lists, dictionaries, add conditionals, etc. directly from the prompt. When called, a prompt function returns the rendered template: @@ -368,6 +213,7 @@ Once you are done experimenting with a prompt and an output structure, it is use ``` It make it easier for the community to collaborate on the infinite number of use cases enabled by these models! 
+ ## Going further If you need more inspiration you can take a look at the [cookbook](cookbook/index.md). If you have any question, or requests for documentation please reach out to us on [GitHub](https://github.com/outlines-dev/outlines/discussions), [Twitter](https://twitter.com/remilouf) or [Discord](https://discord.gg/UppQmhEpe8). diff --git a/docs/reference/cfg.md b/docs/reference/generation/cfg.md similarity index 100% rename from docs/reference/cfg.md rename to docs/reference/generation/cfg.md diff --git a/docs/reference/choices.md b/docs/reference/generation/choices.md similarity index 100% rename from docs/reference/choices.md rename to docs/reference/generation/choices.md diff --git a/docs/reference/custom_fsm_ops.md b/docs/reference/generation/custom_fsm_ops.md similarity index 100% rename from docs/reference/custom_fsm_ops.md rename to docs/reference/generation/custom_fsm_ops.md diff --git a/docs/reference/format.md b/docs/reference/generation/format.md similarity index 100% rename from docs/reference/format.md rename to docs/reference/generation/format.md diff --git a/docs/reference/generation/generation.md b/docs/reference/generation/generation.md new file mode 100644 index 000000000..c74cf5593 --- /dev/null +++ b/docs/reference/generation/generation.md @@ -0,0 +1,210 @@ +--- +title: Generation +--- + +# Generation + +Once an [`outlines.model`](../models) is constructed you can use `outlines.generate` to generate text. Standard LLM generation is possible via `outlines.generate.text`, along with a variety of structured generation methods described below. (For a detailed technical explanation of how structured generation works, you may review the [Structured Generation Explanation](./structured_generation_explanation.md) page) + +Before generating text, you must construct an `outlines.model`. 
Example: + +```python +import outlines + +model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct", device="cuda") +``` +### Text generator + +```python +generator = outlines.generate.text(model) + +result = generator("Question: What's 2+2? Answer:", max_tokens=100) +print(result) +# The answer is 4 + +# Outlines also supports streaming output +stream = generator.stream("What's 2+2?", max_tokens=4) +for i in range(5): + token = next(stream) + print(repr(token)) +# '2' +# '+' +# '2' +# ' equals' +# '4' +``` + +### [Multi-label classification](./choices.md) + +Outlines allows you to do multi-label classification by guiding the model so it can only output either of the specified choices: + +```python +import outlines + +model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct") +generator = outlines.generate.choice(model, ["Blue", "Red", "Yellow"]) + +color = generator("What is the closest color to Indigo? ") +print(color) +# Blue +``` + +### [JSON-structured generation](./json.md) + +Outlines can guide models so that they output valid JSON **100%** of the time. You can either specify the structure using [Pydantic][pydantic]{:target="_blank"} or a string that contains a [JSON Schema][jsonschema]{:target="_blank"}: + +=== "Pydantic" + + ```python + from enum import Enum + from pydantic import BaseModel, constr, conint + + import outlines + + class Armor(str, Enum): + leather = "leather" + chainmail = "chainmail" + plate = "plate" + + + class Character(BaseModel): + name: constr(max_length=10) + age: conint(gt=18, lt=99) + armor: Armor + strength: conint(gt=1, lt=100) + + model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct") + generator = outlines.generate.json(model, Character) + + character = generator( + "Generate a new character for my awesome game: " + + "name, age (between 1 and 99), armor and strength. 
" + ) + print(character) + # name='Orla' age=21 armor= strength=8 + ``` + +=== "JSON Schema" + + ```python + import outlines + + schema = """{ + "$defs": { + "Armor": { + "enum": ["leather", "chainmail", "plate"], + "title": "Armor", + "type": "string" + } + }, + "properties": { + "name": {"maxLength": 10, "title": "Name", "type": "string"}, + "age": {"title": "Age", "type": "integer"}, + "armor": {"$ref": "#/$defs/Armor"}, + "strength": {"title": "Strength", "type": "integer"}\ + }, + "required": ["name", "age", "armor", "strength"], + "title": "Character", + "type": "object" + }""" + + model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct") + generator = outlines.generate.json(model, schema) + character = generator( + "Generate a new character for my awesome game: " + + "name, age (between 1 and 99), armor and strength. " + ) + print(character) + # {'name': 'Yuki', 'age': 24, 'armor': 'plate', 'strength': 3} + ``` + +!!! Note + + We advise you to constrain the length of the strings fields when first testing your schema, especially with small models. + +### [Grammar-structured generation](./cfg.md) + +Outlines also allows to generate text that is valid to any [context-free grammar][cfg]{:target="_blank"} (CFG) in the [EBNF format][ebnf]{:target="_blank"}. Grammars can be intimidating, but they are a very powerful tool! Indeed, they determine the syntax of every programming language, valid chess moves, molecule structure, can help with procedural graphics generation, etc. 
+ +Here we show a simple example of a grammar that defines arithmetic operations: + +```python +from outlines import models, generate + +arithmetic_grammar = """ + ?start: sum + + ?sum: product + | sum "+" product -> add + | sum "-" product -> sub + + ?product: atom + | product "*" atom -> mul + | product "/" atom -> div + + ?atom: NUMBER -> number + | "-" atom -> neg + | "(" sum ")" + + %import common.NUMBER + %import common.WS_INLINE + + %ignore WS_INLINE +""" + +model = models.transformers("microsoft/Phi-3-mini-128k-instruct") +generator = generate.cfg(model, arithmetic_grammar, max_tokens=100) + +result = generator("Question: How can you write 5*5 using addition?\nAnswer:") +print(result) +# 5+5+5+5+5 +``` + + +EBNF grammars can be cumbersome to write. This is why Outlines provides grammar definitions in the `outlines.grammars.` module + +```python +from outlines import models, generate, grammars + +model = models.transformers("microsoft/Phi-3-mini-128k-instruct") +generator = generate.cfg(model, grammars.arithmetic, max_tokens=100) + +result = generator("Question: How can you write 5*5 using addition?\nAnswer:") +print(result) +# 5+5+5+5+5 +``` + +The available grammars are listed [here](https://github.com/outlines-dev/outlines/tree/main/outlines/grammars). + + +### [Regex-structured generation](./regex.md) + +Slightly simpler, but no less useful, Outlines can generate text that is in the language of a [regular expression](https://www.regular-expressions.info/tutorial.html). 
For instance to force the model to generate IP addresses: + +```python +from outlines import models, generate + +model = models.transformers("microsoft/Phi-3-mini-128k-instruct") + +regex_str = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)" +generator = generate.regex(model, regex_str) + +result = generator("What is the IP address of localhost?\nIP: ") +print(result) +# 127.0.0.100 +``` + +### [Generate a given Python type](./types.md) + +We provide a shortcut to regex-structured generation for simple use cases. Pass a Python type to the `outlines.generate.format` function and the LLM will output text that matches this type: + +```python +from outlines import models, generate + +model = models.transformers("microsoft/Phi-3-mini-128k-instruct") +generator = generate.format(model, int) + +result = generator("What is 2+2?") +print(result) +# 4 +``` diff --git a/docs/reference/json.md b/docs/reference/generation/json.md similarity index 98% rename from docs/reference/json.md rename to docs/reference/generation/json.md index 85e1a846a..a342395bd 100644 --- a/docs/reference/json.md +++ b/docs/reference/generation/json.md @@ -70,7 +70,8 @@ schema = """ "name": {"type": "string"}, "last_name": {"type": "string"}, "id": {"type": "integer"} - } + }, + "required": ["name", "last_name", "id"] } """ diff --git a/docs/reference/regex.md b/docs/reference/generation/regex.md similarity index 100% rename from docs/reference/regex.md rename to docs/reference/generation/regex.md diff --git a/docs/reference/generation/structured_generation_explanation.md b/docs/reference/generation/structured_generation_explanation.md new file mode 100644 index 000000000..8212a231e --- /dev/null +++ b/docs/reference/generation/structured_generation_explanation.md @@ -0,0 +1,55 @@ +--- +title: Structured Generation Explanation +--- + +# Structured Generation Explanation + + +Language models generate text token by token, using the previous token sequence as input and sampled logits 
as output. This document explains the structured generation process, where only legal tokens are considered for the next step based on a predefined automaton, e.g. a regex-defined [finite-state machine](https://en.wikipedia.org/wiki/Finite-state_machine) (FSM) or [Lark](https://lark-parser.readthedocs.io/en/stable/) grammar.
+
+
+## Worked Example
+
+Let's consider a worked example with a pattern for whole and decimal numbers: `^\d*(\.\d+)?$`.
+
+### Creating Automata
+
+The pattern is first converted into an automaton. Below is a brief explanation of the automata conversion and its representation.
+
+**Automata Diagram:**
+
+```mermaid
+graph LR
+    node0("1-9") --> node1("1-9")
+    node1 --> node1
+    node1 --> nodeEND{{END}}
+    node1 --> nodePeriod(".")
+    nodePeriod --> node2("1-9")
+    node2 --> node2
+    node2 --> nodeEND{{END}}
+```
+
+### Generating a Token
+
+Let's assume that we're in the middle of generation, and so far "748" has been generated. Here is the automaton with the current state highlighted in green, with the legal next characters being another number (1-9), a dot (.), or end of sequence.
+
+```mermaid
+graph LR
+    node0("1-9") --> node1("1-9")
+    node1 --> node1
+    node1 --> nodeEND{{END}}
+    node1 --> nodePeriod(".")
+    nodePeriod --> node2("1-9")
+    node2 --> node2
+    node2 --> nodeEND{{END}}
+
+    style node1 fill:#090
+```
+
+Generating a token requires the following steps:
+- Feed the previous input sequence ("748") into the language model.
+- Language model runs a forward pass and produces token logits.
+- Outlines logits processor sets the probability of illegal tokens to 0%.
+- A token is sampled from the set of legal tokens. 
+ +![Generation and Logits Processing Flow Chart](../../assets/images/logits_processing_diagram.svg) diff --git a/docs/reference/types.md b/docs/reference/generation/types.md similarity index 100% rename from docs/reference/types.md rename to docs/reference/generation/types.md diff --git a/docs/reference/json_mode.md b/docs/reference/json_mode.md deleted file mode 100644 index bb1c1c7a8..000000000 --- a/docs/reference/json_mode.md +++ /dev/null @@ -1,17 +0,0 @@ -# JSON mode - -Outlines can guarantee that the LLM will generate valid JSON, using [Grammar-structured generation](cfg.md): - -```python -from outlines import models, generate - -json_grammar = outlines.grammars.json - -model = models.transformers("mistralai/Mistral-7b-v0.1") -generator = generate.cfg(model, json_grammar) -sequence = generator("Generate valid JSON") -``` - -!!! Note "JSON that follows a schema" - - If you want to guarantee that the generated JSON follows a given schema, consult [this section](json.md) instead. diff --git a/docs/reference/models/models.md b/docs/reference/models/models.md new file mode 100644 index 000000000..dadfd34ad --- /dev/null +++ b/docs/reference/models/models.md @@ -0,0 +1,93 @@ +--- +title: Models +--- + +# Models + +Outlines supports generation using a number of inference engines (`outlines.models`) + +Loading a model using outlines follows a similar interface between inference engines. + +```python +import outlines +``` + +## [Transformers](./transformers.md) + +```python +model = outlines.models.transformers("microsoft/Phi-3-mini-128k-instruct", model_kwargs={}) +``` + +For additional arguments and use of other Huggingface Transformers model types see [Outlines' Transformers documentation](./transformers.md). 
+
+
+## [Transformers Vision](./transformers_vision.md)
+
+```python
+model = outlines.models.transformers_vision("llava-hf/llava-v1.6-mistral-7b-hf")
+```
+
+For examples of generation and other details, see [Outlines' Transformers Vision documentation](./transformers_vision.md).
+
+## [vLLM](./vllm.md)
+
+```python
+model = outlines.models.vllm("microsoft/Phi-3-mini-128k-instruct")
+```
+
+## [llama.cpp](./llamacpp.md)
+
+```python
+model = outlines.models.llamacpp("microsoft/Phi-3-mini-4k-instruct-gguf", "Phi-3-mini-4k-instruct-q4.gguf")
+```
+
+Additional llama.cpp parameters can be found in the [Outlines' llama.cpp documentation](./llamacpp.md).
+
+## [ExLlamaV2](./exllamav2.md)
+
+```python
+model = outlines.models.exllamav2("bartowski/Phi-3-mini-128k-instruct-exl2")
+```
+
+## [MLXLM](./mlxlm.md)
+
+```python
+model = outlines.models.mlxlm("mlx-community/Phi-3-mini-4k-instruct-4bit")
+```
+
+## [OpenAI](./openai.md)
+
+```python
+model = outlines.models.openai(
+    "gpt-4o-mini",
+    api_key=os.environ["OPENAI_API_KEY"]
+)
+```
+
+
+# Feature Matrix
+| | Transformers | Transformers Vision | vLLM | llama.cpp | ExLlamaV2 | MLXLM | OpenAI* |
+|-------------------|--------------|---------------------|------|-----------|-----------|-------|---------|
+| **Device** | | | | | | | |
+| Cuda | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | N/A |
+| Apple Silicon | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | N/A |
+| x86 / AMD64 | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | N/A |
+| **Sampling** | | | | | | | |
+| Greedy | ✅ | ✅ | ✅ | ✅* | ✅ | ✅ | ❌ |
+| Multinomial | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| Multiple Samples | ✅ | ✅ | | ❌ | | ❌ | ✅ |
+| Beam Search | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ |
+| **Generation** | | | | | | | |
+| Batch | ✅ | ✅ | ✅ | ❌ | ? | ❌ | ❌ |
+| Stream | ✅ | ❌ | ❌ | ✅ | ? 
| ✅ | ❌ | +| **`outlines.generate`** | | | | | | | | +| Text | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Structured* | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | + + +## Caveats + +- OpenAI doesn't support structured generation due to limitations in their API and server implementation. +- `outlines.generate` ["Structured"](../generation/generation.md) includes methods such as `outlines.generate.regex`, `outlines.generate.json`, `outlines.generate.cfg`, etc. +- MLXLM only supports Apple Silicon. +- llama.cpp greedy sampling available via multinomial with `temperature = 0.0`. diff --git a/docs/reference/models/transformers_vision.md b/docs/reference/models/transformers_vision.md index b0ad6533b..6b80fb463 100644 --- a/docs/reference/models/transformers_vision.md +++ b/docs/reference/models/transformers_vision.md @@ -5,6 +5,7 @@ Outlines allows seamless use of [vision models](https://huggingface.co/learn/com `outlines.models.transformers_vision` has shares interfaces with, and is based on [`outlines.models.transformers`](./transformers.md). 
Tasks supported include + - image + text -> text - video + text -> text diff --git a/mkdocs.yml b/mkdocs.yml index 7de4a6350..117e99911 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -58,7 +58,11 @@ markdown_extensions: pygments_lang_class: true noclasses: True pygments_style: nord - - pymdownx.superfences + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format - pymdownx.tabbed: alternate_style: true - pymdownx.inlinehilite @@ -115,22 +119,24 @@ nav: - Docs: - reference/index.md - Generation: + - Generation Overview: reference/generation/generation.md - Text: reference/text.md - Samplers: reference/samplers.md - - Structured generation: - - Classification: reference/choices.md - - Regex: reference/regex.md - - Type constraints: reference/format.md - - JSON (function calling): reference/json.md - - JSON mode: reference/json_mode.md - - Grammar: reference/cfg.md - - Custom FSM operations: reference/custom_fsm_ops.md + - Structured generation: + - Classification: reference/generation/choices.md + - Regex: reference/generation/regex.md + - Type constraints: reference/generation/format.md + - JSON (function calling): reference/generation/json.md + - Grammar: reference/generation/cfg.md + - Custom FSM operations: reference/generation/custom_fsm_ops.md + - Structured Generation Technical Explanation: reference/generation/structured_generation_explanation.md - Utilities: - Serve with vLLM: reference/serve/vllm.md - Custom types: reference/types.md - Prompt templating: reference/prompting.md - Outlines functions: reference/functions.md - Models: + - Models Overview: reference/models/models.md - Open source: - Transformers: reference/models/transformers.md - Llama.cpp: reference/models/llamacpp.md @@ -138,7 +144,6 @@ nav: - TGI: reference/models/tgi.md - ExllamaV2: reference/models/exllamav2.md - MLX: reference/models/mlxlm.md - - Mamba: reference/models/mamba.md - API: - OpenAI: 
reference/models/openai.md - API Reference: diff --git a/requirements-doc.txt b/requirements-doc.txt index b7bd2efbb..cf8c674a7 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -1,6 +1,7 @@ mkdocs mkdocs-material mkdocs-material[imaging] +mkdocs-mermaid2-plugin mkdocs-section-index mkdocstrings[python] mkdocs-git-committers-plugin-2