Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Last active October 24, 2024 12:35
Show Gist options
  • Save alonsosilvaallende/93017fe8a86868a8d659e83b72a68995 to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/93017fe8a86868a8d659e83b72a68995 to your computer and use it in GitHub Desktop.
Benchmark Outlines v0.1.1 vs v0.0.46
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "outlines-core==0.1.14",
# "outlines==0.1.1",
# "protobuf==5.28.3",
# "pydantic==2.9.2",
# "rich==13.9.3",
# "sentencepiece==0.2.0",
# "setuptools==75.2.0",
# "transformers==4.45.2",
# "uvtrick==0.3.0",
# ]
# ///
def main() -> None:
    """Benchmark FSM index creation in outlines 0.0.46 versus 0.1.1.

    Builds the JSON schema of a small Pydantic model, loads the
    ``mistralai/Mistral-7B-Instruct-v0.3`` tokenizer, and then runs
    ``bench`` three times under each pinned outlines release through
    ``uvtrick.Env``.  Every run prints its own elapsed time.
    """
    from enum import Enum

    from pydantic import BaseModel
    from transformers import AutoTokenizer
    from uvtrick import Env

    # Get a json schema from a Pydantic class
    class Name(str, Enum):
        john = "John"
        paul = "Paul"

    class Age(int, Enum):
        twenty = 20
        thirty = 30

    class Character(BaseModel):
        name: Name
        age: Age

    json_schema = Character.model_json_schema()

    # Use a tokenizer, for example `mistralai/Mistral-7B-Instruct-v0.3`
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

    # Define bench to compare creation of the index
    def bench(json_schema, tokenizer):
        """Time schema -> regex -> deterministic FSM -> token index.

        NOTE(review): this function is handed to ``uvtrick.Env.run`` and is
        presumably executed in a separate environment, so every import it
        needs is kept inside its own body -- confirm before hoisting any.

        Args:
            json_schema: JSON schema (as produced by ``model_json_schema``).
            tokenizer: Hugging Face tokenizer to wrap for outlines.

        Returns:
            Elapsed time in seconds for the whole pipeline, including the
            ``TransformerTokenizer`` wrapping step.
        """
        # perf_counter is monotonic and high-resolution; time() is a
        # wall clock that can jump if the system clock is adjusted.
        from time import perf_counter
        from importlib import metadata

        from rich import print as rich_print
        from outlines.fsm.json_schema import build_regex_from_schema
        import interegular
        from outlines.models.transformers import TransformerTokenizer

        version = metadata.version("outlines")
        # The helpers live in different modules depending on the release.
        if version == "0.1.1":
            from outlines.fsm.json_schema import convert_json_schema_to_str
            from outlines_core.fsm.regex import make_deterministic_fsm
            from outlines_core.fsm.regex import create_fsm_index_tokenizer
        else:
            from outlines.integrations.utils import convert_json_schema_to_str
            from outlines.fsm.regex import make_deterministic_fsm
            from outlines.fsm.regex import create_fsm_index_tokenizer

        started = perf_counter()
        schema_str = convert_json_schema_to_str(json_schema=json_schema)
        regex_str = build_regex_from_schema(schema_str, whitespace_pattern=r"")
        pattern_fsm = interegular.parse_pattern(regex_str).to_fsm()
        deterministic_fsm, _ = make_deterministic_fsm(pattern_fsm)
        wrapped_tokenizer = TransformerTokenizer(tokenizer)
        # Only the construction cost matters here; the index is discarded.
        create_fsm_index_tokenizer(deterministic_fsm, wrapped_tokenizer)
        elapsed = perf_counter() - started

        rich_print(f"[green]outlines=={version}, time: {elapsed}[/green]")
        return elapsed

    # Use uvtrick to compare the execution time
    runs_per_version = 3
    for requirement in ("outlines==0.0.46", "outlines==0.1.1"):
        for _ in range(runs_per_version):
            Env(requirement, python="3.12").run(bench, json_schema, tokenizer)
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment