Skip to content

Instantly share code, notes, and snippets.

@BramVanroy
Created December 2, 2025 13:38
Show Gist options
  • Select an option

  • Save BramVanroy/90b5b787f6546b79720e35cc3ad928e9 to your computer and use it in GitHub Desktop.

Select an option

Save BramVanroy/90b5b787f6546b79720e35cc3ad928e9 to your computer and use it in GitHub Desktop.
Generate NER data with vLLM Structured Output (regex)
from vllm import LLM, SamplingParams
from vllm.sampling_params import StructuredOutputsParams
def generate_output(prompt: str, sampling_params: SamplingParams, llm: LLM) -> str:
    """Generate a completion for a single prompt and return its text.

    Args:
        prompt: The prompt passed to ``llm.generate``.
        sampling_params: Sampling configuration forwarded to ``llm.generate``
            via the ``sampling_params`` keyword.
        llm: The object whose ``generate`` method produces the completion.

    Returns:
        The ``text`` of the first completion of the first returned request
        output. Any further completions are ignored.
    """
    request_outputs = llm.generate(prompt, sampling_params=sampling_params)
    # A single prompt goes in, so only the first request output (and its
    # first completion) is of interest.
    first_completion = request_outputs[0].outputs[0]
    return first_completion.text
def main():
    """Generate one regex-constrained NER example with vLLM and print it.

    Builds a regex describing tab-separated IOB2 output, wraps it in
    structured-output sampling parameters, prompts the model with a one-shot
    example, and prints the generated text.
    """
    # Output shape enforced by the regex:
    #   - one comment line: "# " followed by text without '#' or newlines
    #   - one or more rows: <non-whitespace token> TAB <tag> NEWLINE
    ner_regex = r"^# [^#\n]+\n(?:\S+\t(?:O|B-LOC|I-LOC|B-ORG|I-ORG|B-PER|I-PER)\n)+"
    sampling_params_ner = SamplingParams(
        structured_outputs=StructuredOutputsParams(regex=ner_regex),
        max_tokens=200,
    )

    # One-shot example. The rows are joined with an explicit "\t" so the
    # example is guaranteed to use the tab separator that both the prompt
    # text and the regex ask for (tabs are easily lost when copy-pasting).
    example_sentence = (
        "# Crude-oil prices rose Wednesday as strengthening Hurricane Rita, "
        "now a Category 5 storm, threatened to disrupt oil production in the "
        "Gulf of Mexico."
    )
    example_rows = [
        ("Crude", "O"),
        ("-", "O"),
        ("oil", "O"),
        ("prices", "O"),
        ("rose", "O"),
        ("Wednesday", "O"),
        ("as", "O"),
        ("strengthening", "O"),
        ("Hurricane", "O"),
        ("Rita", "O"),
        (",", "O"),
        ("now", "O"),
        ("a", "O"),
        ("Category", "O"),
        ("5", "O"),
        ("storm", "O"),
        (",", "O"),
        ("threatened", "O"),
        ("to", "O"),
        ("disrupt", "O"),
        ("oil", "O"),
        ("production", "O"),
        ("in", "O"),
        ("the", "O"),
        ("Gulf", "B-LOC"),
        ("of", "I-LOC"),
        ("Mexico", "I-LOC"),
        (".", "O"),
    ]
    example_block = "".join(f"{token}\t{tag}\n" for token, tag in example_rows)

    prompt_ner = (
        "Generate an example of named entity recognition (NER) data in "
        "tab-seperated IOB2 format. Start with a comment line that begins "
        "with a '#' character describing the sentence. Each subsequent line "
        "should contain a word followed by its corresponding NER tag "
        "(O, B-LOC, I-LOC, B-ORG, I-ORG, B-PER, I-PER), separated by a tab "
        "character.\n"
        "Ensure that the output strictly adheres to this format.\n"
        "Example:\n"
        f"{example_sentence}\n"
        f"{example_block}"
    )

    # Works better with a larger model, this is just an example.
    llm = LLM(model="Qwen/Qwen3-4B-Instruct-2507", max_model_len=480)
    output = generate_output(prompt_ner, sampling_params_ner, llm)
    print(output)

    # The regex only constrains the layout. You should probably still verify
    # that the sentence on the comment line (after '#') contains the same
    # tokens as the tagged rows below it.
    #
    # Output I got (token and tag are tab-separated in the real output):
    #
    #   # The United States Department of Defense announced a new policy to
    #     increase funding for military research in Boston and San Diego.
    #   The O
    #   United B-PER
    #   States B-ORG
    #   Department B-ORG
    #   of O
    #   Defense B-ORG
    #   announced O
    #   a O
    #   new O
    #   policy O
    #   to O
    #   increase O
    #   funding O
    #   for O
    #   military O
    #   research O
    #   in O
    #   Boston B-LOC
    #   and O
    #   San O
    #   Diego B-LOC
    #   . O
# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment