Skip to content

Instantly share code, notes, and snippets.

@sam-paech
Last active August 12, 2025 16:07
Show Gist options
  • Save sam-paech/2a269e47d1c47e3c0103e2edf5d74e39 to your computer and use it in GitHub Desktop.
Save sam-paech/2a269e47d1c47e3c0103e2edf5d74e39 to your computer and use it in GitHub Desktop.
squash the em-dash
#!/usr/bin/env python3
"""
Demonstrate banning the em dash (and common variants) with logit_bias.
"""
import os, sys, textwrap
import openai # pip install openai>=1.9.0
# Chat model that the demo request is sent to.
MODEL = "chatgpt-4o-latest"

# Prompt worded to invite an em dash, so the effect of the ban is visible.
PROMPT = (
    "Write a short sentence that would normally include an em dash "
    "(for example between two clauses)."
)
# ---------------------------------------------------------------------------
# Token ID -> logit bias for every dash-bearing token to suppress.
#
# Every entry carries the same bias of -100. The trailing comment on each
# line shows the text that the token ID is listed as standing for.
# NOTE(review): token IDs are tokenizer-specific; presumably these match the
# tokenizer used by MODEL -- confirm before reusing with another model.
# ---------------------------------------------------------------------------
LOGIT_BIAS_EM_DASH = {
    1127: -100,  # ' –'
    1585: -100,  # '–'
    2322: -100,  # '—'
    2733: -100,  # ' —'
    8290: -100,  # '——'
    20962: -100,  # '————'
    35251: -100,  # '—and'
    36553: -100,  # '―'
    41648: -100,  # '————————'
    51692: -100,  # '—the'
    54067: -100,  # '.—'
    64860: -100,  # ' –\n\n'
    65363: -100,  # '—a'
    74605: -100,  # '――'
    85865: -100,  # ' ―'
    87643: -100,  # '—\n\n'
    90877: -100,  # ' –\n'
    94012: -100,  # '—but'
    94353: -100,  # '––'
    94828: -100,  # '————————————————'
    96754: -100,  # '.”—'
    108181: -100,  # '—that'
    109774: -100,  # '–\n\n'
    112305: -100,  # '.–'
    114635: -100,  # '—it'
    118256: -100,  # '—in'
    121630: -100,  # '—or'
    121655: -100,  # '—to'
    123101: -100,  # '—\n'
    126952: -100,  # '—I'
    127126: -100,  # '”—'
    134820: -100,  # ' —\n'
    137419: -100,  # '—which'
    140135: -100,  # ' ——'
    141391: -100,  # ' –,'
    142654: -100,  # ' —\n\n'
    144129: -100,  # ')—'
    144787: -100,  # '—is'
    147994: -100,  # ',—'
    151396: -100,  # '–and'
    155638: -100,  # '—as'
    160984: -100,  # '—not'
    169785: -100,  # '—you'
    170523: -100,  # '–\n'
    178328: -100,  # '—from'
    180500: -100,  # '—including'
    183122: -100,  # '—for'
    183862: -100,  # '​—'
    187349: -100,  # '—they'
    188860: -100,  # '—all'
    190702: -100,  # '—with'
    192749: -100,  # '––––'
    196615: -100,  # '—we'
    197618: -100,  # '—even'
}
def main() -> None:
    """Request one chat completion with the dash tokens banned and print it.

    Sends PROMPT to MODEL through the OpenAI chat-completions endpoint,
    passing LOGIT_BIAS_EM_DASH as ``logit_bias``, then prints the reply
    wrapped to 80 columns.

    The API key is read from the ``OPENAI_API_KEY`` environment variable.
    When that variable is unset, the original ``"YOUR_KEY_HERE"`` placeholder
    is used, so editing the placeholder in place still works.
    """
    # Do not let the placeholder override a real key from the environment.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_KEY_HERE")

    # The table uses integer token IDs; they are sent as string keys, exactly
    # as the original request did.
    logit_bias = {
        str(token_id): bias for token_id, bias in LOGIT_BIAS_EM_DASH.items()
    }

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": PROMPT}],
        max_tokens=64,
        temperature=0.7,
        logit_bias=logit_bias,
    )

    # The message content may be None (it is optional in the SDK's response
    # type); textwrap.fill() would raise on None, so fall back to "".
    completion = response.choices[0].message.content or ""

    print("\n=== COMPLETION WITH EM-DASH BANNED ===\n")
    print(textwrap.fill(completion, 80))
    print("\n(If banning worked, there should be no em dashes.)\n")


if __name__ == "__main__":
    main()
@sam-paech
Copy link
Author

A more comprehensive list, including two medium-sized dashes you never knew existed: "–" and "―". Short dashes (plain hyphens, "-") are still allowed.

# Token ID -> logit bias map for the dash tokens to suppress. Every token
# gets the same bias (-100), so the map is built from the list of IDs.
# The comment beside each ID shows the text it is listed as standing for.
# NOTE(review): token IDs are tokenizer-specific -- confirm they match the
# tokenizer of the model you send them to.
LOGIT_BIAS_EM_DASH = dict.fromkeys(
    (
        1127,  # ' –'
        1585,  # '–'
        2322,  # '—'
        2733,  # ' —'
        8290,  # '——'
        20962,  # '————'
        35251,  # '—and'
        36553,  # '―'
        41648,  # '————————'
        51692,  # '—the'
        54067,  # '.—'
        64860,  # ' –\n\n'
        65363,  # '—a'
        74605,  # '――'
        85865,  # ' ―'
        87643,  # '—\n\n'
        90877,  # ' –\n'
        94012,  # '—but'
        94353,  # '––'
        94828,  # '————————————————'
        96754,  # '.”—'
        108181,  # '—that'
        109774,  # '–\n\n'
        112305,  # '.–'
        114635,  # '—it'
        118256,  # '—in'
        121630,  # '—or'
        121655,  # '—to'
        123101,  # '—\n'
        126952,  # '—I'
        127126,  # '”—'
        134820,  # ' —\n'
        137419,  # '—which'
        140135,  # ' ——'
        141391,  # ' –,'
        142654,  # ' —\n\n'
        144129,  # ')—'
        144787,  # '—is'
        147994,  # ',—'
        151396,  # '–and'
        155638,  # '—as'
        160984,  # '—not'
        169785,  # '—you'
        170523,  # '–\n'
        178328,  # '—from'
        180500,  # '—including'
        183122,  # '—for'
        183862,  # '​—'
        187349,  # '—they'
        188860,  # '—all'
        190702,  # '—with'
        192749,  # '––––'
        196615,  # '—we'
        197618,  # '—even'
    ),
    -100,
)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment