Last active
August 12, 2025 16:07
-
-
Save sam-paech/2a269e47d1c47e3c0103e2edf5d74e39 to your computer and use it in GitHub Desktop.
squash the em-dash
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Demonstrate banning the em-dash (and common variants) with logit_bias.
"""
import os
import sys
import textwrap

import openai  # pip install openai>=1.9.0
# Model name passed to the chat-completions request.
MODEL = "chatgpt-4o-latest"

# Prompt worded so that an unconstrained reply would normally contain an
# em dash, which makes the effect of the bias visible in the output.
PROMPT = (
    "Write a short sentence that would normally include an em dash "
    "(for example between two clauses)."
)
# ──────────────────────────────────────────────────────────────
#  Token-id → bias map for dash-bearing tokens.
#  Each key is a token id; the trailing comment shows the token
#  text it stands for. Every id is given a bias of -100.
#  NOTE(review): the ids are presumably specific to the tokenizer
#  used by MODEL — re-derive them if the model changes.
# ──────────────────────────────────────────────────────────────
LOGIT_BIAS_EM_DASH = {
    1127: -100,  # ' –'
    1585: -100,  # '–'
    2322: -100,  # '—'
    2733: -100,  # ' —'
    8290: -100,  # '——'
    20962: -100,  # '————'
    35251: -100,  # '—and'
    36553: -100,  # '―'
    41648: -100,  # '————————'
    51692: -100,  # '—the'
    54067: -100,  # '.—'
    64860: -100,  # ' –\n\n'
    65363: -100,  # '—a'
    74605: -100,  # '――'
    85865: -100,  # ' ―'
    87643: -100,  # '—\n\n'
    90877: -100,  # ' –\n'
    94012: -100,  # '—but'
    94353: -100,  # '––'
    94828: -100,  # '————————————————'
    96754: -100,  # '.”—'
    108181: -100,  # '—that'
    109774: -100,  # '–\n\n'
    112305: -100,  # '.–'
    114635: -100,  # '—it'
    118256: -100,  # '—in'
    121630: -100,  # '—or'
    121655: -100,  # '—to'
    123101: -100,  # '—\n'
    126952: -100,  # '—I'
    127126: -100,  # '”—'
    134820: -100,  # ' —\n'
    137419: -100,  # '—which'
    140135: -100,  # ' ——'
    141391: -100,  # ' –,'
    142654: -100,  # ' —\n\n'
    144129: -100,  # ')—'
    144787: -100,  # '—is'
    147994: -100,  # ',—'
    151396: -100,  # '–and'
    155638: -100,  # '—as'
    160984: -100,  # '—not'
    169785: -100,  # '—you'
    170523: -100,  # '–\n'
    178328: -100,  # '—from'
    180500: -100,  # '—including'
    183122: -100,  # '—for'
    183862: -100,  # '—'  (renders like 2322; presumably a different code point — verify)
    187349: -100,  # '—they'
    188860: -100,  # '—all'
    190702: -100,  # '—with'
    192749: -100,  # '––––'
    196615: -100,  # '—we'
    197618: -100,  # '—even'
}
def main() -> None:
    """Request one completion with the dash tokens biased and print it.

    Sends PROMPT as a single user message to MODEL through the module-level
    ``openai`` client, passing LOGIT_BIAS_EM_DASH as ``logit_bias``, then
    prints the first choice's text wrapped to 80 columns.
    """
    # Prefer a key from the environment. The placeholder is only a fallback,
    # so a configured OPENAI_API_KEY is no longer overwritten by it.
    openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_KEY_HERE")

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": PROMPT}],
        max_tokens=64,
        temperature=0.7,
        # The bias map is keyed by int token id; the request is sent with
        # string keys.
        logit_bias={
            str(token_id): bias
            for token_id, bias in LOGIT_BIAS_EM_DASH.items()
        },
    )

    # message.content can be None; textwrap.fill would raise on it, so fall
    # back to an empty string.
    reply = response.choices[0].message.content or ""

    print("\n=== COMPLETION WITH EM-DASH BANNED ===\n")
    print(textwrap.fill(reply, 80))
    print("\n(If banning worked, there should be no em dashes.)\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A more comprehensive list, including two medium-length dashes you may never have known existed: "–" (en dash) and "―" (horizontal bar). Short dashes (plain hyphens) are still allowed.