Created
April 5, 2025 04:47
-
-
Save NikolayXHD/d67d03be5fe797ae6efc91690b33427f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import tiktoken | |
import requests | |
# --- Settings ---------------------------------------------------------------
# API key is read from the environment; it is None when the variable is unset.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
MODEL = "deepseek-chat"
# cl100k_base is used as a stand-in tokenizer; the original author notes that
# DeepSeek uses similar tokenization (NOTE(review): token counts are therefore
# presumably approximate — confirm against the API's reported usage).
ENCODING = tiktoken.get_encoding("cl100k_base")

# --- Test parameters --------------------------------------------------------
# Unique marker phrases, keyed by where they are planted in the context.
NEEDLES = dict(
    start="START_MAGIC_7X9",
    middle="MIDDLE_MAGIC_3R8",
    end="END_MAGIC_Q5L",
)
# Total token budget for the generated context (needles + filler).
TARGET_TOKENS = 120_000
# One repeatable unit of filler text (the sentence repeated 100 times).
FILLER_TEXT = 100 * "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
def generate_haystack(target_tokens=None):
    """Build a long filler context with needles at the start, middle and end.

    The context is assembled as a token list under ``ENCODING``:
    start needle + first half of the filler + middle needle + second half of
    the filler + end needle. The assembled list holds exactly
    ``target_tokens`` tokens; the count reported by the API for the decoded
    text may differ.

    Args:
        target_tokens: Total token budget (needles plus filler). When omitted
            or ``None``, the module-level ``TARGET_TOKENS`` is used.

    Returns:
        The decoded context string.

    Raises:
        ValueError: If ``target_tokens`` is too small to hold the three
            needles, or if filler is required but ``FILLER_TEXT`` encodes to
            no tokens.
    """
    if target_tokens is None:
        # Resolved at call time so later changes to TARGET_TOKENS are honoured.
        target_tokens = TARGET_TOKENS

    # Encode each needle once and reuse the tokens for both the budget
    # calculation and the final assembly.
    start_needle = ENCODING.encode(NEEDLES["start"])
    middle_needle = ENCODING.encode(NEEDLES["middle"])
    end_needle = ENCODING.encode(NEEDLES["end"])
    filler_unit = ENCODING.encode(FILLER_TEXT)

    needle_count = len(start_needle) + len(middle_needle) + len(end_needle)
    filler_needed = target_tokens - needle_count
    if filler_needed < 0:
        # Without this guard Python's non-negative modulo would silently
        # produce a context of the wrong length.
        raise ValueError(
            f"target_tokens={target_tokens} is smaller than the "
            f"{needle_count} tokens required by the needles"
        )
    if filler_needed and not filler_unit:
        raise ValueError("FILLER_TEXT encodes to no tokens")

    # Repeat the filler unit as many whole times as fit, then top up with a
    # partial unit so the filler is exactly filler_needed tokens long.
    if filler_needed:
        repeats, remainder = divmod(filler_needed, len(filler_unit))
        filler = filler_unit * repeats + filler_unit[:remainder]
    else:
        filler = []

    # Split the filler into two halves; the middle needle goes between them.
    half = len(filler) // 2

    return ENCODING.decode(
        start_needle
        + filler[:half]
        + middle_needle
        + filler[half:]
        + end_needle
    )
# API Request
# Fail fast with a clear message instead of sending "Bearer None" to the API.
if not DEEPSEEK_API_KEY:
    raise SystemExit("DEEPSEEK_API_KEY environment variable is not set")

response = requests.post(
    "https://api.deepseek.com/v1/chat/completions",
    headers={"Authorization": f"Bearer {DEEPSEEK_API_KEY}"},
    json={
        "model": MODEL,
        "messages": [
            {
                "role": "system",
                "content": "Return ALL magic phrases from the context, comma-separated.",
            },
            {"role": "user", "content": generate_haystack()},
        ],
        "temperature": 0,
    },
    # (connect, read) timeouts in seconds. requests never times out by
    # default, so without this a stalled connection would hang the script
    # forever; the read timeout is generous because the prompt is very long.
    timeout=(10, 600),
)

# Analysis
if response.status_code == 200:
    # Parse the body once and reuse it for both the answer and the usage.
    payload = response.json()
    result = payload["choices"][0]["message"]["content"]
    # Record, per needle position, whether its phrase appears in the answer.
    found = {name: phrase in result for name, phrase in NEEDLES.items()}
    print(f"API Response: {result}")
    print(f"Context Test: {'PASS' if all(found.values()) else 'FAIL'}")
    missing = [name for name, seen in found.items() if not seen]
    if missing:
        # Tell the user which part of the context was not recalled.
        print(f"Missing needles: {', '.join(missing)}")
    print(f"Tokens Used: {payload['usage']['prompt_tokens']}")
else:
    print(f"API Error: {response.status_code} - {response.text}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment