Created
June 3, 2025 10:30
-
-
Save qoli/238e14bbb1209c1668eb8cd6291cc127 to your computer and use it in GitHub Desktop.
調用 LLM 執行翻譯
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import requests | |
from dotenv import load_dotenv | |
import shutil | |
import re | |
# Load environment variables from the .env file
load_dotenv()
# Configuration
# Base URL of the LLM server; can be overridden with the LLM_URL environment variable.
LLM_URL = os.getenv("LLM_URL", "http://192.168.6.237:1234/v1")
# Model identifier; can be overridden with the MODEL_NAME environment variable.
MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5-32b-instruct-mlx")
# Path of the localization file handled by this script.
LOCALIZABLE_FILE = "Localization/Localizable.xcstrings"
# Regex: matches a double-quoted string that contains at least one character in
# the U+4E00-U+9FFF range (CJK Unified Ideographs) and no embedded double quote
# or line break. Note that it does not require the whole string to be Chinese.
chinese_string_regex = r'"([^"\n\r]*[\u4E00-\u9FFF]+[^"\n\r]*)"'
# Prompt template for a single translation request. The {target_language} and
# {original_text} placeholders are meant to be filled in with str.format(). The
# prompt text instructs the model to wrap its answer in <translated> tags.
TRANSLATE_PROMPT = """
⚠️ 請嚴格只輸出翻譯後的結果文字,不要加上任何說明或格式編號。請勿包含任何引言、額外說明或格式。
### 翻譯
你是一個專業的翻譯人員,請將提供的文本翻譯成 {target_language}。
### 輸入
需要翻譯的文本:{original_text}
### 輸出要求:
1. 保持原有的格式和標點符號。
2. 確保翻譯的語言自然流暢,符合目標語言的語法和用詞習慣。
3. 如果文本中包含代碼片段或特殊格式,請保留這些部分不變。
4. 只需要輸出翻譯結果,不需要任何額外的說明或標題。
5. 嚴格只輸出翻譯後的文本,不包含任何說明、指令、提示、數字編號或格式化項目。
6. 輸出結果使用 <translated> 標籤包裹。
7. 保證原始符號的位置不變。
### 輸出格式範例(請只輸出下列格式)
<translated>翻譯後文字</translated>
"""
def extract_translated_text(xml_text: str) -> str:
    """Return the text wrapped in the first ``<translated>`` element.

    The raw input is echoed to stdout first so that a failed extraction can
    be diagnosed from the log.

    Args:
        xml_text: Raw completion text returned by the LLM.

    Returns:
        The content between the first ``<translated>`` / ``</translated>``
        pair with surrounding whitespace stripped, or ``""`` when no complete
        pair is present.
    """
    print("🔍 正在提取翻譯內容:", flush=True)
    print(f"\n\n=== {xml_text} ===\n\n", flush=True)
    # DOTALL lets the translation span several lines; the lazy quantifier
    # stops at the first closing tag.
    match = re.search(r"<translated>(.*?)</translated>", xml_text, re.DOTALL)
    return match.group(1).strip() if match else ""
def get_llm_translation(original_text: str, target_language: str) -> str:
    """Translate ``original_text`` into ``target_language`` using the LLM.

    The prompt is built from ``TRANSLATE_PROMPT``, posted to
    ``{LLM_URL}/completions`` and the answer is taken from the
    ``<translated>`` tag of the first choice.

    Args:
        original_text: Text to translate. An empty value short-circuits
            and returns ``""`` without contacting the server.
        target_language: Name of the target language inserted into the prompt.

    Returns:
        The translated text, or ``""`` when the request fails, the response
        is not valid JSON, it has no usable ``choices`` entry, or no
        ``<translated>`` tag can be extracted.
    """
    if not original_text:
        return ""

    prompt = TRANSLATE_PROMPT.format(
        original_text=original_text, target_language=target_language
    )
    request_data = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
        "temperature": 0.1,
        "top_p": 0.95,
        "max_tokens": 150,
    }

    # Dump the prompt to Scripts/prompt.txt for debugging. This is best
    # effort: a missing directory or unwritable file must not abort the
    # translation run.
    try:
        with open("Scripts/prompt.txt", "w", encoding="utf-8") as prompt_file:
            prompt_file.write(prompt)
    except OSError as e:
        print(f"⚠️ 無法寫入 Scripts/prompt.txt:{e}", flush=True)

    try:
        response = requests.post(
            f"{LLM_URL}/completions",
            headers={"Content-Type": "application/json"},
            json=request_data,
            timeout=(10, 900),  # (connect, read) seconds; generation can be slow
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors raised by older versions of
        # requests, where they are not RequestException subclasses.
        print(f"請求 LLM 失敗:{e}", flush=True)
        return ""  # Return an empty string when the request fails

    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices:
        print("⚠️ 回應缺少 choices 欄位,跳過", flush=True)
        return ""

    # "or" guards against an explicit null in the "text" field.
    text = (choices[0].get("text") or "").strip()
    if not text:
        print("⚠️ 回應中的 text 為空,跳過", flush=True)
        return ""

    # Extract the content of the <translated> tag
    text = extract_translated_text(text)
    if not text:
        print("⚠️ 提取翻譯內容失敗,返回空字符串", flush=True)
        return ""
    return text
def write_localizable_file(data: dict) -> None:
    """Serialize ``data`` as JSON into ``LOCALIZABLE_FILE``.

    The JSON is first written to a temporary sibling file and then moved over
    the target with ``os.replace``, so an interruption or serialization error
    in the middle of a write does not leave a truncated catalog behind.

    Args:
        data: The complete catalog to write (UTF-8, 2-space indent,
            non-ASCII characters kept as-is).

    Raises:
        OSError: If the temporary file cannot be written or moved.
        TypeError: If ``data`` contains values ``json`` cannot serialize.
    """
    tmp_path = f"{LOCALIZABLE_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        os.replace(tmp_path, LOCALIZABLE_FILE)
    finally:
        # After a successful replace the temporary file is gone; on failure
        # this removes the partial file.
        try:
            os.remove(tmp_path)
        except FileNotFoundError:
            pass
def main():
    """Fill in missing zh-Hans and en localizations in ``LOCALIZABLE_FILE``.

    Each key under ``"strings"`` is processed when it matches
    ``chinese_string_regex`` (at least one CJK character, no double quote or
    line break) and already has a ``"localizations"`` entry. For every target
    language that is missing, the key itself is translated through the LLM and
    the file is rewritten immediately, so progress survives an interrupted
    run.

    A translation that comes back empty (request or extraction failure) is
    not stored, so the entry stays missing and is retried on the next run
    instead of being recorded as a translated empty string.

    Errors are reported on stdout; nothing is raised to the caller.
    """
    # (language code used in the catalog, language name passed to the prompt)
    targets = (("zh-Hans", "簡體中文"), ("en", "英文"))
    try:
        # Read the Localizable.xcstrings file
        with open(LOCALIZABLE_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Walk over every string in the catalog
        for key, value in data["strings"].items():
            # The key is wrapped in quotes because the regex expects a
            # quoted string.
            if not re.fullmatch(chinese_string_regex, f'"{key}"'):
                print(f"跳過:{key} (不是純中文)", flush=True)
                continue
            if "localizations" not in value:
                print(f"跳過:{key} (沒有 localizations)", flush=True)
                continue

            localizations = value["localizations"]
            for lang_code, language_name in targets:
                # Keep any translation that already exists
                if lang_code in localizations:
                    continue
                translated_text = get_llm_translation(key, language_name)
                if not translated_text:
                    print(f"⚠️ 翻譯失敗,未寫入 ({lang_code}):{key}", flush=True)
                    continue
                localizations[lang_code] = {
                    "stringUnit": {
                        "state": "translated",
                        "value": translated_text,
                    }
                }
                print(
                    f"已翻譯 ({lang_code}):{key} -> {translated_text}",
                    flush=True,
                )
                # Write to disk right away
                write_localizable_file(data)
    except FileNotFoundError as e:
        print(f"錯誤:找不到文件 {e}")
    except Exception as e:
        # Top-level boundary: report the problem instead of a traceback
        print(f"發生錯誤:{e}")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment