Skip to content

Instantly share code, notes, and snippets.

@ManotLuijiu
Last active March 22, 2025 06:28
Show Gist options
  • Save ManotLuijiu/4f997f5b654766eb70f2e88b054a91fc to your computer and use it in GitHub Desktop.
Setup for Translation from English to Thai in th.po file
#!/bin/bash
# Setup script for ERPNext translation service using OpenAI API
# This script works with or without Thai Business Suite installed
# Run from the bench root directory (the one containing ./env and ./apps).
set -e # Exit on any error
# Banner
echo "==============================================="
echo "ERPNext Translation Service Setup"
echo "==============================================="
# Configuration variables
ERPNEXT_ENV="$PWD/env" # Use existing ERPNext environment
CONFIG_FILE="$PWD/.erpnext_translate_config"
# Determine where to install the scripts
if [ -d "$PWD/apps/thai_business_suite" ]; then
  # Thai Business Suite is installed
  echo "Thai Business Suite detected. Installing in Thai Business Suite directory."
  BASE_DIR="$PWD/apps/thai_business_suite"
  SCRIPTS_DIR="$BASE_DIR/thai_business_suite/utils/translation"
else
  # Thai Business Suite is not installed; fall back to frappe's tree
  echo "Thai Business Suite not detected. Installing in custom directory."
  BASE_DIR="$PWD/apps/frappe"
  SCRIPTS_DIR="$BASE_DIR/frappe/utils/translation"
fi
REQUIREMENTS_FILE="$SCRIPTS_DIR/requirements.txt"
TRANSLATOR_SCRIPT="$SCRIPTS_DIR/translate_po_files.py"
GLOSSARY_SCRIPT="$SCRIPTS_DIR/thai_glossary.py"
# Create directory structure
echo "Creating directory structure..."
mkdir -p "$SCRIPTS_DIR"
# Display message about checking prerequisites
echo "Checking if Python 3 is installed..."
if ! command -v python3 &> /dev/null; then
  echo "❌ Python 3 is required but not installed. Please install Python 3 and try again."
  exit 1
else
  echo "✅ Python 3 is installed."
fi
# Create requirements file (consumed by pip below)
echo "Creating requirements file..."
cat > "$REQUIREMENTS_FILE" << EOF
openai>=1.3.0
polib>=1.2.0
tqdm>=4.64.0
anthropic>=0.5.0
EOF
# Check if ERPNext environment exists
echo "Checking virtual environment..."
if [ ! -d "$ERPNEXT_ENV" ]; then
  echo "ERPNext environment not found at $ERPNEXT_ENV."
  echo "Please run this script from your ERPNext bench directory."
  exit 1
fi
# Activate ERPNext environment so pip installs into the bench venv
echo "Activating ERPNext environment..."
source "$ERPNEXT_ENV/bin/activate"
# Install dependencies
echo "Installing dependencies..."
pip install -r "$REQUIREMENTS_FILE"
# Create the Thai glossary file (quoted 'EOF' delimiter: content is written
# verbatim, with no shell expansion)
echo "Creating Thai glossary file..."
cat > "$GLOSSARY_SCRIPT" << 'EOF'
# Thai Business/ERPNext terminology glossary
# This glossary provides standard translations for common terms used in ERPNext
GLOSSARY = {
    # Basic business terms
    "Invoice": "ใบแจ้งหนี้",
    "Sales Invoice": "ใบแจ้งหนี้ขาย",
    "Purchase Invoice": "ใบแจ้งหนี้ซื้อ",
    "Quotation": "ใบเสนอราคา",
    "Customer": "ลูกค้า",
    "Supplier": "ผู้จัดจำหน่าย",
    "Item": "สินค้า",
    "Account": "บัญชี",
    "Journal Entry": "บันทึกทางบัญชี",
    "Payment": "การชำระเงิน",
    "Purchase Order": "ใบสั่งซื้อ",
    "Sales Order": "ใบสั่งขาย",
    "Delivery Note": "ใบส่งสินค้า",
    "Receipt": "ใบเสร็จรับเงิน",
    # Common ERPNext modules
    "Accounting": "การบัญชี",
    "Human Resources": "ทรัพยากรบุคคล",
    "Manufacturing": "การผลิต",
    "Buying": "การซื้อ",
    "Selling": "การขาย",
    "Stock": "คลังสินค้า",
    "Assets": "สินทรัพย์",
    "Projects": "โครงการ",
    "CRM": "การบริหารลูกค้าสัมพันธ์",
    # Tax-related terms
    "VAT": "ภาษีมูลค่าเพิ่ม",
    "Tax": "ภาษี",
    "Withholding Tax": "ภาษีหัก ณ ที่จ่าย",
    "Tax Invoice": "ใบกำกับภาษี",
    # Date/Time terms
    "Date": "วันที่",
    "Time": "เวลา",
    "Year": "ปี",
    "Month": "เดือน",
    "Day": "วัน",
    # Status terms
    "Pending": "รอดำเนินการ",
    "Completed": "เสร็จสิ้น",
    "Cancelled": "ยกเลิก",
    "Draft": "ฉบับร่าง",
    "Submitted": "ส่งแล้ว",
    "Paid": "ชำระแล้ว",
    # Add more terms as needed
}
EOF
# Download the translator script
echo "Creating translator script..."
cat > "$TRANSLATOR_SCRIPT" << 'EOF'
#!/usr/bin/env python3
"""
AI-Powered PO File Translator for ERPNext
This script translates PO files that are generated from the 'bench update-po-files' command.
It uses the OpenAI API to translate content while preserving the PO file format.
Usage:
python translate_po_files.py [options] <po_file_path>
Options:
--target-lang=<language> Target language (default: th for Thai)
--api-key=<key> OpenAI API key (required)
--model=<model> OpenAI model to use (default: gpt-4)
--batch-size=<size> Number of entries to translate in a batch (default: 10)
--output=<path> Output file path (default: input file with .translated suffix)
--help Show this help message
"""
import os
import sys
import time
import argparse
import polib
from typing import List, Dict, Optional
from datetime import datetime
import openai
import anthropic
import json
import random
from tqdm import tqdm
# Import glossary if available
try:
# Try to import from thai_business_suite first
from thai_business_suite.utils.translation.thai_glossary import GLOSSARY
except ImportError:
try:
# Try to import from frappe
from frappe.utils.translation.thai_glossary import GLOSSARY
except ImportError:
# If not available, use an empty glossary
GLOSSARY = {}
# Constants
DEFAULT_TARGET_LANG = "th"
DEFAULT_MODEL = "gpt-4-1106-preview" # Using a model that supports JSON format
DEFAULT_BATCH_SIZE = 10
SLEEP_TIME = 0.5 # Seconds to sleep between API calls to avoid rate limiting
def backoff_wait(attempt):
    """Sleep with exponential backoff (capped at 60s) plus small jitter."""
    delay = min(2 ** attempt + random.uniform(0, 0.5), 60)
    print(f"Rate limit reached. Retrying in {delay:.2f} seconds...")
    time.sleep(delay)
def setup_argparse() -> argparse.Namespace:
    """Define the translator's command-line interface and parse sys.argv."""
    ap = argparse.ArgumentParser(description="AI-Powered PO File Translator for ERPNext")
    ap.add_argument("po_file_path", help="Path to the PO file to translate")
    ap.add_argument("--target-lang", default=DEFAULT_TARGET_LANG,
                    help=f"Target language (default: {DEFAULT_TARGET_LANG})")
    ap.add_argument("--api-key", required=True, help="API key (OpenAI or Anthropic)")
    ap.add_argument("--model-provider", default="openai", choices=["openai", "claude"],
                    help="AI model provider to use (default: openai)")
    ap.add_argument("--model", default=DEFAULT_MODEL,
                    help=f"Model to use (default: {DEFAULT_MODEL})")
    ap.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE,
                    help=f"Number of entries to translate in a batch (default: {DEFAULT_BATCH_SIZE})")
    ap.add_argument("--output",
                    help="Output file path (default: input file with .translated suffix)")
    ap.add_argument("--temperature", type=float, default=0.3,
                    help="Model temperature (default: 0.3)")
    ap.add_argument("--max-tokens", type=int, default=512,
                    help="Max tokens per API call (default: 512)")
    ap.add_argument("--dry-run", action="store_true",
                    help="Show which entries will be translated without sending API requests")
    return ap.parse_args()
def translate_batch_claude(entries: List[Dict], target_lang: str, api_key: str, model: str, temperature: float = 0.3, max_tokens: int = 512) -> List[str]:
    """Translate a batch of entries using the Anthropic Claude API.

    Args:
        entries: Dicts each carrying the source text under "msgid".
        target_lang: Target language code (e.g. "th").
        api_key: Anthropic API key.
        model: Claude model name; falls back to claude-3-haiku when empty.
        temperature: Sampling temperature.
        max_tokens: Accepted for signature parity with translate_batch; the
            request hard-codes max_tokens=1000.  # NOTE(review): confirm intent

    Returns:
        Exactly one translation per entry (padded/trimmed); empty strings on error.
    """
    client = anthropic.Anthropic(api_key=api_key)
    # Format the batch for translation
    messages_to_translate = [entry["msgid"] for entry in entries]
    glossary_text = json.dumps(GLOSSARY, indent=2)
    # Join outside the f-string: backslashes are not allowed inside f-string
    # expressions before Python 3.12, so {"\n".join(...)} was a SyntaxError.
    joined_messages = "\n".join(messages_to_translate)
    # Doubled braces render literal {} / {0} so the model sees the
    # placeholder examples verbatim.
    prompt = f"""
You are an expert translator specializing in technical and software localization.
Translate the following text from English to {target_lang}.
For Thai language translations, use these specific term translations:
{glossary_text}
Ensure proper tone and formality appropriate for business software.
Preserve any formatting placeholders like %s, {{}}, or {{0}}.
For technical terms not in the glossary, you may keep them in English if that's conventional.
Please translate each of the following messages, one per line:
{joined_messages}
Return ONLY the translations as plain text, one per line, without any additional text.
"""
    # Make the API call
    try:
        response = client.messages.create(
            model=model or "claude-3-haiku-20240307",
            max_tokens=1000,
            temperature=temperature,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        # Extract the response text and split it into one line per translation
        response_text = response.content[0].text
        translations = [line.strip() for line in response_text.strip().split("\n")]
        # Make sure we return exactly one result per input entry
        if len(translations) != len(messages_to_translate):
            print(f"Warning: Expected {len(messages_to_translate)} translations but got {len(translations)}")
            if len(translations) < len(messages_to_translate):
                translations.extend([""] * (len(messages_to_translate) - len(translations)))
            else:
                translations = translations[:len(messages_to_translate)]
        return translations
    except Exception as e:
        print(f"Error during translation: {e}")
        return ["" for _ in entries]  # Return empty strings on error
def translate_batch(
    entries: List[Dict], target_lang: str, api_key: str, model: str, temperature: float = 0.3, max_tokens: int = 512
) -> List[str]:
    """Translate a batch of entries using the OpenAI API.

    Retries up to 5 times, backing off exponentially and progressively
    simplifying the prompt on repeated failures.

    Args:
        entries: Dicts each carrying the source text under "msgid".
        target_lang: Target language code (e.g. "th").
        api_key: OpenAI API key.
        model: Chat-completions model name.
        temperature: Sampling temperature.
        max_tokens: Completion-token cap per call.

    Returns:
        One translation per entry, in order; empty strings if all attempts fail.
    """
    client = openai.OpenAI(api_key=api_key)
    GLOSSARY_TEXT = json.dumps(GLOSSARY, indent=2)
    # Format the batch for translation
    messages_to_translate = [entry["msgid"] for entry in entries]
    # Doubled braces render literal braces so the model sees the placeholder
    # examples (%s, {}, {0}) verbatim instead of mangled f-string output.
    system_prompt = f"""
You are an expert translator specializing in technical and software localization.
Translate the following text from English to {target_lang}.
For Thai language translations, use these specific term translations:
{GLOSSARY_TEXT}
Ensure proper tone and formality appropriate for business software.
Preserve any formatting placeholders like %s, {{}}, or {{0}}.
For technical terms not in the glossary, you may keep them in English if that's conventional.
Return translations in a JSON array in the same order as the input.
"""
    # Prepare messages for the API call
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": json.dumps(messages_to_translate)
        },
    ]
    # JSON response mode is only supported by newer snapshots (1106/0125/turbo)
    use_json_format = "1106" in model or "0125" in model or "-turbo-" in model
    print(f"Attempting to call OpenAI API with model: {model}")
    attempt = 0
    while attempt < 5:
        try:
            print("Making API call...")
            if use_json_format:
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    response_format={"type": "json_object"},
                    timeout=30,
                )
            else:
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    timeout=30,
                )
            print("API call completed successfully")
            # Extract the response text and handle it based on format
            response_text = response.choices[0].message.content
            if use_json_format:
                try:
                    # Parse as JSON object
                    response_json = json.loads(response_text)
                    # Handle different response structures
                    if isinstance(response_json, list):
                        return response_json
                    elif isinstance(response_json, dict) and "translations" in response_json:
                        return response_json["translations"]
                    elif isinstance(response_json, dict):
                        # Extract all values and hope they're the translations
                        values = list(response_json.values())
                        if len(values) == len(messages_to_translate):
                            return values
                        else:
                            raise ValueError("JSON response does not match expected format")
                except json.JSONDecodeError as e:
                    print(f"JSON parsing error: {e}")
                    # Fall back to line-by-line parsing
                    lines = response_text.strip().split("\n")
                    if len(lines) == len(messages_to_translate):
                        return lines
                    raise ValueError("Could not parse response as JSON or as lines")
            else:
                # For non-JSON models, split the text by lines
                lines = response_text.strip().split("\n")
                if len(lines) == len(messages_to_translate):
                    return [line.strip() for line in lines]
                else:
                    # Try to find and extract translations marked with quotes
                    import re
                    translations = []
                    for line in lines:
                        match = re.search(r'"([^"]*)"', line)
                        if match:
                            translations.append(match.group(1))
                    if len(translations) == len(messages_to_translate):
                        return translations
                    else:
                        raise ValueError("Response format could not be parsed")
        except openai.APIError as e:
            print(f"OpenAI API error: {e}")
            wait_time = min((2 ** attempt) + random.uniform(0, 0.5), 60)
            print(f"Retrying in {wait_time:.2f} seconds...")
            time.sleep(wait_time)
            attempt += 1
        except Exception as e:
            print(f"Unexpected error: {e}")
            attempt += 1
            if attempt < 5:
                wait_time = min((2 ** attempt) + random.uniform(0, 0.5), 60)
                print(f"Retrying with different approach in {wait_time:.2f} seconds...")
                time.sleep(wait_time)
                # On subsequent attempts, try different approaches
                if attempt == 2:
                    # Try with a simpler prompt (note: this drops the glossary)
                    messages[0]["content"] = f"Translate these phrases to {target_lang}. Keep any placeholders intact."
                elif attempt == 3:
                    # Try asking for numbered responses
                    messages[0]["content"] = f"Translate each numbered item to {target_lang}. Return only the translations with their numbers."
                    numbered_inputs = []
                    for i, msg in enumerate(messages_to_translate):
                        numbered_inputs.append(f"{i+1}. {msg}")
                    messages[1]["content"] = "\n".join(numbered_inputs)
                else:
                    break
            else:
                break
    # If all attempts failed, return empty translations
    return [""] * len(entries)
def translate_po_file(
    po_file_path: str,
    target_lang: str,
    api_key: str,
    model_provider: str,
    model: str,
    batch_size: int,
    dry_run: bool = False,
    temperature: float = 0.3,
    max_tokens: int = 512,
    output_path: Optional[str] = None,
) -> str:
    """
    Translate PO file content while preserving format.

    Only entries with an empty msgstr are translated. Progress is
    checkpointed to output_path after every batch, so an interrupted run
    loses at most one batch.

    Returns the output file path (or a status string in dry-run mode).
    """
    # Choose the appropriate translation function
    translate_function = translate_batch_claude if model_provider == "claude" else translate_batch
    if not output_path:
        base, ext = os.path.splitext(po_file_path)
        output_path = f"{base}.translated{ext}"
    # Parse the PO file
    po = polib.pofile(po_file_path)
    # Prepare entries that need translation (untranslated, non-empty msgid)
    entries_to_translate = []
    for entry in po:
        if not entry.msgstr and entry.msgid and entry.msgid != "":
            entries_to_translate.append({"entry": entry, "msgid": entry.msgid})
    print(f"Found {len(entries_to_translate)} entries to translate")
    if dry_run:
        print("Dry run mode: Entries to be translated:")
        for entry in entries_to_translate:
            print(entry["msgid"])
        return "Dry run completed"
    # Process in batches (ceiling division)
    total_batches = (len(entries_to_translate) + batch_size - 1) // batch_size
    for batch_num in tqdm(range(total_batches), desc="Translating batches..."):
        start_idx = batch_num * batch_size
        end_idx = min(start_idx + batch_size, len(entries_to_translate))
        current_batch = entries_to_translate[start_idx:end_idx]
        print(
            f"Translating batch {batch_num + 1}/{total_batches} ({len(current_batch)} entries)"
        )
        # Translate the batch
        translations = translate_function(
            current_batch,
            target_lang,
            api_key,
            model,
            temperature=temperature,
            max_tokens=max_tokens
        )
        # Apply translations to the entries
        for i, translation in enumerate(translations):
            if i < len(current_batch):
                current_batch[i]["entry"].msgstr = translation
        # Save after each batch (checkpoint)
        po.save(output_path)
        print(f"Saved progress to {output_path}")
        # Sleep to avoid rate limiting
        if batch_num < total_batches - 1:
            time.sleep(SLEEP_TIME)
    # Update metadata
    po.metadata["PO-Revision-Date"] = datetime.now().strftime("%Y-%m-%d %H:%M%z")
    po.metadata["Language"] = target_lang
    # Save final version
    po.save(output_path)
    print(f"Translation completed! Output saved to: {output_path}")
    print(f"Translated {len(entries_to_translate)} entries to {target_lang}")
    return output_path
def main():
    """Main function to run the translator."""
    args = setup_argparse()
    # Validate inputs before spending any API calls
    if not os.path.exists(args.po_file_path):
        print(f"Error: PO file not found: {args.po_file_path}")
        sys.exit(1)
    print(f"Starting translation of {args.po_file_path} to {args.target_lang}")
    # Run the translation
    output_path = translate_po_file(
        args.po_file_path,
        args.target_lang,
        args.api_key,
        args.model_provider,
        args.model,
        args.batch_size,
        dry_run=args.dry_run,
        temperature=args.temperature,
        max_tokens=args.max_tokens,
        output_path=args.output,
    )
    print(f"Translation complete! Output saved to: {output_path}")
if __name__ == "__main__":
    main()
EOF
# Make the script executable
chmod +x "$TRANSLATOR_SCRIPT"
# Create a wrapper script in the bench bin directory
WRAPPER_SCRIPT="$PWD/bin/translate-po"
mkdir -p "$PWD/bin"
# Unquoted delimiter on purpose: $ERPNEXT_ENV, $CONFIG_FILE and
# $TRANSLATOR_SCRIPT are baked into the wrapper now, while escaped \$
# expressions are evaluated each time the wrapper runs.
cat > "$WRAPPER_SCRIPT" << EOF
#!/bin/bash
# Wrapper script for translate_po_files.py
# Activate the ERPNext environment
source "$ERPNEXT_ENV/bin/activate"
# Load configuration if it exists
if [ -f "$CONFIG_FILE" ]; then
  source "$CONFIG_FILE"
fi
# Run the translator with the provided arguments
# NOTE(review): the API key is passed as a command-line argument and is
# visible in 'ps' output while the translator runs.
python "$TRANSLATOR_SCRIPT" \${OPENAI_API_KEY:+--api-key=\$OPENAI_API_KEY} \${MODEL_PROVIDER:+--model-provider=\$MODEL_PROVIDER} "\$@"
EOF
chmod +x "$WRAPPER_SCRIPT"
# Ask the user for OpenAI API key
echo
echo "Would you like to save your OpenAI API key for future use? [Y]/n"
read -r save_key
if [[ -z "$save_key" || "$save_key" =~ ^[Yy]$ ]]; then
  echo "Enter your OpenAI API key:"
  read -r api_key
  # Ask the user for which model provider to use as default
  echo "Which AI model provider would you like to use by default? [openai]/claude"
  read -r model_provider
  # Accept "o", "openai", "OpenAI" or an empty answer as the default.
  # BUG FIX: the previous pattern ^[Oo]$ only matched a single-character
  # answer, so typing the suggested "openai" silently selected claude.
  if [[ -z "$model_provider" || "$model_provider" =~ ^[Oo] ]]; then
    model_provider="openai"
  else
    model_provider="claude"
  fi
  # Save the API key and model provider to the config file
  echo "OPENAI_API_KEY=\"$api_key\"" > "$CONFIG_FILE"
  echo "MODEL_PROVIDER=\"$model_provider\"" >> "$CONFIG_FILE"
  chmod 600 "$CONFIG_FILE" # Restrict access to the config file
  echo "API key and model provider preferences saved to $CONFIG_FILE"
else
  echo "You can set your API key later by creating a file at $CONFIG_FILE with OPENAI_API_KEY=\"your_key_here\""
fi
# Update usage instructions: print post-install examples for the wrapper
echo
echo "Installation complete!"
echo
echo "Usage:"
echo " ./bin/translate-po [options] <po_file_path>"
echo
echo "Examples:"
echo " ./bin/translate-po --target-lang=th apps/frappe/frappe/locale/th.po"
echo " ./bin/translate-po --model-provider=claude apps/erpnext/erpnext/locale/th.po"
echo " ./bin/translate-po --model=gpt-4-1106-preview --batch-size=20 apps/frappe/frappe/locale/th.po"
echo
echo "For more options:"
echo " ./bin/translate-po --help"
#!/bin/bash
# Setup script for Thai Business Suite translation service
# This script sets up the necessary environment for translating PO files using OpenAI API
# NOTE(review): this looks like an earlier revision of the setup script
# concatenated into the same file; running it after the first section will
# overwrite the installed translator. Confirm which copy is canonical.
set -e # Exit on any error
# Banner
echo "==============================================="
echo "Thai Business Suite Translation Service Setup"
echo "==============================================="
# Configuration variables
ERPNEXT_ENV="$PWD/env" # Use existing ERPNext environment
SCRIPTS_DIR="$PWD/apps/thai_business_suite/thai_business_suite/utils/translation"
REQUIREMENTS_FILE="$SCRIPTS_DIR/requirements.txt"
TRANSLATOR_SCRIPT="$SCRIPTS_DIR/translate_po_files.py"
CONFIG_FILE="$PWD/.thai_business_suite_config"
# Create directory structure
echo "Creating directory structure..."
mkdir -p "$SCRIPTS_DIR"
# Display message about checking prerequisites
echo "Checking if Python 3 is installed..."
if ! command -v python3 &> /dev/null; then
  echo "❌ Python 3 is required but not installed. Please install Python 3 and try again."
  exit 1
else
  echo "✅ Python 3 is installed."
fi
echo "Checking if pip3 is installed..."
if ! command -v pip3 &> /dev/null; then
  echo "❌ pip3 is required but not installed. Please install pip3 and try again."
  exit 1
else
  echo "✅ pip3 is installed."
fi
# Create requirements file
echo "Creating requirements file..."
cat > "$REQUIREMENTS_FILE" << EOF
openai>=1.3.0
polib>=1.2.0
EOF
# Check if ERPNext environment exists
echo "Checking virtual environment..."
if [ ! -d "$ERPNEXT_ENV" ]; then
  echo "ERPNext environment not found at $ERPNEXT_ENV."
  echo "Please run this script from your ERPNext bench directory."
  exit 1
fi
# Activate ERPNext environment
echo "Activating ERPNext environment..."
source "$ERPNEXT_ENV/bin/activate"
# Install dependencies
echo "Installing dependencies..."
pip install -r "$REQUIREMENTS_FILE"
# Download the translator script
echo "Downloading translator script..."
cat > "$TRANSLATOR_SCRIPT" << 'EOF'
#!/usr/bin/env python3
"""
AI-Powered PO File Translator for ERPNext
This script translates PO files that are generated from the 'bench update-po-files' command.
It uses the OpenAI API to translate content while preserving the PO file format.
Usage:
python translate_po_files.py [options] <po_file_path>
Options:
--target-lang=<language> Target language (default: th for Thai)
--api-key=<key> OpenAI API key (required)
--model=<model> OpenAI model to use (default: gpt-4)
--batch-size=<size> Number of entries to translate in a batch (default: 10)
--output=<path> Output file path (default: input file with .translated suffix)
--help Show this help message
"""
import os
import re
import sys
import time
import argparse
import polib
from typing import List, Dict, Optional
from datetime import datetime
import openai
import json
# Constants
DEFAULT_TARGET_LANG = "th"
DEFAULT_MODEL = "gpt-4"
DEFAULT_BATCH_SIZE = 10
SLEEP_TIME = 0.5 # Seconds to sleep between API calls to avoid rate limiting
def setup_argparse() -> argparse.Namespace:
    """Build the command-line parser and return the parsed arguments."""
    cli = argparse.ArgumentParser(description="AI-Powered PO File Translator for ERPNext")
    cli.add_argument("po_file_path", help="Path to the PO file to translate")
    # Table-driven flag definitions keep the option list easy to scan.
    flag_specs = [
        ("--target-lang", {"default": DEFAULT_TARGET_LANG,
                           "help": f"Target language (default: {DEFAULT_TARGET_LANG})"}),
        ("--api-key", {"required": True, "help": "OpenAI API key"}),
        ("--model", {"default": DEFAULT_MODEL,
                     "help": f"OpenAI model to use (default: {DEFAULT_MODEL})"}),
        ("--batch-size", {"type": int, "default": DEFAULT_BATCH_SIZE,
                          "help": f"Number of entries to translate in a batch (default: {DEFAULT_BATCH_SIZE})"}),
        ("--output", {"help": "Output file path (default: input file with .translated suffix)"}),
    ]
    for flag, spec in flag_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def translate_batch(entries: List[Dict], target_lang: str, api_key: str, model: str) -> List[str]:
    """Translate a batch of entries using the OpenAI API.

    Args:
        entries: Dicts each carrying the source text under "msgid".
        target_lang: Target language code (e.g. "th").
        api_key: OpenAI API key.
        model: Chat-completions model name.

    Returns:
        One translation per entry (best effort); empty strings on error.
    """
    openai.api_key = api_key
    # Format the batch for translation
    messages_to_translate = [entry["msgid"] for entry in entries]
    # Doubled braces render literal braces so the model sees the placeholder
    # examples (%s, {}, {0}) verbatim instead of mangled f-string output.
    system_prompt = f"""
You are an expert translator specializing in technical and software localization.
Translate the following text from English to {target_lang}.
For Thai language translations, ensure proper tone and formality appropriate for business software.
Preserve any formatting placeholders like %s, {{}}, or {{0}}.
For technical terms, you may keep them in English if that's conventional.
Return ONLY the translations, nothing else.
"""
    # Prepare messages for the API call
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": "Please translate each of the following messages. Return only a JSON array of translations in the same order:\n" + json.dumps(messages_to_translate)}
    ]
    # Make the API call
    try:
        # BUG FIX: response_format={"type": "json_object"} is rejected by
        # models that predate JSON mode — including plain "gpt-4", the
        # default here — so only request it when the model supports it.
        request_kwargs = {
            "model": model,
            "messages": messages,
            "temperature": 0.3,  # Lower temperature for more consistent translations
        }
        if "1106" in model or "0125" in model or "turbo" in model:
            request_kwargs["response_format"] = {"type": "json_object"}
        response = openai.chat.completions.create(**request_kwargs)
        # Parse the JSON response
        response_text = response.choices[0].message.content
        response_json = json.loads(response_text)
        # Verify the response structure
        if "translations" in response_json and isinstance(response_json["translations"], list):
            return response_json["translations"]
        else:
            # Try to extract translations from whatever structure was returned
            if isinstance(response_json, list):
                return response_json
            else:
                print(f"Warning: Unexpected response format: {response_json}")
                # Make a best-effort attempt to extract values
                return list(response_json.values()) if isinstance(response_json, dict) else []
    except Exception as e:
        print(f"Error during translation: {e}")
        return ["" for _ in entries]  # Return empty strings on error
def translate_po_file(
    po_file_path: str,
    target_lang: str,
    api_key: str,
    model: str,
    batch_size: int,
    output_path: Optional[str] = None
) -> str:
    """
    Translate PO file content while preserving format.

    Only entries with an empty msgstr are translated; progress is
    checkpointed to output_path after every batch.

    Args:
        po_file_path: Path to the input PO file
        target_lang: Target language code (e.g., 'th' for Thai)
        api_key: OpenAI API key
        model: OpenAI model to use
        batch_size: Number of entries to translate in a batch
        output_path: Path for the output file (optional)
    Returns:
        Path to the translated file
    """
    if not output_path:
        base, ext = os.path.splitext(po_file_path)
        output_path = f"{base}.translated{ext}"
    # Parse the PO file
    po = polib.pofile(po_file_path)
    # Prepare entries that need translation (untranslated, non-empty msgid)
    entries_to_translate = []
    for entry in po:
        if not entry.msgstr and entry.msgid and entry.msgid != "":
            entries_to_translate.append({
                "entry": entry,
                "msgid": entry.msgid
            })
    print(f"Found {len(entries_to_translate)} entries to translate")
    # Process in batches (ceiling division)
    total_batches = (len(entries_to_translate) + batch_size - 1) // batch_size
    for batch_num in range(total_batches):
        start_idx = batch_num * batch_size
        end_idx = min(start_idx + batch_size, len(entries_to_translate))
        current_batch = entries_to_translate[start_idx:end_idx]
        print(f"Translating batch {batch_num + 1}/{total_batches} ({len(current_batch)} entries)")
        # Translate the batch
        translations = translate_batch(current_batch, target_lang, api_key, model)
        # Apply translations to the entries
        for i, translation in enumerate(translations):
            if i < len(current_batch):
                current_batch[i]["entry"].msgstr = translation
        # Save after each batch (checkpoint)
        po.save(output_path)
        print(f"Saved progress to {output_path}")
        # Sleep to avoid rate limiting
        if batch_num < total_batches - 1:
            time.sleep(SLEEP_TIME)
    # Update metadata
    po.metadata['PO-Revision-Date'] = datetime.now().strftime('%Y-%m-%d %H:%M%z')
    po.metadata['Language'] = target_lang
    # Save final version
    po.save(output_path)
    print(f"Translation completed! Output saved to: {output_path}")
    print(f"Translated {len(entries_to_translate)} entries to {target_lang}")
    return output_path
def main():
    """Main function to run the translator."""
    args = setup_argparse()
    # Validate inputs before spending any API calls
    if not os.path.exists(args.po_file_path):
        print(f"Error: PO file not found: {args.po_file_path}")
        sys.exit(1)
    print(f"Starting translation of {args.po_file_path} to {args.target_lang}")
    # Run the translation
    output_path = translate_po_file(
        args.po_file_path,
        args.target_lang,
        args.api_key,
        args.model,
        args.batch_size,
        args.output
    )
    print(f"Translation complete! Output saved to: {output_path}")
if __name__ == "__main__":
    main()
EOF
# Make the script executable
chmod +x "$TRANSLATOR_SCRIPT"
# Create a wrapper script in the bench bin directory
WRAPPER_SCRIPT="$PWD/bin/translate-po"
mkdir -p "$PWD/bin"
# Unquoted delimiter on purpose: $ERPNEXT_ENV, $CONFIG_FILE and
# $TRANSLATOR_SCRIPT are baked into the wrapper now, while escaped \$
# expressions are evaluated each time the wrapper runs.
cat > "$WRAPPER_SCRIPT" << EOF
#!/bin/bash
# Wrapper script for translate_po_files.py
# Activate the ERPNext environment
source "$ERPNEXT_ENV/bin/activate"
# Load configuration if it exists
if [ -f "$CONFIG_FILE" ]; then
  source "$CONFIG_FILE"
fi
# Run the translator with the provided arguments
# NOTE(review): the API key is passed as a command-line argument and is
# visible in 'ps' output while the translator runs.
python "$TRANSLATOR_SCRIPT" \${OPENAI_API_KEY:+--api-key=\$OPENAI_API_KEY} "\$@"
EOF
chmod +x "$WRAPPER_SCRIPT"
# Ask the user for OpenAI API key
echo
echo "Would you like to save your OpenAI API key for future use? [Y]/n"
read -r save_key
if [[ -z "$save_key" || "$save_key" =~ ^[Yy]$ ]]; then
  echo "Enter your OpenAI API key:"
  read -r api_key
  # Save the API key to the config file
  echo "OPENAI_API_KEY=\"$api_key\"" > "$CONFIG_FILE"
  chmod 600 "$CONFIG_FILE" # Restrict access to the config file
  echo "API key saved to $CONFIG_FILE"
else
  echo "You can set your API key later by creating a file at $CONFIG_FILE with OPENAI_API_KEY=\"your_key_here\""
fi
# Update usage instructions: print post-install examples for the wrapper
echo
echo "Installation complete!"
echo
echo "Usage:"
echo " ./bin/translate-po [options] <po_file_path>"
echo
echo "Examples:"
echo " ./bin/translate-po --target-lang=th apps/thai_business_suite/thai_business_suite/locale/th.po"
echo " ./bin/translate-po --target-lang=th apps/erpnext/erpnext/locale/th.po"
echo " ./bin/translate-po --target-lang=th apps/frappe/frappe/locale/th.po"
echo
echo "For more options:"
echo " ./bin/translate-po --help"
#!/bin/bash
# Setup script for Thai Business Suite translation service
# This script sets up the necessary environment for translating PO files using OpenAI API
# NOTE(review): third concatenated revision in this file; it assumes
# thai_business_suite is installed (no fallback) and will overwrite the
# translator installed by the sections above.
set -e # Exit on any error
# Banner
echo "==============================================="
echo "Thai Business Suite Translation Service Setup"
echo "==============================================="
# Configuration variables
ERPNEXT_ENV="$PWD/env" # Use existing ERPNext environment
SCRIPTS_DIR="$PWD/apps/thai_business_suite/thai_business_suite/utils/translation"
REQUIREMENTS_FILE="$SCRIPTS_DIR/requirements.txt"
TRANSLATOR_SCRIPT="$SCRIPTS_DIR/translate_po_files.py"
GLOSSARY_SCRIPT="$SCRIPTS_DIR/thai_glossary.py"
CONFIG_FILE="$PWD/.thai_business_suite_config"
# Create directory structure
echo "Creating directory structure..."
mkdir -p "$SCRIPTS_DIR"
# Display message about checking prerequisites
echo "Checking if Python 3 is installed..."
if ! command -v python3 &> /dev/null; then
  echo "❌ Python 3 is required but not installed. Please install Python 3 and try again."
  exit 1
else
  echo "✅ Python 3 is installed."
fi
echo "Checking if pip3 is installed..."
if ! command -v pip3 &> /dev/null; then
  echo "❌ pip3 is required but not installed. Please install pip3 and try again."
  exit 1
else
  echo "✅ pip3 is installed."
fi
# Create requirements file
echo "Creating requirements file..."
cat > "$REQUIREMENTS_FILE" << EOF
openai>=1.3.0
polib>=1.2.0
tqdm>=4.64.0
anthropic>=0.5.0
EOF
# Check if ERPNext environment exists
echo "Checking virtual environment..."
if [ ! -d "$ERPNEXT_ENV" ]; then
  echo "ERPNext environment not found at $ERPNEXT_ENV."
  echo "Please run this script from your ERPNext bench directory."
  exit 1
fi
# Activate ERPNext environment
echo "Activating ERPNext environment..."
source "$ERPNEXT_ENV/bin/activate"
# Install dependencies
echo "Installing dependencies..."
pip install -r "$REQUIREMENTS_FILE"
# Create the Thai glossary file (quoted 'EOF' delimiter: content is written
# verbatim, with no shell expansion)
echo "Creating Thai glossary file..."
cat > "$GLOSSARY_SCRIPT" << 'EOF'
# Thai Business/ERPNext terminology glossary
# This glossary provides standard translations for common terms used in ERPNext
GLOSSARY = {
    # Basic business terms
    "Invoice": "ใบแจ้งหนี้",
    "Sales Invoice": "ใบแจ้งหนี้ขาย",
    "Purchase Invoice": "ใบแจ้งหนี้ซื้อ",
    "Quotation": "ใบเสนอราคา",
    "Customer": "ลูกค้า",
    "Supplier": "ผู้จัดจำหน่าย",
    "Item": "สินค้า",
    "Account": "บัญชี",
    "Journal Entry": "บันทึกทางบัญชี",
    "Payment": "การชำระเงิน",
    "Purchase Order": "ใบสั่งซื้อ",
    "Sales Order": "ใบสั่งขาย",
    "Delivery Note": "ใบส่งสินค้า",
    "Receipt": "ใบเสร็จรับเงิน",
    # Common ERPNext modules
    "Accounting": "การบัญชี",
    "Human Resources": "ทรัพยากรบุคคล",
    "Manufacturing": "การผลิต",
    "Buying": "การซื้อ",
    "Selling": "การขาย",
    "Stock": "คลังสินค้า",
    "Assets": "สินทรัพย์",
    "Projects": "โครงการ",
    "CRM": "การบริหารลูกค้าสัมพันธ์",
    # Tax-related terms
    "VAT": "ภาษีมูลค่าเพิ่ม",
    "Tax": "ภาษี",
    "Withholding Tax": "ภาษีหัก ณ ที่จ่าย",
    "Tax Invoice": "ใบกำกับภาษี",
    # Date/Time terms
    "Date": "วันที่",
    "Time": "เวลา",
    "Year": "ปี",
    "Month": "เดือน",
    "Day": "วัน",
    # Status terms
    "Pending": "รอดำเนินการ",
    "Completed": "เสร็จสิ้น",
    "Cancelled": "ยกเลิก",
    "Draft": "ฉบับร่าง",
    "Submitted": "ส่งแล้ว",
    "Paid": "ชำระแล้ว",
    # Add more terms as needed
}
EOF
# Download the translator script
echo "Downloading translator script..."
cat > "$TRANSLATOR_SCRIPT" << 'EOF'
#!/usr/bin/env python3
"""
AI-Powered PO File Translator for ERPNext

This script translates PO files that are generated from the 'bench update-po-files'
command. It uses the OpenAI (or Anthropic Claude) API to translate content while
preserving the PO file format.

Usage:
    python translate_po_files.py [options] <po_file_path>

Options:
    --target-lang=<language>   Target language (default: th for Thai)
    --api-key=<key>            API key for OpenAI or Anthropic (required)
    --model-provider=<name>    "openai" or "claude" (default: openai)
    --model=<model>            Model to use (default: gpt-4)
    --batch-size=<size>        Number of entries to translate in a batch (default: 10)
    --temperature=<float>      Model temperature (default: 0.3)
    --max-tokens=<int>         Max tokens per API call (default: 512)
    --output=<path>            Output file path (default: input file with .translated suffix)
    --dry-run                  List entries that would be translated, without API calls
    --help                     Show this help message
"""
# The setup script installs this file next to thai_glossary.py either inside
# thai_business_suite or under frappe/utils/translation. Fall back to a
# same-directory import so the script works in both locations (the script's
# own directory is on sys.path when run directly).
try:
    from thai_business_suite.utils.translation.thai_glossary import GLOSSARY
except ImportError:
    from thai_glossary import GLOSSARY

import os
import re
import sys
import time
import argparse
import polib
from typing import List, Dict, Optional
from datetime import datetime
import openai
import anthropic
import json
import random
from tqdm import tqdm

# Constants
DEFAULT_TARGET_LANG = "th"
DEFAULT_MODEL = "gpt-4"
DEFAULT_BATCH_SIZE = 10
SLEEP_TIME = 0.5  # Seconds to sleep between API calls to avoid rate limiting
def backoff_wait(attempt):
    """Sleep with exponential backoff (capped at 60 s) plus a small jitter.

    attempt: zero-based retry counter; the delay roughly doubles each time.
    """
    delay = min(2 ** attempt + random.uniform(0, 0.5), 60)
    print(f"Rate limit reached. Retrying in {delay:.2f} seconds...")
    time.sleep(delay)
def setup_argparse() -> argparse.Namespace:
    """Build the command-line interface and return the parsed arguments."""
    parser = argparse.ArgumentParser(
        description="AI-Powered PO File Translator for ERPNext"
    )
    parser.add_argument("po_file_path", help="Path to the PO file to translate")

    # (flag, keyword-options) pairs, registered in a fixed order so the
    # generated --help output lists options in the documented order.
    option_specs = [
        ("--target-lang", dict(
            default=DEFAULT_TARGET_LANG,
            help=f"Target language (default: {DEFAULT_TARGET_LANG})",
        )),
        ("--api-key", dict(
            required=True,
            help="API key (OpenAI or Anthropic)",
        )),
        ("--model-provider", dict(
            default="openai",
            choices=["openai", "claude"],
            help="AI model provider to use (default: openai)",
        )),
        ("--model", dict(
            default=DEFAULT_MODEL,
            help=f"OpenAI model to use (default: {DEFAULT_MODEL})",
        )),
        ("--batch-size", dict(
            type=int,
            default=DEFAULT_BATCH_SIZE,
            help=f"Number of entries to translate in a batch (default: {DEFAULT_BATCH_SIZE})",
        )),
        ("--output", dict(
            help="Output file path (default: input file with .translated suffix)",
        )),
        ("--temperature", dict(
            type=float,
            default=0.3,
            help="Model temperature (default: 0.3)",
        )),
        ("--max-tokens", dict(
            type=int,
            default=512,
            help="Max tokens per API call (default: 512)",
        )),
        ("--dry-run", dict(
            action="store_true",
            help="Show which entries will be translated without sending API requests",
        )),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
def translate_batch_claude(
    entries: List[Dict],
    target_lang: str,
    api_key: str,
    model: str,
    temperature: float = 0.3,
    max_tokens: int = 1000,
) -> List[str]:
    """Translate a batch of PO entries using the Anthropic Claude API.

    Args:
        entries: Dicts with at least a "msgid" key holding the source text.
        target_lang: Target language code (e.g. "th").
        api_key: Anthropic API key.
        model: Claude model name; falls back to claude-3-haiku when empty.
        temperature: Sampling temperature (default 0.3 for consistency).
        max_tokens: Response token budget (default 1000, the previous
            hard-coded value). These two keyword arguments are required
            because translate_po_file forwards them to every provider.

    Returns:
        One translation per entry, in input order; empty strings on failure
        or for any missing translations.
    """
    client = anthropic.Anthropic(api_key=api_key)

    # Format the batch for translation
    messages_to_translate = [entry["msgid"] for entry in entries]

    # Join outside the f-string: expressions containing backslashes are a
    # syntax error in f-strings before Python 3.12.
    joined_messages = "\n".join(messages_to_translate)

    # Create prompt with instructions. Braces intended literally for the
    # model (e.g. the {0} placeholder) must be doubled inside the f-string.
    glossary_text = json.dumps(GLOSSARY, indent=2)
    prompt = f"""
You are an expert translator specializing in technical and software localization.
Translate the following text from English to {target_lang}.
For Thai language translations, use these specific term translations:
{glossary_text}
Ensure proper tone and formality appropriate for business software.
Preserve any formatting placeholders like {{% s }}, {{ }}, or {{0}}.
For technical terms not in the glossary, you may keep them in English if that's conventional.
Please translate each of the following messages, one per line:
{joined_messages}
Return ONLY the translations as plain text, one per line, without any additional text.
"""

    # Make the API call
    try:
        response = client.messages.create(
            model=model or "claude-3-haiku-20240307",
            max_tokens=max_tokens,
            temperature=temperature,
            messages=[
                {"role": "user", "content": prompt}
            ],
        )
        # Extract the response text (first content block of the message)
        response_text = response.content[0].text
        # One translation per line, mirroring the prompt's output contract
        translations = [line.strip() for line in response_text.strip().split("\n")]
        # Make sure we have the correct number of translations
        if len(translations) != len(messages_to_translate):
            print(f"Warning: Expected {len(messages_to_translate)} translations but got {len(translations)}")
            # Pad with empty strings or truncate so callers can zip safely
            if len(translations) < len(messages_to_translate):
                translations.extend([""] * (len(messages_to_translate) - len(translations)))
            else:
                translations = translations[:len(messages_to_translate)]
        return translations
    except Exception as e:
        print(f"Error during translation: {e}")
        return ["" for _ in entries]  # Return empty strings on error
def translate_batch(
    entries: List[Dict], target_lang: str, api_key: str, model: str, temperature: float = 0.3, max_tokens: int = 512
) -> List[str]:
    """Translate a batch of PO entries using the OpenAI chat completions API.

    Args:
        entries: Dicts with at least a "msgid" key holding the source text.
        target_lang: Target language code (e.g. "th").
        api_key: OpenAI API key.
        model: Chat model name (e.g. "gpt-4").
        temperature: Sampling temperature (default 0.3 for consistency).
        max_tokens: Response token budget per call (default 512).

    Returns:
        One translation per entry, in input order; empty strings when all
        retries are exhausted or an unexpected error occurs.
    """
    client = openai.OpenAI(api_key=api_key)
    glossary_text = json.dumps(GLOSSARY, indent=2)

    # Format the batch for translation
    messages_to_translate = [entry["msgid"] for entry in entries]

    # Create system prompt with instructions. NOTE: response_format
    # "json_object" makes the model emit a JSON *object*, so we ask for
    # {"translations": [...]} rather than a bare array — a bare array would
    # never satisfy JSON mode and the old "return a JSON array" instruction
    # guaranteed a parse failure below.
    system_prompt = f"""
You are an expert translator specializing in technical and software localization.
Translate the following text from English to {target_lang}.
For Thai language translations, use these specific term translations:
{glossary_text}
Ensure proper tone and formality appropriate for business software.
Preserve any formatting placeholders like {{% s }}, {{ }}, or {{0}}.
For technical terms not in the glossary, you may keep them in English if that's conventional.
Return a JSON object of the form {{"translations": [...]}} with the translations in the same order as the input.
"""
    # Prepare messages for the API call
    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": json.dumps(messages_to_translate)
        },
    ]

    # Make the API call with up to 5 retries on API errors
    print(f"Attempting to call OpenAI API with model: {model}")
    attempt = 0
    while attempt < 5:
        try:
            print("Making API call...")
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,  # Lower temperature for more consistent translations
                max_tokens=max_tokens,
                response_format={"type": "json_object"},
                timeout=30,
            )
            print("API call completed successfully")
            # Parse the JSON response; JSON mode returns an object, but
            # tolerate a bare list in case the model ignores the wrapper.
            payload = json.loads(response.choices[0].message.content)
            if isinstance(payload, dict):
                translations = payload.get("translations")
            else:
                translations = payload
            if not isinstance(translations, list) or len(translations) != len(messages_to_translate):
                raise ValueError("Unexpected response format from OpenAI API")
            return translations
        except openai.APIError as e:
            # Covers rate limits and transient server errors; back off and retry
            print(f"OpenAI API error: {e}")
            backoff_wait(attempt)
            attempt += 1
        except Exception as e:
            # Includes the ValueError above: a malformed payload is not
            # recoverable by retrying with the same prompt.
            print(f"Unexpected error: {e}")
            break
    return [""] * len(entries)  # Return empty translations on failure
def translate_po_file(
    po_file_path: str,
    target_lang: str,
    api_key: str,
    model_provider: str,
    model: str,
    batch_size: int,
    dry_run: bool = False,
    temperature: float = 0.3,
    max_tokens: int = 512,
    output_path: Optional[str] = None,
) -> str:
    """
    Translate PO file content while preserving format.

    Untranslated entries (empty msgstr with a non-empty msgid) are sent to the
    selected provider in batches, and the file is re-saved after every batch so
    progress survives an interruption.

    Args:
        po_file_path: Path to the source .po file.
        target_lang: Language code written into the PO metadata.
        api_key: API key for the chosen provider.
        model_provider: "claude" selects the Claude backend; anything else
            uses OpenAI.
        model: Model name forwarded to the provider.
        batch_size: Number of entries per API request.
        dry_run: If True, only print the entries that would be translated.
        temperature: Sampling temperature forwarded as a keyword argument —
            both provider functions must accept it.
        max_tokens: Token budget forwarded as a keyword argument — both
            provider functions must accept it.
        output_path: Destination path; defaults to "<input>.translated<ext>".

    Returns:
        The output file path, or the string "Dry run completed" in dry-run mode.
    """
    # Choose the appropriate translation function
    translate_function = translate_batch_claude if model_provider == "claude" else translate_batch
    if not output_path:
        base, ext = os.path.splitext(po_file_path)
        output_path = f"{base}.translated{ext}"
    # Parse the PO file
    po = polib.pofile(po_file_path)
    # Prepare entries that need translation: msgid present, msgstr still empty
    entries_to_translate = []
    for entry in po:
        if not entry.msgstr and entry.msgid and entry.msgid != "":
            entries_to_translate.append({"entry": entry, "msgid": entry.msgid})
    print(f"Found {len(entries_to_translate)} entries to translate")
    if dry_run:
        print("Dry run mode: Entries to be translated:")
        for entry in entries_to_translate:
            print(entry["msgid"])
        return "Dry run completed"
    # Process in batches; ceiling division so a final partial batch is included
    total_batches = (len(entries_to_translate) + batch_size - 1) // batch_size
    for batch_num in tqdm(range(total_batches), desc="Translating batches..."):
        start_idx = batch_num * batch_size
        end_idx = min(start_idx + batch_size, len(entries_to_translate))
        current_batch = entries_to_translate[start_idx:end_idx]
        print(
            f"Translating batch {batch_num + 1}/{total_batches} ({len(current_batch)} entries)"
        )
        # Translate the batch via the selected provider
        translations = translate_function(
            current_batch,
            target_lang,
            api_key,
            model,
            temperature=temperature,
            max_tokens=max_tokens
        )
        # Apply translations to the entries; the index guard protects against
        # a provider returning more translations than requested
        for i, translation in enumerate(translations):
            if i < len(current_batch):
                current_batch[i]["entry"].msgstr = translation
        # Save after each batch (checkpoint)
        po.save(output_path)
        print(f"Saved progress to {output_path}")
        # Sleep to avoid rate limiting (skipped after the final batch)
        if batch_num < total_batches - 1:
            time.sleep(SLEEP_TIME)
    # Update metadata so the output records when and into what language it was revised
    po.metadata["PO-Revision-Date"] = datetime.now().strftime("%Y-%m-%d %H:%M%z")
    po.metadata["Language"] = target_lang
    # Save final version
    po.save(output_path)
    print(f"Translation completed! Output saved to: {output_path}")
    print(f"Translated {len(entries_to_translate)} entries to {target_lang}")
    return output_path
def main():
    """Entry point: validate the input file and run the translation."""
    args = setup_argparse()

    # Fail fast on a bad path before any API work happens.
    if not os.path.exists(args.po_file_path):
        print(f"Error: PO file not found: {args.po_file_path}")
        sys.exit(1)

    print(f"Starting translation of {args.po_file_path} to {args.target_lang}")

    result_path = translate_po_file(
        args.po_file_path,
        args.target_lang,
        args.api_key,
        args.model_provider,
        args.model,
        args.batch_size,
        dry_run=args.dry_run,
        temperature=args.temperature,
        max_tokens=args.max_tokens,
        output_path=args.output,
    )
    print(f"Translation complete! Output saved to: {result_path}")


if __name__ == "__main__":
    main()
EOF
# Make the script executable
chmod +x "$TRANSLATOR_SCRIPT"

# Create a wrapper script in the bench bin directory.
# The heredoc delimiter is unquoted on purpose: $ERPNEXT_ENV, $CONFIG_FILE and
# $TRANSLATOR_SCRIPT are baked in at install time, while the escaped \$…
# expansions are evaluated when the wrapper runs.
WRAPPER_SCRIPT="$PWD/bin/translate-po"
mkdir -p "$PWD/bin"
cat > "$WRAPPER_SCRIPT" << EOF
#!/bin/bash
# Wrapper script for translate_po_files.py

# Activate the ERPNext environment
source "$ERPNEXT_ENV/bin/activate"

# Load configuration if it exists
if [ -f "$CONFIG_FILE" ]; then
    source "$CONFIG_FILE"
fi

# Forward the saved defaults when present. MODEL_PROVIDER was previously
# written to the config file but never used; pass it through so the saved
# provider preference actually takes effect. Explicit flags given on the
# command line come later in argv and therefore win.
python "$TRANSLATOR_SCRIPT" \${OPENAI_API_KEY:+--api-key=\$OPENAI_API_KEY} \${MODEL_PROVIDER:+--model-provider=\$MODEL_PROVIDER} "\$@"
EOF
chmod +x "$WRAPPER_SCRIPT"
# Ask the user for OpenAI API key
echo
echo "Would you like to save your OpenAI API key for future use? [Y]/n"
read -r save_key
# Accept any answer starting with y/Y ("y", "yes", "Yes"); the old ^[Yy]$
# anchor rejected "yes".
if [[ -z "$save_key" || "$save_key" =~ ^[Yy] ]]; then
    echo "Enter your OpenAI API key:"
    read -r api_key

    # Ask the user for which model provider to use as default
    echo "Which AI model provider would you like to use by default? [openai]/claude"
    read -r model_provider
    # Default to openai on empty input or anything starting with o/O
    # ("o", "openai", "OpenAI"). The old ^[Oo]$ anchor matched only a single
    # letter, so typing "openai" wrongly selected claude.
    if [[ -z "$model_provider" || "$model_provider" =~ ^[Oo] ]]; then
        model_provider="openai"
    else
        model_provider="claude"
    fi

    # Save the API key and model provider to the config file
    echo "OPENAI_API_KEY=\"$api_key\"" > "$CONFIG_FILE"
    echo "MODEL_PROVIDER=\"$model_provider\"" >> "$CONFIG_FILE"
    chmod 600 "$CONFIG_FILE" # Restrict access to the config file
    echo "API key and model provider preferences saved to $CONFIG_FILE"
else
    echo "You can set your API key later by creating a file at $CONFIG_FILE with OPENAI_API_KEY=\"your_key_here\""
fi
# Update usage instructions: print the post-install help text in one quoted
# heredoc (no expansion needed) instead of a chain of echo calls.
cat <<'USAGE_EOF'

Installation complete!

Usage:
 ./bin/translate-po [options] <po_file_path>

Examples:
 ./bin/translate-po --target-lang=th apps/thai_business_suite/thai_business_suite/locale/th.po
 ./bin/translate-po --model-provider=claude apps/erpnext/erpnext/locale/th.po
 ./bin/translate-po --batch-size=20 apps/frappe/frappe/locale/th.po

For more options:
 ./bin/translate-po --help
USAGE_EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment