Last active
September 15, 2024 01:26
-
-
Save planetis-m/95a38654b48f45dd147d45714c8b03ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import os | |
import string | |
import polib | |
# import tempfile | |
# import subprocess | |
from termcolor import colored | |
from difflib import SequenceMatcher | |
# Letter frequencies of the Greek language, keyed by unaccented lowercase
# letter. assign_ampersand_randomly() treats each value as a penalty: the
# larger the number, the less likely the letter is picked as an accelerator.
GREEK_LETTER_PENALTIES = {
    'α': 10.81,
    'τ': 7.99,
    'ο': 7.23,
    'ε': 7.18,
    'σ': 7.00,
    'ι': 6.64,
    'ν': 6.19,
    'ρ': 4.32,
    'π': 4.15,
    'κ': 3.77,
    'μ': 3.43,
    'η': 3.18,
    'υ': 3.04,
    'λ': 2.66,
    'γ': 1.70,
    'δ': 1.63,
    'χ': 1.29,
    'ω': 1.23,
    'θ': 1.22,
    'φ': 0.74,
    'β': 0.67,
    'ξ': 0.44,
    'ζ': 0.33,
    'ψ': 0.15,
}

# Greek letters that must never receive an ampersand: vowels carrying a
# tonos and/or dialytika, plus the final sigma.
EXCLUDED_LETTERS = {
    'ά', 'έ', 'ή', 'ί', 'ό', 'ύ', 'ώ',
    'ϊ', 'ϋ', 'ΐ', 'ΰ',
    'ς',
}
def detect_ampersand_changes(old_msgid, new_msgid):
    """
    Count the unescaped ampersands (&) in old_msgid and in new_msgid.

    Escaped ampersands (&&) are not counted. Returns the tuple
    (old_count, new_count); the caller compares the two numbers to decide
    whether an ampersand was added or removed.
    """
    def count_unescaped(text):
        # str.count() is non-overlapping, so every '&&' pair accounts for
        # exactly two of the '&' characters counted by the first term.
        return text.count('&') - text.count('&&') * 2

    # The original compared the two counts but returned the same tuple in
    # both branches, so the comparison is left to the caller.
    return count_unescaped(old_msgid), count_unescaped(new_msgid)
def remove_unescaped_ampersand(msgstr, count_to_remove):
    """
    Drop up to `count_to_remove` unescaped ampersands (&) from msgstr.

    The string is scanned left to right. Every '&&' pair is an escaped
    ampersand and is always copied through untouched; single '&' characters
    are dropped until the quota is used up, after which they are kept.
    """
    kept = []
    remaining = count_to_remove
    pos = 0
    length = len(msgstr)
    while pos < length:
        if msgstr.startswith('&&', pos):
            # Escaped ampersand: keep both characters and jump past them.
            kept.append('&&')
            pos += 2
            continue
        char = msgstr[pos]
        pos += 1
        if char == '&' and remaining > 0:
            # Unescaped ampersand still within the quota: drop it.
            remaining -= 1
            continue
        kept.append(char)
    return ''.join(kept)
def apply_ampersand_change(old_msgid, new_msgid, msgstr):
    """
    Mirror an ampersand count change between the msgids onto msgstr.

    Returns a (handled, msgstr) tuple:
    - more ampersands in new_msgid: the extra ones are placed on randomly
      chosen letters of msgstr;
    - fewer ampersands in new_msgid: that many unescaped ampersands are
      removed from msgstr;
    - same non-zero count: the ampersand may only have moved, msgstr is
      returned untouched but the change counts as handled;
    - no ampersands at all: (False, msgstr).
    """
    before, after = detect_ampersand_changes(old_msgid, new_msgid)
    delta = after - before
    if delta > 0:
        # Ampersand(s) added to the msgid.
        return (True, assign_ampersand_randomly(msgstr, delta))
    if delta < 0:
        # Ampersand(s) removed from the msgid.
        return (True, remove_unescaped_ampersand(msgstr, -delta))
    # Equal counts: handled only when there is an ampersand to speak of.
    return (after > 0, msgstr)
def _accelerator_candidates(msgstr):
    """Return the lowercase letters of msgstr that can still take an ampersand."""
    candidates = []
    # Sorted so that a seeded random generator yields reproducible picks
    # (iteration order of a set of strings varies between runs).
    for letter in sorted(set(msgstr.lower())):
        if letter in EXCLUDED_LETTERS or not letter.isalpha():
            continue
        # insert_ampersand_before_letter() places the ampersand before the
        # earliest occurrence of the letter in either case; find that spot.
        positions = [
            pos
            for pos in (msgstr.find(letter.lower()), msgstr.find(letter.upper()))
            if pos != -1
        ]
        if not positions:
            continue
        first = min(positions)
        if first > 0 and msgstr[first - 1] == '&':
            # Inserting here would produce '&&', which is an escaped literal
            # ampersand rather than an accelerator.
            continue
        candidates.append(letter)
    return candidates


def assign_ampersand_randomly(msgstr, ampersands_to_add):
    """
    Assign `ampersands_to_add` ampersands to randomly chosen letters of msgstr.

    Letters listed in EXCLUDED_LETTERS and non-alphabetic characters are never
    chosen. Common Greek letters are penalized: each candidate is weighted by
    int(100 / penalty), with the penalty taken from GREEK_LETTER_PENALTIES
    (1 for letters not listed there).

    Candidates are recomputed before every insertion and a letter whose first
    occurrence is already preceded by '&' is skipped, so the same letter is
    never marked twice and no '&&' sequence is created. If no candidate is
    left, msgstr is returned with fewer ampersands than requested.
    """
    for _ in range(ampersands_to_add):
        candidates = _accelerator_candidates(msgstr)
        if not candidates:
            # No valid letters left to assign an ampersand to.
            break
        # Higher penalty (more common letter) = smaller weight. The floor of
        # 1 keeps the weights valid even for a very large penalty.
        weights = [
            max(1, int(100 / GREEK_LETTER_PENALTIES.get(letter, 1)))
            for letter in candidates
        ]
        chosen_letter = random.choices(candidates, weights=weights, k=1)[0]
        msgstr = insert_ampersand_before_letter(msgstr, chosen_letter)
    return msgstr
def insert_ampersand_before_letter(msgstr, letter):
    """
    Insert an ampersand (&) before the earliest occurrence of `letter` in msgstr.

    Both the lowercase and the uppercase form of the letter are searched and
    whichever appears first is prefixed. msgstr is returned unchanged when
    neither form occurs.
    """
    lower_form, upper_form = letter.lower(), letter.upper()
    pos_lower = msgstr.find(lower_form)
    pos_upper = msgstr.find(upper_form)

    if pos_lower == pos_upper == -1:
        return msgstr

    # The uppercase form wins when it is the only one present or when it
    # appears strictly before the lowercase form.
    use_upper = pos_lower == -1 or (pos_upper != -1 and pos_upper < pos_lower)
    target = upper_form if use_upper else lower_form
    return msgstr.replace(target, f'&{target}', 1)
def detect_trailing_changes(old, new):
    """
    Report a change in the trailing punctuation between old and new.

    Each string is matched against a fixed list of suffixes ('...', '…',
    ': ', ':', '.', ', ', ','), longest variants first. Returns
    (old_suffix, new_suffix) when the two differ, where a missing suffix is
    None, and (None, None) when both strings end the same way.
    """
    # Order matters: '...' must be tried before '.', ': ' before ':' and
    # ', ' before ','.
    suffixes = ('...', '…', ': ', ':', '.', ', ', ',')

    def trailing_of(text):
        return next((suffix for suffix in suffixes if text.endswith(suffix)), None)

    before, after = trailing_of(old), trailing_of(new)
    return (None, None) if before == after else (before, after)
def apply_trailing_change(old_msgid, new_msgid, msgstr):
    """
    Mirror a trailing-punctuation change between the msgids onto msgstr.

    Returns (False, msgstr) when the msgids end with the same pattern.
    Otherwise the old pattern is stripped from the end of msgstr (if it is
    there), the new pattern is appended (if not already there), and
    (True, updated_msgstr) is returned.
    """
    removed, added = detect_trailing_changes(old_msgid, new_msgid)
    if (removed, added) == (None, None):
        # Nothing changed at the end of the msgid.
        return (False, msgstr)

    updated = msgstr
    if removed and updated.endswith(removed):
        # Cut the old pattern and any whitespace that preceded it.
        updated = updated[:-len(removed)].rstrip()
    if added and not updated.endswith(added):
        updated = updated.rstrip() + added
    return (True, updated)
def apply_case_change(old_msgid, new_msgid, msgstr):
    """
    Mirror a letter-case change between the msgids onto msgstr.

    Returns a (handled, msgstr) tuple:
    - new_msgid is the sentence-cased old_msgid: msgstr is sentence-cased;
    - new_msgid starts with a lowercase letter: msgstr is fully lowercased;
    - old_msgid is the sentence-cased new_msgid (words were title-cased):
      msgstr is kept as is but the change counts as handled;
    - anything else: (False, msgstr).

    Empty strings are accepted for every argument.
    """
    def sentence_case(s):
        # Slicing instead of s[0] so that an empty string (for example an
        # untranslated plural form) does not raise IndexError.
        return s[:1].upper() + s[1:].lower()

    # Check if new_msgid is the sentence-cased version of old_msgid.
    if sentence_case(old_msgid) == new_msgid:
        # Apply the same sentence casing to msgstr.
        return (True, sentence_case(msgstr))
    if new_msgid[:1].islower():
        # Apply lowercase to all letters in msgstr.
        return (True, msgstr.lower())
    if old_msgid == sentence_case(new_msgid):
        # Words were title-cased; the translation is left alone.
        return (True, msgstr)
    return (False, msgstr)
def is_minor_change(old, new):
    """
    Tell whether old and new differ only in ways normalize_string() ignores.

    Returns True when both strings normalize to the same text, i.e. the
    difference is limited to case, the ignored punctuation and whitespace.
    """
    return normalize_string(old) == normalize_string(new)
def normalize_string(s):
    """
    Reduce a string to a canonical form for minor-change comparison.

    The characters '.', '&', ':', ',' and '…' are dropped, every run of
    whitespace collapses to a single space, all other characters are
    lowercased one by one, and leading/trailing spaces are stripped.
    """
    ignored = '.&:,…'
    pieces = []
    for ch in s:
        if ch in ignored:
            # Ignored punctuation does not break up a whitespace run.
            continue
        if not ch.isspace():
            pieces.append(ch.lower())
        elif not pieces or pieces[-1] != ' ':
            # First whitespace character of a run: emit one space.
            pieces.append(' ')
    return ''.join(pieces).strip()
# def open_editor_with_content(initial_content): | |
# """Open the user's default editor to edit multiline text.""" | |
# # Create a temporary file | |
# with tempfile.NamedTemporaryFile(suffix=".tmp", mode='w+', delete=False) as tmp_file: | |
# # Write initial content (the current msgstr) to the temporary file | |
# tmp_file.write(initial_content) | |
# tmp_file_name = tmp_file.name | |
# # Try to get the user's default editor from environment variables, fallback to 'nano' | |
# editor = os.environ.get('EDITOR', 'nano') | |
# # Open the temporary file in the editor | |
# subprocess.call([editor, tmp_file_name]) | |
# # After the user closes the editor, read the file back | |
# with open(tmp_file_name, 'r') as tmp_file: | |
# edited_content = tmp_file.read() | |
# # Clean up the temporary file | |
# os.remove(tmp_file_name) | |
# | |
# return edited_content.strip() | |
def colored_inline_diff(str1, str2):
    """
    Print a one-line colored diff that turns str1 into str2.

    Unchanged text is printed plainly, text only in str1 is printed white on
    red, text only in str2 is printed green. A replacement shows the removed
    text followed by the inserted text. The line ends with a newline.
    """
    def removed(start, end):
        return colored(str1[start:end], 'white', 'on_red')

    def added(start, end):
        return colored(str2[start:end], 'green')

    for tag, i1, i2, j1, j2 in SequenceMatcher(None, str1, str2).get_opcodes():
        if tag == 'equal':
            segment = str1[i1:i2]
        elif tag == 'delete':
            segment = removed(i1, i2)
        elif tag == 'insert':
            segment = added(j1, j2)
        elif tag == 'replace':
            segment = removed(i1, i2) + added(j1, j2)
        else:
            continue
        print(segment, end='')
    print()  # Terminate the diff line
def print_header(text):
    """Print `text` as a bold yellow '=== ... ===' banner after a blank line."""
    banner = f"\n=== {text} ==="
    print(colored(banner, "yellow", attrs=["bold"]))
def print_subheader(text):
    """Print `text` in cyan, prefixed with a space."""
    line = f" {text}"
    print(colored(line, "cyan"))
def print_info(text):
    """Print `text` in bold magenta."""
    line = f"{text}"
    print(colored(line, "magenta", attrs=["bold"]))
def print_change(text):
    """Print `text` in green, prefixed with a space."""
    line = f" {text}"
    print(colored(line, "green"))
def print_unchanged(text):
    """Print `text` in dark grey, prefixed with a space."""
    line = f" {text}"
    print(colored(line, "dark_grey"))
from enum import Enum | |
class MsgstrChangeStatus(Enum):
    """Outcome of comparing a msgstr before and after the automatic changes."""

    # The msgstr needed no modification but the msgid change was handled.
    SAVED_AS_IS = 1
    # The msgstr was modified automatically.
    AUTO_APPLIED = 2
    # Nothing was applied to the msgstr.
    UNCHANGED = 3
def process_msgstr_change(current_msgstr, new_msgstr, change_applied):
    """
    Classify the result of the automatic changes as a MsgstrChangeStatus.

    - AUTO_APPLIED when new_msgstr differs from current_msgstr;
    - SAVED_AS_IS when the text is identical but `change_applied` is true;
    - UNCHANGED otherwise.

    Only the status is returned; the strings themselves are not modified.
    """
    if new_msgstr == current_msgstr:
        if change_applied:
            return MsgstrChangeStatus.SAVED_AS_IS
        return MsgstrChangeStatus.UNCHANGED
    return MsgstrChangeStatus.AUTO_APPLIED
def detect_and_preapply_changes(entry):
    """
    Pre-apply a minor msgid change (ampersand, trailing punctuation, case) to
    the translation(s) of `entry`.

    The change is considered minor when previous_msgid and msgid normalize to
    the same text and, for entries with plural translations, the same holds
    for previous_msgid_plural and msgid_plural. For plural entries
    msgstr_plural[0] follows the singular msgid and every other index follows
    the plural msgid; otherwise entry.msgstr is used.

    Returns True when the entry was handled (a change was auto-applied to
    entry.msgstr / entry.msgstr_plural, or the translation was confirmed to
    need no change). Returns False when the change is not minor or when no
    automatic change could be applied to one of the forms, so the user has to
    handle the entry.
    """
    old_msgid = entry.previous_msgid
    new_msgid = entry.msgid
    old_msgid_plural = entry.previous_msgid_plural
    new_msgid_plural = entry.msgid_plural

    # Helper function to run every automatic change on one msgstr.
    def apply_changes_to_strs(old, new, msgstr):
        change_applied = False
        for apply_change in (apply_ampersand_change,
                             apply_trailing_change,
                             apply_case_change):
            applied, msgstr = apply_change(old, new, msgstr)
            change_applied = change_applied or applied
        return change_applied, msgstr

    is_minor_change_plural = bool(old_msgid_plural and new_msgid_plural) and \
        is_minor_change(old_msgid_plural, new_msgid_plural)

    # Singular and plural are handled together: the entry must either have
    # plural translations AND a minor plural change, or neither (XNOR).
    if not (old_msgid and is_minor_change(old_msgid, new_msgid)
            and bool(entry.msgstr_plural) == is_minor_change_plural):
        print_unchanged("No changes applied due to complexity.")
        return False  # Change is not minor, user will handle it

    print_subheader("Detected minor change in msgid:")
    colored_inline_diff(old_msgid, new_msgid)
    if is_minor_change_plural:
        colored_inline_diff(old_msgid_plural, new_msgid_plural)

    # One (index, old msgid, new msgid, current msgstr) tuple per translated
    # form; index None stands for the plain entry.msgstr. Iterating over the
    # indices that actually exist avoids assuming that index 1 is present.
    if is_minor_change_plural:
        forms = [
            (
                index,
                old_msgid if index == 0 else old_msgid_plural,
                new_msgid if index == 0 else new_msgid_plural,
                entry.msgstr_plural[index],
            )
            for index in sorted(entry.msgstr_plural)
        ]
    else:
        forms = [(None, old_msgid, new_msgid, entry.msgstr)]

    # change_applied accumulates across forms, as in the original flow where
    # a change applied to the singular also counted for the plural.
    change_applied = False
    results = []  # (index, current msgstr, updated msgstr, status)
    for index, old, new, current in forms:
        applied, updated = apply_changes_to_strs(old, new, current)
        change_applied = change_applied or applied
        status = process_msgstr_change(current, updated,
                                       change_applied or old == new)
        results.append((index, current, updated, status))

    statuses = [result[3] for result in results]

    if MsgstrChangeStatus.UNCHANGED in statuses:
        print_unchanged("↳ Entry NOT changed:")
        for _, _, updated, _ in results:
            print(updated)
        return False  # Nothing could be applied, user will handle it

    if MsgstrChangeStatus.AUTO_APPLIED in statuses:
        # Write back when ANY form changed, not only the singular one, so a
        # plural-only change is no longer shown and then silently dropped.
        print_change("↳ Auto-applied change to entry:")
        for index, current, updated, _ in results:
            colored_inline_diff(current, updated)
            if index is None:
                entry.msgstr = updated
            else:
                entry.msgstr_plural[index] = updated
    else:
        print_change("↳ Entry saved as is:")
        for _, _, updated, _ in results:
            print(updated)

    # Report success so the caller can act on the handled entry.
    return True
def edit_msgstr(entry):
    """
    Pre-apply minor msgid changes to the translation of `entry`.

    Returns True when detect_and_preapply_changes() reports the entry as
    handled and False otherwise, so that the caller can decide whether to
    clear the fuzzy flag. The interactive edit/save/skip flow is currently
    disabled (kept below as commented-out code).
    """
    # print(f"\nmsgid: {entry.msgid}")
    # if entry.previous_msgid:
    #     print("\nDiff with previous_msgid:")
    #     colored_inline_diff(entry.previous_msgid, entry.msgid)

    # Detect and pre-apply changes if msgid changed regarding trailing
    # dots/ellipsis/colon/period. The result used to be discarded, which made
    # this function always return None.
    handled = detect_and_preapply_changes(entry)

    # # Show the current msgstr
    # print(f"\nCurrent msgstr: {entry.msgstr}")
    # # Prompt the user for action (edit, save, or skip)
    # while True:
    #     action = input("Choose an action - [e]dit, [s]ave, or [k]skip: ").strip().lower()
    #     if action == 'e':
    #         # Open the user's editor with the current msgstr as the initial content
    #         new_msgstr = open_editor_with_content(entry.msgstr)
    #         # Update the msgstr if it was edited
    #         if new_msgstr != entry.msgstr:
    #             entry.msgstr = new_msgstr
    #             print(f"\nUpdated msgstr: {entry.msgstr}")
    #         break
    #     elif action == 's':
    #         # Just save the current msgstr (possibly pre-applied changes)
    #         break
    #     elif action == 'k':
    #         # Skip saving changes
    #         return False
    # return True
    return bool(handled)
def process_po_file(filepath):
    """
    Walk the fuzzy entries of one .po file and save it if any was resolved.

    Each fuzzy entry is passed to edit_msgstr(); when that returns a truthy
    value the entry's 'fuzzy' flag is removed. The file is written back only
    if at least one entry was resolved.
    """
    catalog = polib.pofile(filepath)
    modified = False
    for fuzzy_entry in catalog.fuzzy_entries():
        print_header(f"Editing fuzzy entry in {filepath}:{fuzzy_entry.linenum}")
        if not edit_msgstr(fuzzy_entry):
            continue
        fuzzy_entry.flags.remove('fuzzy')  # Entry is no longer fuzzy
        modified = True

    if not modified:
        return
    print_info(f"Saving changes to {filepath}...")
    catalog.save()
def scan_directory(directory):
    """Recursively find every '.po' file under `directory` and process it."""
    for folder, _subdirs, filenames in os.walk(directory):
        po_paths = [
            os.path.join(folder, name)
            for name in filenames
            if name.endswith('.po')
        ]
        for po_path in po_paths:
            process_po_file(po_path)
if __name__ == "__main__":
    # Script entry point: ask for the root directory, then process every
    # .po file found beneath it.
    root_directory = input("Enter the directory to scan for .po files: ").strip()
    scan_directory(root_directory)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment