Skip to content

Instantly share code, notes, and snippets.

@mim-Armand
Created December 23, 2024 07:06
Show Gist options
  • Save mim-Armand/fbd8e7fd1f771836f481f5f5e30e50c0 to your computer and use it in GitHub Desktop.
Save mim-Armand/fbd8e7fd1f771836f481f5f5e30e50c0 to your computer and use it in GitHub Desktop.
Convert a CSV file to a dictionary to be used with AI/ML (MFA)
import csv
def create_hebrew_dictionary(
    input_csv="hebrew_frequency.csv",
    output_dict="hebrew_dictionary.txt",
    *,
    hebrew_col_name="Transliteration",
    translit_col_name="Transliteration",
):
    """Convert a word-list CSV into a plain-text ``key : value`` dictionary.

    The CSV must start with a header row. For every data row the cell in
    ``hebrew_col_name`` becomes the key and the cell in ``translit_col_name``
    becomes the value; both are stripped of surrounding whitespace. Rows
    whose key is empty are skipped, and when a key occurs more than once the
    last row wins. The result is written to ``output_dict`` as one
    ``key : value`` line per entry, and a confirmation message is printed.

    NOTE(review): both column defaults are "Transliteration", so by default
    each entry maps a transliteration to itself. That is what the script
    did before (its "Hebrew" key column was commented out). Pass
    ``hebrew_col_name="Hebrew"`` to get a Hebrew -> transliteration mapping.

    Args:
        input_csv: Path of the UTF-8 CSV file to read.
        output_dict: Path of the UTF-8 text file to write (overwritten).
        hebrew_col_name: Header of the column used for dictionary keys.
        translit_col_name: Header of the column used for dictionary values.

    Raises:
        ValueError: If the CSV has no header row or lacks either column.
    """
    # newline="" is what the csv module expects, so that newlines embedded
    # in quoted fields are parsed correctly.
    with open(input_csv, "r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)

        # fieldnames is None for an empty file; treat that as "no columns"
        # so the caller gets the ValueError below instead of a TypeError.
        fieldnames = reader.fieldnames or ()
        if (
            hebrew_col_name not in fieldnames
            or translit_col_name not in fieldnames
        ):
            raise ValueError(
                f"CSV missing required columns {hebrew_col_name} or {translit_col_name}"
            )

        hebrew_dict = {}
        for row in reader:
            # DictReader fills cells missing from a short row with None.
            hebrew_word = (row[hebrew_col_name] or "").strip()
            if not hebrew_word:
                continue
            hebrew_dict[hebrew_word] = (row[translit_col_name] or "").strip()

    with open(output_dict, "w", encoding="utf-8") as out_f:
        out_f.writelines(
            f"{hebrew_word} : {transliteration}\n"
            for hebrew_word, transliteration in hebrew_dict.items()
        )

    print(f"Dictionary file saved to {output_dict}")
# Script entry point: convert the default frequency CSV into the default
# dictionary text file when this module is executed directly.
if __name__ == "__main__":
    create_hebrew_dictionary(
        input_csv="hebrew_frequency.csv",
        output_dict="hebrew_dictionary.txt",
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment