sjgallagher2 · February 7, 2021 19:56
diff --git a/get_txt_words.py b/get_txt_words.py
 # Detailed instructions on how to use this:
 # 1. Make sure you have Python 3 installed on your computer. It's available here: https://www.python.org/downloads/
 #    Python is a scripting language, meaning you can run it interactively (typing into a terminal) or you can write scripts
 #    (like this one) and run them with a Python IDE like PyCharm, or from Python IDLE, or any other way you want. 
 #    Python 2 (e.g. 2.7) is NOT supported by PyWORDS, only Python 3.
 # 2. Download the PYWORDS repository here: https://github.com/sjgallagher2/PyWORDS
 #    You can download it as a .zip file. Save the zip in a folder somewhere like 
 #       C:/Users/<user>/Documents/MyDictionaries/PYWORDS.zip
 #    Where "MyDictionaries" is whatever name you choose for the folder where the files will be saved.
 #    It doesn't matter where you save it, but for THIS script to work, you need to save this script in the same directory
 #    (MyDictionaries).
 #    Python uses directory names to detect packages (unless they're installed) so when you run this script, Python
 #    sees the "import PYWORDS.lookup" line and expects to find PYWORDS/lookup.py. 
 # 2.a Extract the .zip to the same place. Call the directory PYWORDS/ (note capitalization)
 #    Wherever you saved things, the files should look like this:
 #     * MyDictionaries/get_txt_words.py
 #     * MyDictionaries/PYWORDS/
 #       - MyDictionaries/PYWORDS/lookup.py
 #       - MyDictionaries/PYWORDS/definitions.py
 #       - and so on...
 #    and within PYWORDS/ you should have lookup.py, etc. 
 # 3. Put your .txt file in the MyDictionaries (or whatever you called it) directory, with this script (get_txt_words.py)
 #    and the whole PYWORDS repository. Call it "latin.txt", or look at the start of this script for where the filename
 #    is set and change the name to something that suits you.
 # 4. Open Python IDLE, or PyCharm, or whatever your preferred Python program is, open this script, and run it. The
 #    IDE will run each line in order, and it will print the program output to the screen. You should be set!

 fname = "latin.txt"   # YOU CAN CHANGE THIS FILENAME. ONLY use .txt (plain text) files - No PDFs

 # Everything else should be hands-off.
 import PYWORDS.lookup as lookup

 # Only keep the base filename: strip a trailing '.txt' if present, otherwise keep the name unchanged
 fname_base = fname[:-len('.txt')] if fname.endswith('.txt') else fname

 # Read the whole input file into one string variable.
 # The 'with' block closes the file for us, even if reading fails part-way.
 with open(fname, 'r') as f:
     txt = f.read()

 # Now we're going to run the main lookup method, lookup.get_vocab_list(txt)
 # It returns two variables, both Python 'list' types (arrays), the string entries, and the missed words
 dict_entries, missed = lookup.get_vocab_list(txt)

 # That's the main method, now we just need to save the results to a new file, which we'll call <file>_DICT.txt
 # Opening with 'w' overwrites any previous file of the same name.
 with open(fname_base + '_DICT.txt', 'w') as f:
     f.write('====== DICTIONARY ENTRIES ======\n') # Header
     for e in dict_entries:
         f.write(e + '\n') # Write each entry, one after another

     f.write('====== MISSED WORDS ======\n')
     for m in missed:
         f.write(m + '\n')
 # We're done: the entries and missed words are now in <file>_DICT.txt.
 # If you run this in an interactive prompt, each f.write() call echoes the number of characters it wrote
 # (so you can ignore those numbers).
	# Detailed instructions on how to use this:
	# 1. Make sure you have Python 3 installed on your computer. It's available here: https://www.python.org/downloads/
	# Python is a scripting language, meaning you can run it interactively (typing into a terminal) or you can write scripts
	# (like this one) and run them with a Python IDE like PyCharm, or from Python IDLE, or any other way you want.
	# Python 2 (e.g. 2.7) is NOT supported by PyWORDS, only Python 3.
	# 2. Download the PYWORDS repository here: https://github.com/sjgallagher2/PyWORDS
	# You can download it as a .zip file. Save the zip in a folder somewhere like
	# C:/Users/<user>/Documents/MyDictionaries/PYWORDS.zip
	# Where "MyDictionaries" is whatever name you choose for the folder where the files will be saved.
	# It doesn't matter where you save it, but for THIS script to work, you need to save this script in the same directory
	# (MyDictionaries).
	# Python uses directory names to detect packages (unless they're installed) so when you run this script, Python
	# sees the "import PYWORDS.lookup" line and expects to find PYWORDS/lookup.py.
	# 2.a Extract the .zip to the same place. Call the directory PYWORDS/ (note capitalization)
	# Wherever you saved things, the files should look like this:
	# * MyDictionaries/get_txt_words.py
	# * MyDictionaries/PYWORDS/
	# - MyDictionaries/PYWORDS/lookup.py
	# - MyDictionaries/PYWORDS/definitions.py
	# - and so on...
	# and within PYWORDS/ you should have lookup.py, etc.
	# 3. Put your .txt file in the MyDictionaries (or whatever you called it) directory, with this script (get_txt_words.py)
	# and the whole PYWORDS repository. Call it "latin.txt", or look at the start of this script for where the filename
	# is set and change the name to something that suits you.
	# 4. Open Python IDLE, or PyCharm, or whatever your preferred Python program is, open this script, and run it. The
	# IDE will run each line in order, and it will print the program output to the screen. You should be set!

	fname = "latin.txt" # YOU CAN CHANGE THIS FILENAME. ONLY use .txt (plain text) files - No PDFs

	# Everything else should be hands-off.
	import PYWORDS.lookup as lookup

	# Only keep the base filename: strip a trailing '.txt' if present, otherwise keep the name unchanged
	fname_base = fname[:-len('.txt')] if fname.endswith('.txt') else fname

	# Read the whole input file into one string variable.
	# The 'with' block closes the file for us, even if reading fails part-way.
	with open(fname, 'r') as f:
		txt = f.read()

	# Now we're going to run the main lookup method, lookup.get_vocab_list(txt)
	# It returns two variables, both Python 'list' types (arrays), the string entries, and the missed words
	dict_entries, missed = lookup.get_vocab_list(txt)

	# That's the main method, now we just need to save the results to a new file, which we'll call <file>_DICT.txt
	# Opening with 'w' overwrites any previous file of the same name.
	with open(fname_base + '_DICT.txt', 'w') as f:
		f.write('====== DICTIONARY ENTRIES ======\n') # Header
		for e in dict_entries:
			f.write(e + '\n') # Write each entry, one after another

		f.write('====== MISSED WORDS ======\n')
		for m in missed:
			f.write(m + '\n')
	# We're done: the entries and missed words are now in <file>_DICT.txt.
	# If you run this in an interactive prompt, each f.write() call echoes the number of characters it wrote
	# (so you can ignore those numbers).
No results found