Created
February 12, 2022 23:28
-
-
Save dotcomboom/cfcc027020a812799abee5c12ecb40d9 to your computer and use it in GitHub Desktop.
Memorization wizard - takes docx file as input, outputs memorization-friendly versions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a Python script that reads an Office DOCX file and writes
# two copies of it in which every word that is not bolded or
# italicized is abbreviated: a "fill" copy that keeps the first letter
# and replaces each remaining character of the word with "_", and a
# "strip" copy that keeps only the first letter of each word.
# Imports. | |
from docx import Document | |
import re | |
import sys | |
import os | |
# Banner printed on every run.
print('Memorization Wizard')
print('==================')

# Take the input path from the first command-line argument; when none was
# given, fall back to prompting the user for it.
if len(sys.argv) < 2:
    filename = input('Enter the name of the file to read: ')
else:
    filename = sys.argv[1]

# Stop early when the path is not an existing regular file. Exit with a
# non-zero status so shells and calling scripts can detect the failure
# (a bare sys.exit() would report success).
if not os.path.isfile(filename):
    print('The file does not exist.')
    sys.exit(1)

print('Given file: \n ' + filename)

# Read the file.
doc = Document(filename)
def rewrite(doc, mode):
    """Abbreviate the words of every non-emphasized run in *doc*, in place.

    Runs whose ``bold`` or ``italic`` attribute is truthy are left untouched.
    In every other run each ``\\w+`` match (letters, digits and underscores)
    is shortened according to *mode*:

    * ``'fill'``  -- keep the first character and replace each remaining
      character of the word with ``_`` (``Hello`` -> ``H____``).
    * ``'strip'`` -- keep only the first character (``Hello`` -> ``H``).

    Args:
        doc: Object exposing ``paragraphs``; each paragraph exposes ``runs``
            and each run exposes ``text``, ``bold`` and ``italic``.
        mode: ``'fill'`` or ``'strip'``.

    Returns:
        The same *doc* object, after modification, so calls can be chained
        with ``.save(...)``.

    Raises:
        ValueError: If *mode* is not ``'fill'`` or ``'strip'``. Without this
            check an unsupported mode returned the document unchanged.
    """
    # NOTE(review): only explicitly set bold/italic is tested here; emphasis
    # inherited from a style presumably shows up as None and is therefore
    # abbreviated as well -- confirm against python-docx.
    shorteners = {
        'fill': lambda m: m.group(0)[0] + '_' * (len(m.group(0)) - 1),
        'strip': lambda m: m.group(0)[0],
    }
    if mode not in shorteners:
        raise ValueError(
            "mode must be 'fill' or 'strip', got {0!r}".format(mode))

    # Resolve the replacement and compile the pattern once, not per run.
    shorten = shorteners[mode]
    word = re.compile(r'\w+')

    # Loop through the paragraphs and their runs.
    for para in doc.paragraphs:
        for run in para.runs:
            # If the run is bolded or italicized, don't change it.
            if run.bold or run.italic:
                continue
            original = run.text
            abbreviated = word.sub(shorten, original)
            # Write back only on a real change: assigning run.text replaces
            # the run's existing content, so an unconditional assignment
            # would wipe runs that hold only non-text content.
            if abbreviated != original:
                run.text = abbreviated

    # Return the document.
    return doc
# Save a document with fill and strip modes.
# Derive the output base name by dropping only a trailing ".docx" (any
# letter case); a ".docx" elsewhere in the path, e.g. in a directory name,
# is left alone.
base = filename[:-len('.docx')] if filename.lower().endswith('.docx') else filename
for mode in ('fill', 'strip'):
    # rewrite() modifies the document it is given, so load a fresh copy for
    # each mode rather than stripping the already-filled document.
    rewrite(Document(filename), mode).save('{0}_{1}.docx'.format(base, mode))
print('Wrote files\n - {0}_fill.docx\n - {0}_strip.docx'.format(base))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment