Last active
May 1, 2023 16:33
-
-
Save narskidan/3a017b7f271a28f1238b279487820caa to your computer and use it in GitHub Desktop.
Helping Huad understand how to use Python to remove accidentally repeated chars from a file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# re is the regular expressions library
# which facilitates our search for, and
# replacement of, strings!
import re

# This is the string we want to remove
bad_text = '#'

# We only remove it if it's repeated lots of times
# (after all, there may be other parts of the program
# where we want this text to still appear!)
bad_text_repetition_limit = 5

bad_file_name = 'code.py'

# r+ means we can read and write to the file
# (if we left this out, we'd only be able to read)
# Learn more about modes here:
# https://docs.python.org/3/library/functions.html#open
file_mode = 'r+'


def build_repetition_regex(text, limit):
    """Compile a regex matching `text` repeated `limit` or more times in a row.

    Args:
        text: The literal string whose repetitions we are looking for.
        limit: The smallest number of back-to-back repetitions that counts.

    Returns:
        A compiled regular expression object.

    Raises:
        ValueError: If `text` is empty or `limit` is smaller than 1
            (either would make the regex match "nothing" everywhere).
    """
    if not text:
        raise ValueError('text must not be empty')
    if limit < 1:
        raise ValueError('limit must be at least 1')
    # Regular expressions are too deep to explain here.
    # Look up a tutorial on regexes if you're curious!
    # In short:
    #   re.escape(...) -> treat special characters (like '.') literally
    #   (?: ... )      -> group the whole text, so the repetition applies
    #                     to all of it and not just its last character
    #   {5,}           -> "five or more times in a row"
    return re.compile('(?:' + re.escape(text) + '){' + str(limit) + ',}')


bad_text_regex = build_repetition_regex(bad_text, bad_text_repetition_limit)


def remove_repeated_text(contents, regex=bad_text_regex):
    """Return `contents` with every run matched by `regex` removed.

    Each run is removed whole, in one pass. (Removing the runs one
    length at a time with str.replace() can chop a long run into a
    short leftover piece that then never gets cleaned up.)

    Args:
        contents: The text to clean.
        regex: Compiled pattern describing a run to delete; defaults to
            the module-level `bad_text_regex`.

    Returns:
        The cleaned text. Shorter runs that the regex does not match
        are left exactly as they were.
    """
    return regex.sub('', contents)


def main():
    """Strip the over-repeated `bad_text` runs out of `bad_file_name` in place."""
    with open(bad_file_name, file_mode) as bad_file:
        bad_file_contents = bad_file.read()

        # Show each distinct run we are about to remove, shortest
        # first so the output is the same on every execution.
        problematic_matches = set(bad_text_regex.findall(bad_file_contents))
        for match in sorted(problematic_matches, key=len):
            print(match)

        fixed_contents = remove_repeated_text(bad_file_contents)

        # Now we're ready to write the fixed contents to the file.
        # But since we read the file, now we're at the end of it,
        # so we have to go back to the beginning using .seek()
        bad_file.seek(0)
        bad_file.write(fixed_contents)

        # Since we've made the contents smaller,
        # we need to truncate the file to the new,
        # smaller size!
        bad_file.truncate()


# Only touch the file when this script is run directly
# (for example: python this_script.py), not when it is imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment