Skip to content

Instantly share code, notes, and snippets.

@mim-Armand
Created December 23, 2024 04:55
Show Gist options
  • Save mim-Armand/edec604c6b0cab54e3ccc4c451680a09 to your computer and use it in GitHub Desktop.
Save mim-Armand/edec604c6b0cab54e3ccc4c451680a09 to your computer and use it in GitHub Desktop.
txt to word-per-line
import sys
import os
# Configurable variables: fallback paths used by the script entry point when
# the corresponding command-line argument is not supplied.
DEFAULT_INPUT_FILE = "001_001.txt"
DEFAULT_OUTPUT_FILE = "unique_words_001_001.txt"
def extract_unique_words(input_file, output_file):
    """Write the unique, lowercased words of a text file, one per line.

    The input is split on whitespace; each token has leading and trailing
    punctuation (``,.!?"'()[]{}``) removed and is lowercased. Tokens that
    are empty after stripping (e.g. a bare ``...``) are discarded. The
    remaining words are de-duplicated, sorted, and written to
    ``output_file`` separated by newlines, with no trailing newline.

    Errors are reported on stdout instead of being raised, so the function
    always returns normally.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the file to create or overwrite.

    Returns:
        None.
    """
    strip_chars = ",.!?\"'()[]{}"

    # Reading and writing are guarded separately so that a missing output
    # directory is not misreported as a missing input file.
    try:
        with open(input_file, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: The file {input_file} does not exist.")
        return
    except Exception as e:
        # Broad on purpose: the function's contract is to report, not raise.
        print(f"An error occurred: {e}")
        return

    stripped = (word.strip(strip_chars).lower() for word in text.split())
    # Drop tokens that consisted solely of punctuation; otherwise an empty
    # string would end up as a blank entry in the output.
    unique_words = sorted({word for word in stripped if word})

    try:
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("\n".join(unique_words))
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    print(f"Unique words have been written to {output_file}")
if __name__ == "__main__":
    # Positional arguments override the defaults: the first is the input
    # path, the second is the output path. Either may be omitted.
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if cli_args else DEFAULT_INPUT_FILE
    output_file = cli_args[1] if len(cli_args) >= 2 else DEFAULT_OUTPUT_FILE
    extract_unique_words(input_file, output_file)
# Run either way:
# python txt_to_wordsfile.py
# python txt_to_wordsfile.py path/to/input.txt path/to/output.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment