Skip to content

Instantly share code, notes, and snippets.

@jcdevilleres
Created July 6, 2020 10:43
Show Gist options
  • Save jcdevilleres/c339e57f0689591061e2d878ef57d0a9 to your computer and use it in GitHub Desktop.
Save jcdevilleres/c339e57f0689591061e2d878ef57d0a9 to your computer and use it in GitHub Desktop.
import os
# Module-level state
# Prompt for the directory holding the documents (blocks until answered)
directory = input("Enter directory for documents: ")
document = corpus = ""
# Utility functions
def doc_to_cor(document):
    """Append one document as a single line to the corpus file.

    The corpus file name is the module-level ``directory`` with every ``/``
    removed, followed by the ``.cor`` suffix. The file is opened in append
    mode, so repeated calls add one line each.

    Args:
        document: Text to append; a newline is written after it.
    """
    corpus_path = directory.replace('/', '') + ".cor"
    # A context manager guarantees the handle is flushed and closed on every
    # call instead of being left open until garbage collection.
    with open(corpus_path, "a") as corpus_file:
        corpus_file.write(document + '\n')
def clean_doc(filename):
    """Return the contents of one document as a single lower-cased string.

    Args:
        filename: Name of a file located inside the module-level
            ``directory``.

    Returns:
        The file's text, lower-cased, with every newline character removed.
    """
    # os.path.join supplies the separator when ``directory`` was entered
    # without a trailing slash; plain concatenation would yield a wrong path.
    path = os.path.join(directory, filename)
    with open(path, "r") as handle:
        # NOTE: lines are joined with no separator, so the last word of one
        # line runs into the first word of the next (original behaviour kept).
        return "".join(line.lower().replace('\n', '') for line in handle)
# Build the corpus: every .txt file in the directory becomes one corpus line
for filename in os.listdir(directory):
    if not filename.endswith(".txt"):
        # Not a text document; nothing to do for this entry
        continue
    document = clean_doc(filename)
    doc_to_cor(document)
    # Report the document that was just appended
    print(os.path.join(directory, filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment