Last active
August 30, 2020 13:47
-
-
Save skj-skj/73208320ca4fcc6ea1318d450c9622a2 to your computer and use it in GitHub Desktop.
Generate List of Words Without Vowel - noVowel.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import re

# Collects every word in which checkIfVowel() finds no vowel. The script
# de-duplicates and sorts this list once all input files have been read.
wordsWithoutVowel = []

# Running 1-based count of the rows handled so far across all input files;
# openFiles() passes it to progress() and then increments it.
countWords = 1
def progress(current, final=362707 - 5):
    """Print how far along the run is, as a whole-number percentage.

    Args:
        current: Number of rows handled so far.
        final: Total number of rows expected. Per the original author's
            note, 362707 is the total number of lines in all index files
            and 5 is subtracted to discount the extra space in each file.

    Prints ``"<percent> %"`` (percentage truncated to an int) on its own line.
    """
    if final <= 0:
        # Nothing to process: report completion instead of dividing by zero.
        printProgress = 100
    else:
        printProgress = int((current / final) * 100)
    print(f"{printProgress} %")
def checkIfVowel(word):
    """Record *word* in ``wordsWithoutVowel`` when it contains no vowel.

    A vowel is any of ``a``, ``e``, ``i``, ``o``, ``u`` in either case.

    Args:
        word: The string to inspect.

    Returns nothing; the result is the append to the module-level
    ``wordsWithoutVowel`` list.
    """
    # IGNORECASE so that upper-case vowels ("A", "CIA") count as vowels too;
    # search() is enough because only the existence of a match matters.
    if re.search("[aeiou]", word, re.IGNORECASE) is None:
        wordsWithoutVowel.append(word)
def openFiles(file):
    """Read one CSV file and run the vowel check on each row's first column.

    For every non-empty row, the value in column 0 is passed to
    ``checkIfVowel()``, the current ``countWords`` is reported through
    ``progress()``, and ``countWords`` is then incremented.

    Args:
        file: Path of the CSV file to read.
    """
    global countWords
    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(file, "rt", newline="") as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for a blank line; there is no word
                # to check, and row[0] would raise IndexError.
                continue
            checkIfVowel(row[0])
            progress(countWords)
            countWords += 1
# Wordnet 3.1 data files: http://wordnetcode.princeton.edu/wn3.1.dict.tar.gz or https://wordnet.princeton.edu/download/current-version
# NOTE(review): the inputs are presumably CSV conversions of the WordNet
# index files; confirm the file names (e.g. "index.ad.csv") against the data.
openFiles("index.ad.csv")
openFiles("index.adv.csv")
openFiles("index.noun.csv")
openFiles("index.verb.csv")
openFiles("index.sense.csv")

# A set drops duplicate words; sorted() turns it back into a list in
# alphabetical order (digits sort before letters).
wordsWithoutVowel = sorted(set(wordsWithoutVowel))

# Save the words, one per line. Mode "w" so that re-running the script
# replaces the previous list instead of appending a second copy to it; the
# with-block closes the file even if a write fails.
with open("noVowelWordsList.txt", "w") as outFile:
    outFile.writelines(word + "\n" for word in wordsWithoutVowel)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generate a list of words without vowels
Libraries used: csv, re
Dataset: WordNet 3.1