PandaWhoCodes · January 27, 2020 18:29
diff --git a/n_grams.py b/n_grams.py
 import csv
 from nltk import ngrams
 import sys
 import collections
 import string
 # Translation table that deletes every character in string.punctuation.
 translator = str.maketrans("", "", string.punctuation)

 def to_string(list):
    """
    Join the items of a list into one string, one item per line.

    :param list: iterable of strings (the name shadows the builtin but is
        kept so existing keyword callers keep working)
    :return: String in which every item is followed by a newline; an
        empty string when the list is empty
    """
    # str.join builds the result in a single pass and avoids rebinding
    # the builtin name ``str`` as a loop variable.
    return "".join(item + "\n" for item in list)


 def get_stop_list():
    """
    Load the stop words stored in SmartStoplist.txt.

    :return: list holding the file's contents split on newline characters
    """
    with open("SmartStoplist.txt") as stop_file:
        contents = stop_file.read()
    return contents.split("\n")


 def removeStopwords(wordlist, stopwords=None):
    """
    Remove stop words from a list of words.

    :param wordlist: list of words
    :param stopwords: optional iterable of stop words; when omitted, the
        list returned by get_stop_list() is used
    :return: list of the words from wordlist that are not stop words,
        in their original order
    """
    if stopwords is None:
        stopwords = get_stop_list()
    # Build the set once so every membership test below is a hash lookup.
    stop_set = set(stopwords)
    return [word for word in wordlist if word not in stop_set]


 def get_text(filename):
    """
    Extract the lower-cased first column of every row of a CSV file.

    :param filename: path of the UTF-8 encoded CSV file to read
    :return: String with one line per non-empty row; empty string when
        the file has no rows
    """
    # newline='' lets the csv module do its own line-ending handling, as
    # the csv documentation asks for files handed to csv.reader.
    with open(filename, 'r', newline='', encoding="utf8") as f:
        reader = csv.reader(f)
        # Blank lines are parsed as empty rows; skipping them keeps
        # row[0] from raising IndexError.
        return "".join(row[0].lower() + "\n" for row in reader if row)


 def ngram(text_list, n):
    """
    Build the n-grams of a sequence of tokens.

    :param text_list: sequence of tokens
    :param n: size of each n-gram
    :return: the iterable produced by nltk's ngrams(text_list, n)
    """
    grams = ngrams(text_list, n)
    return grams


 def count_frequency(grams):
    """
    Count how often each n-gram occurs.

    :param grams: iterable of n-grams, each an iterable of strings
    :return: collections.Counter keyed by the space-joined n-gram
    """
    return collections.Counter(" ".join(parts) for parts in grams)


 def handle_grams(filename):
    """
    Write the most frequent 1-, 2- and 3-grams of a CSV file to a new CSV.

    The text from get_text(filename) has punctuation removed, is split on
    whitespace and filtered through removeStopwords. For n = 1, 2, 3 the
    20 most common n-grams are written as (ngram, count) rows to a file
    named "ngrams_<input name>" in the same directory as the input.

    :param filename: path of the input CSV file
    """
    import os  # local import: only needed to build the output path

    text = removeStopwords(get_text(filename).translate(translator).split())
    # Prefix the base name rather than the whole path, so an input such as
    # "data/x.csv" is written to "data/ngrams_x.csv" instead of the bogus
    # "ngrams_data/x.csv". Bare file names give the same result as before.
    directory, basename = os.path.split(filename)
    out_path = os.path.join(directory, "ngrams_" + basename)
    # The with-block guarantees the rows are flushed and the file closed.
    with open(out_path, 'w', newline='', encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)
        for i in range(1, 4):
            counts = count_frequency(ngram(text, i))
            csvWriter.writerows(counts.most_common(20))


 # handle_grams(filename="#CES.csv")
 if __name__ == '__main__':
    # The input CSV path is required; fail with a usage message instead
    # of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <input.csv>".format(sys.argv[0]))
    filename = sys.argv[1]
    handle_grams(filename)
	import csv
	from nltk import ngrams
	import sys
	import collections
	import string
	# Translation table that deletes every character in string.punctuation.
	translator = str.maketrans("", "", string.punctuation)

	def to_string(list):
	    """
	    Join the items of a list into one string, one item per line.

	    :param list: iterable of strings (the name shadows the builtin but is
	        kept so existing keyword callers keep working)
	    :return: String in which every item is followed by a newline; an
	        empty string when the list is empty
	    """
	    # Body indentation restored (it was stripped in this copy). str.join
	    # builds the result in one pass and avoids rebinding builtin ``str``.
	    return "".join(item + "\n" for item in list)


	def get_stop_list():
	    """
	    Load the stop words stored in SmartStoplist.txt.

	    :return: list holding the file's contents split on newline characters
	    """
	    # Body indentation restored; it was stripped in this copy.
	    with open("SmartStoplist.txt") as stop_file:
	        contents = stop_file.read()
	    return contents.split("\n")


	def removeStopwords(wordlist, stopwords=None):
	    """
	    Remove stop words from a list of words.

	    :param wordlist: list of words
	    :param stopwords: optional iterable of stop words; when omitted, the
	        list returned by get_stop_list() is used
	    :return: list of the words from wordlist that are not stop words,
	        in their original order
	    """
	    if stopwords is None:
	        stopwords = get_stop_list()
	    # Build the set once so every membership test below is a hash lookup.
	    stop_set = set(stopwords)
	    return [word for word in wordlist if word not in stop_set]


	def get_text(filename):
	    """
	    Extract the lower-cased first column of every row of a CSV file.

	    :param filename: path of the UTF-8 encoded CSV file to read
	    :return: String with one line per non-empty row; empty string when
	        the file has no rows
	    """
	    # newline='' lets the csv module do its own line-ending handling, as
	    # the csv documentation asks for files handed to csv.reader.
	    with open(filename, 'r', newline='', encoding="utf8") as f:
	        reader = csv.reader(f)
	        # Blank lines are parsed as empty rows; skipping them keeps
	        # row[0] from raising IndexError.
	        return "".join(row[0].lower() + "\n" for row in reader if row)


	def ngram(text_list, n):
	    """
	    Build the n-grams of a sequence of tokens.

	    :param text_list: sequence of tokens
	    :param n: size of each n-gram
	    :return: the iterable produced by nltk's ngrams(text_list, n)
	    """
	    # Body indentation restored; it was stripped in this copy.
	    return ngrams(text_list, n)


	def count_frequency(grams):
	    """
	    Count how often each n-gram occurs.

	    :param grams: iterable of n-grams, each an iterable of strings
	    :return: collections.Counter keyed by the space-joined n-gram
	    """
	    # Body indentation restored; Counter consumes the generator directly,
	    # so no intermediate list is needed.
	    return collections.Counter(" ".join(parts) for parts in grams)


	def handle_grams(filename):
	    """
	    Write the most frequent 1-, 2- and 3-grams of a CSV file to a new CSV.

	    The text from get_text(filename) has punctuation removed, is split on
	    whitespace and filtered through removeStopwords. For n = 1, 2, 3 the
	    20 most common n-grams are written as (ngram, count) rows to a file
	    named "ngrams_<input name>" in the same directory as the input.

	    :param filename: path of the input CSV file
	    """
	    import os  # local import: only needed to build the output path

	    text = removeStopwords(get_text(filename).translate(translator).split())
	    # Prefix the base name rather than the whole path, so an input such as
	    # "data/x.csv" is written to "data/ngrams_x.csv" instead of the bogus
	    # "ngrams_data/x.csv". Bare file names give the same result as before.
	    directory, basename = os.path.split(filename)
	    out_path = os.path.join(directory, "ngrams_" + basename)
	    # The with-block guarantees the rows are flushed and the file closed.
	    with open(out_path, 'w', newline='', encoding='utf-8') as csvFile:
	        csvWriter = csv.writer(csvFile)
	        for i in range(1, 4):
	            counts = count_frequency(ngram(text, i))
	            csvWriter.writerows(counts.most_common(20))


	# handle_grams(filename="#CES.csv")
	if __name__ == '__main__':
	    # The input CSV path is required; fail with a usage message instead
	    # of an IndexError traceback when it is missing.
	    if len(sys.argv) < 2:
	        sys.exit("usage: {} <input.csv>".format(sys.argv[0]))
	    filename = sys.argv[1]
	    handle_grams(filename)
No results found