kittenswolf · June 10, 2021 04:03
diff --git a/analyzer.py b/analyzer.py
 # -*- coding: utf-8 -*-

 import pprint

 import operator
 import os
 import csv
 import json

 print("written by: kittenswolf#8723")
 print("====")
 print("If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.")

 def parse_csv(file_path):
    """Read a CSV file and return all of its records.

    Args:
        file_path: Path of the comma-separated file; it is decoded as UTF-8.

    Returns:
        A list with one entry per CSV record, each entry being the list of
        that record's field strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline="" is what the csv module asks for: without it the text layer
    # rewrites "\r\n" inside quoted fields (multi-line messages) to "\n"
    # before the parser ever sees them.
    with open(file_path, "r", encoding="utf8", newline="") as f:
        return list(csv.reader(f, delimiter=','))

 def get_most_used_words(messages):
    """Count how often each word occurs across the given message rows.

    The text of a message is taken from the third column of its row and
    split on single spaces. Words are counted case-insensitively; words of
    one character or less and words listed in ``stopwords.txt`` (one word
    per line, looked up in the current working directory) are ignored.

    Args:
        messages: Iterable of rows (sequences of strings). Rows with fewer
            than three columns carry no message text and are skipped.

    Returns:
        A dict mapping each lower-cased word to its number of occurrences.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 as well as files saved with a BOM,
        # so the first stopword is not silently prefixed with U+FEFF.
        with open("stopwords.txt", "r", encoding="utf-8-sig") as f:
            stopwords = {line.lower() for line in f.read().split("\n")}
    except (OSError, UnicodeError):
        # The stop list is optional: a missing or unreadable file simply
        # means that no words are filtered out.
        stopwords = set()

    reverse_dict = {}
    for message in messages:
        if len(message) < 3:
            continue

        for word in message[2].split(" "):
            # Also drops the empty strings produced by consecutive spaces.
            if len(word) <= 1:
                continue

            key = word.lower()
            if key in stopwords:
                continue

            reverse_dict[key] = reverse_dict.get(key, 0) + 1

    return reverse_dict

 # Script body: collect every channel folder below "messages", load the rows
 # of each channel's messages.csv and print the 100 most frequent words.
 print("Loading channels...")
 # os.walk yields the root directory itself as well; only the directories
 # below it are treated as channels.
 message_channels = [x[0] for x in os.walk("messages") if not x[0] == "messages"]
 print("Loaded {} channels.".format(len(message_channels)))

 print("Loading messages...")
 all_messages = []
 for channel in message_channels:
    csv_path = os.path.join(channel, "messages.csv")
    # A folder without an export file should not abort the whole run.
    if not os.path.isfile(csv_path):
        continue
    # NOTE(review): if each messages.csv starts with a header row, that row
    # is loaded as a message too - confirm against the export format.
    all_messages.extend(parse_csv(csv_path))

 print("Loaded {} messages.".format(len(all_messages)))

 print("Getting most used words...")
 most_used = get_most_used_words(all_messages)
 # Ascending sort followed by a reverse (rather than reverse=True) keeps the
 # original ordering of words that share the same count.
 sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
 sorted_most_used.reverse()

 print()
 print("Your 100 top used words:")
 print("No. | Word | Usage")

 for i, word in enumerate(sorted_most_used[:100], start=1):
    print('{}. "{}" | x{}'.format(i, word[0], word[1]))
	# -*- coding: utf-8 -*-

	import pprint

	import operator
	import os
	import csv
	import json

	print("written by: kittenswolf#8723")
	print("====")
	print("If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.")

	def parse_csv(file_path):
	    """Read a CSV file and return all of its records.

	    Args:
	        file_path: Path of the comma-separated file; it is decoded as UTF-8.

	    Returns:
	        A list with one entry per CSV record, each entry being the list of
	        that record's field strings.

	    Raises:
	        OSError: If the file cannot be opened.
	    """
	    # newline="" is what the csv module asks for: without it the text layer
	    # rewrites "\r\n" inside quoted fields (multi-line messages) to "\n"
	    # before the parser ever sees them.
	    with open(file_path, "r", encoding="utf8", newline="") as f:
	        return list(csv.reader(f, delimiter=','))

	def get_most_used_words(messages):
	    """Count how often each word occurs across the given message rows.

	    The text of a message is taken from the third column of its row and
	    split on single spaces. Words are counted case-insensitively; words of
	    one character or less and words listed in ``stopwords.txt`` (one word
	    per line, looked up in the current working directory) are ignored.

	    Args:
	        messages: Iterable of rows (sequences of strings). Rows with fewer
	            than three columns carry no message text and are skipped.

	    Returns:
	        A dict mapping each lower-cased word to its number of occurrences.
	    """
	    try:
	        # "utf-8-sig" reads plain UTF-8 as well as files saved with a BOM,
	        # so the first stopword is not silently prefixed with U+FEFF.
	        with open("stopwords.txt", "r", encoding="utf-8-sig") as f:
	            stopwords = {line.lower() for line in f.read().split("\n")}
	    except (OSError, UnicodeError):
	        # The stop list is optional: a missing or unreadable file simply
	        # means that no words are filtered out.
	        stopwords = set()

	    reverse_dict = {}
	    for message in messages:
	        if len(message) < 3:
	            continue

	        for word in message[2].split(" "):
	            # Also drops the empty strings produced by consecutive spaces.
	            if len(word) <= 1:
	                continue

	            key = word.lower()
	            if key in stopwords:
	                continue

	            reverse_dict[key] = reverse_dict.get(key, 0) + 1

	    return reverse_dict

	# Script body: collect every channel folder below "messages", load the rows
	# of each channel's messages.csv and print the 100 most frequent words.
	print("Loading channels...")
	# os.walk yields the root directory itself as well; only the directories
	# below it are treated as channels.
	message_channels = [x[0] for x in os.walk("messages") if not x[0] == "messages"]
	print("Loaded {} channels.".format(len(message_channels)))

	print("Loading messages...")
	all_messages = []
	for channel in message_channels:
	    csv_path = os.path.join(channel, "messages.csv")
	    # A folder without an export file should not abort the whole run.
	    if not os.path.isfile(csv_path):
	        continue
	    # NOTE(review): if each messages.csv starts with a header row, that row
	    # is loaded as a message too - confirm against the export format.
	    all_messages.extend(parse_csv(csv_path))

	print("Loaded {} messages.".format(len(all_messages)))

	print("Getting most used words...")
	most_used = get_most_used_words(all_messages)
	# Ascending sort followed by a reverse (rather than reverse=True) keeps the
	# original ordering of words that share the same count.
	sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
	sorted_most_used.reverse()

	print()
	print("Your 100 top used words:")
	print("No. | Word | Usage")

	for i, word in enumerate(sorted_most_used[:100], start=1):
	    print('{}. "{}" | x{}'.format(i, word[0], word[1]))