Last active
June 10, 2021 04:03
-
-
Save kittenswolf/0c4f42303aa9498066e6a0e57b972201 to your computer and use it in GitHub Desktop.
discord data analyzer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import pprint | |
import operator | |
import os | |
import csv | |
import json | |
# Startup banner: author credit, a separator line, and a hint about the
# optional stopwords.txt file that get_most_used_words() looks for.
for banner_line in (
    "written by: kittenswolf#8723",
    "====",
    "If you want the script to ignore common words, create a stopwords.txt file with one word per line. If you're too lazy, google '<your language> stopwords' and copy them.",
):
    print(banner_line)
def parse_csv(file_path):
    """Read a comma-separated UTF-8 file and return all of its rows.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is a list of the
        record's fields as strings. Any header row is returned like every
        other row.

    Raises:
        OSError: If the file cannot be opened.
        csv.Error: If the content is not valid CSV.
    """
    # newline="" hands line endings to the csv module untouched, which is
    # what it needs to keep line breaks inside quoted fields intact.
    with open(file_path, "r", encoding="utf8", newline="") as f:
        return list(csv.reader(f, delimiter=','))
def get_most_used_words(messages):
    """Count how often each word occurs across all message texts.

    Words are compared case-insensitively (lower-cased). Tokens of length
    one or less and words listed in an optional ``stopwords.txt`` file in
    the current working directory (one word per line) are ignored.

    Args:
        messages: Iterable of rows (sequences of strings); the message text
            is read from index 2 of each row. Rows with fewer than three
            fields are skipped.

    Returns:
        A dict mapping each counted lower-cased word to its number of
        occurrences. Ignored words do not appear in the result.
    """
    # The stopword file is optional: a missing or unreadable file simply
    # means that no words are filtered.
    try:
        with open("stopwords.txt", "r", encoding="utf8") as stop_file:
            stopwords = {line.strip().lower() for line in stop_file}
    except (OSError, UnicodeError):
        stopwords = set()
    stopwords.add('')

    word_counts = {}
    for message in messages:
        # Blank CSV lines parse to short rows that carry no text column.
        if len(message) < 3:
            continue
        # split() without arguments breaks on any whitespace run, so words
        # separated by newlines or tabs are counted individually.
        for word in message[2].split():
            if len(word) <= 1:
                continue
            key = word.lower()
            if key in stopwords:
                continue
            word_counts[key] = word_counts.get(key, 0) + 1
    return word_counts
# Script body: collect the messages of every channel directory below
# "messages", count the words and print the 100 most frequent ones.

print("Loading channels...")
# Every directory that os.walk finds below "messages" is treated as a
# channel; the root directory itself is excluded.
message_channels = [x[0] for x in os.walk("messages") if not x[0] == "messages"]
print("Loaded {} channels.".format(len(message_channels)))

print("Loading messages...")
all_messages = []
for channel in message_channels:
    csv_path = os.path.join(channel, "messages.csv")
    # os.walk also yields nested or unrelated directories; skip the ones
    # that hold no messages.csv instead of aborting the whole run.
    if not os.path.isfile(csv_path):
        continue
    rows = parse_csv(csv_path)
    # NOTE(review): Discord exports appear to start each messages.csv with a
    # header row whose first column is "ID" - confirm against a real export.
    # Such a row is not a message, so it is dropped; files without it are
    # left untouched.
    if rows and rows[0] and rows[0][0].lstrip("\ufeff").strip().lower() == "id":
        rows = rows[1:]
    all_messages.extend(rows)
print("Loaded {} messages.".format(len(all_messages)))

print("Getting most used words...")
most_used = get_most_used_words(all_messages)

# Sort ascending by count, then reverse so the most used word comes first.
sorted_most_used = sorted(most_used.items(), key=operator.itemgetter(1))
sorted_most_used.reverse()

print()
print("Your 100 top used words:")
print("No. | Word | Usage")

# Each entry is a (word, count) pair; ranks start at 1.
for i, word in enumerate(sorted_most_used[:100], start=1):
    print('{}. "{}" | x{}'.format(i, word[0], word[1]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment