Created
April 7, 2020 19:04
-
-
Save octohedron/f9e72bc7b93d209d93e29d7958e1dffd to your computer and use it in GitHub Desktop.
Word cloud
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pymongo | |
import numpy as np | |
from os import path | |
import os | |
from PIL import Image | |
import matplotlib.pyplot as plt | |
from collections import Counter | |
from wordcloud import WordCloud, STOPWORDS | |
# Directory that holds this script; falls back to the current working
# directory when ``__file__`` is not defined (e.g. an interactive session).
if "__file__" in locals():
    d = path.dirname(__file__)
else:
    d = os.getcwd()

# Mask image, loaded as an array; passed to WordCloud as ``mask`` further down.
mask_path = path.join(d, "corona1.png")
cmask = np.array(Image.open(mask_path))

# wordcloud's default stop words plus a few extra terms to leave out.
stopwords = set(STOPWORDS) | {
    "coronavirus",
    "will",
    "say",
    "says",
    "now",
    "covid",
}
class TextUtil:
    """Class for working with text stored in MongoDB.

    ``text`` accumulates the lower-cased post titles loaded by
    :meth:`set_posts` and is the input of :meth:`draw_image`.
    """

    def __init__(self, load_amount, uri="mongodb://ADDRESS:27017/admin",
                 username='USER', password='PASSWORD'):
        """Create the handler and open the MongoDB client.

        Args:
            load_amount: Value passed to the cursor's ``limit()`` when
                loading posts.
            uri: MongoDB connection URI.
            username: User name handed to ``pymongo.MongoClient``.
            password: Password handed to ``pymongo.MongoClient``.
        """
        self.load_amount = load_amount
        self.captions = []      # not used by the methods of this class
        self.likes = Counter()  # not used by the methods of this class
        self.text = ""
        self.client = pymongo.MongoClient(
            uri,
            username=username,
            password=password
        )

    @staticmethod
    def _join_titles(posts):
        """Return the lower-cased titles of *posts* joined by single spaces.

        Documents without a non-empty string ``title`` are skipped so one
        malformed post does not abort the whole load.
        """
        titles = (post.get("title") for post in posts)
        return " ".join(
            title.lower()
            for title in titles
            if isinstance(title, str) and title
        )

    def set_posts(self):
        """Load post titles from the database and append them to ``self.text``.

        Reads the ``posts`` collection of the ``chat`` database, sorted by
        ``timestamp`` ascending and limited to ``self.load_amount``.
        """
        db = self.client["chat"]
        posts = db["posts"].find({}, {"title": 1}).sort(
            "timestamp", pymongo.ASCENDING).limit(self.load_amount)
        loaded = self._join_titles(posts)
        if loaded:
            # Keep a separator between previously loaded text and the new
            # titles; without it the neighbouring words would be fused.
            self.text = self.text + " " + loaded if self.text else loaded
        print("Loaded posts")

    def draw_image(self, filename):
        """Draw a word cloud of ``self.text``, save it and display it.

        Args:
            filename: Path the rendered word cloud image is written to.

        Raises:
            ValueError: If no text has been loaded.
        """
        if not self.text.strip():
            # Fail early with an actionable message instead of the generic
            # error raised from inside the word cloud generation.
            raise ValueError(
                "No text to draw; call set_posts() before draw_image()")
        print("Drawing image")
        wc = WordCloud(
            stopwords=stopwords,
            background_color="white",
            max_words=1000,
            mask=cmask,
            contour_width=3,
            contour_color='steelblue')
        wc.generate(self.text)
        wc.to_file(filename)
        # First figure: the generated word cloud.
        plt.imshow(wc, interpolation='bilinear')
        plt.axis("off")
        # Second figure: the mask image, shown in grayscale.
        plt.figure()
        plt.imshow(cmask, cmap=plt.cm.gray, interpolation='bilinear')
        plt.axis("off")
        plt.show()
# Script driver: load the titles, render the word cloud, and always release
# the MongoDB connection, even when loading or drawing fails.
t_handler = TextUtil(10000)  # Amount of posts
try:
    t_handler.set_posts()
    t_handler.draw_image("coronares.png")
finally:
    t_handler.client.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment