Created
March 25, 2021 17:26
-
-
Save rishi-raj-jain/46bad3fb0fb63f6f6037f573691db047 to your computer and use it in GitHub Desktop.
Plot grayscale wordclouds with ease.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import tqdm, re, pandas, random and matplotlib
import tqdm, re, pandas as pd, random | |
import matplotlib.pyplot as plt | |
# Import wordcloud library | |
from wordcloud import WordCloud, STOPWORDS | |
# Private copy of the library's stop-word collection, used when building the cloud
stopwords = {*STOPWORDS}
# Define the colors for grayscale
def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random dark-grey HSL color string for one word.

    The signature matches the ``color_func`` callback that this file hands to
    ``WordCloud.recolor``; only ``random_state`` influences the result.

    Args:
        word: Accepted for callback compatibility; ignored.
        font_size: Accepted for callback compatibility; ignored.
        position: Accepted for callback compatibility; ignored.
        orientation: Accepted for callback compatibility; ignored.
        random_state: Optional generator exposing ``randint(a, b)`` (for
            example ``random.Random``). When given, the lightness is drawn
            from it so a seeded generator yields reproducible colors. When
            ``None``, the module-level ``random`` generator is used.
        **kwargs: Any extra callback arguments; ignored.

    Returns:
        A string of the form ``"hsl(0, 0%, N%)"`` with ``N`` between 15 and
        45 inclusive.
    """
    # Honor the caller's generator instead of always using global randomness.
    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(15, 45)
# Preprocess the data (strip mentions, links and non-alphanumeric characters)
def preprocess(data):
    """Clean an iterable of texts and merge them into one lowercase string.

    For every ``str`` item, ``@mentions`` and ``scheme://`` links are replaced
    by a space, as is every character other than ASCII letters, digits,
    space and tab. Hashtags therefore lose their ``#`` but keep their text.
    The remaining tokens are lowercased and joined with single spaces.
    Items that are not ``str`` are skipped.

    Args:
        data: Iterable of items to clean; it is wrapped in ``tqdm.tqdm`` so
            progress is reported while iterating.

    Returns:
        The cleaned text of all string items concatenated in order, each
        item's tokens followed by one trailing space. An empty string when
        there are no string items.
    """
    # Raw string: "\w", "\/" and "\S" are invalid escapes in a normal literal.
    # Compiled once here rather than looked up on every iteration.
    noise = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
    pieces = []
    for entry in tqdm.tqdm(data):
        if not isinstance(entry, str):
            continue
        # After substitution only ASCII alphanumerics and whitespace remain,
        # so lowercasing the whole string equals lowercasing each token.
        tokens = noise.sub(" ", entry).lower().split()
        pieces.append(" ".join(tokens) + " ")
    # Single join avoids repeated string reallocation from "+=" in the loop.
    return "".join(pieces)
# Plotting the wordcloud
def plotWordCloud(data):
    """Build a grayscale word cloud from ``data`` and show it with matplotlib.

    ``data`` is passed to ``preprocess`` to obtain the text. The cloud is
    generated from that text, recolored with ``grey_color_func`` and shown
    in an 8x8 figure with the axes hidden. Nothing is returned.
    """
    # Configuring the WordCloud api
    cloud_options = {
        "collocations": False,  # To remove duplicate entries
        "width": 1000,
        "height": 1000,
        "background_color": '#ffffff',
        "stopwords": stopwords,
        "min_font_size": 10,
    }
    cloud = WordCloud(**cloud_options)
    cloud = cloud.generate(preprocess(data))
    cloud.recolor(color_func=grey_color_func)

    # plot the WordCloud
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(cloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment