Skip to content

Instantly share code, notes, and snippets.

@rishi-raj-jain
Created March 25, 2021 17:26
Show Gist options
  • Save rishi-raj-jain/46bad3fb0fb63f6f6037f573691db047 to your computer and use it in GitHub Desktop.
Save rishi-raj-jain/46bad3fb0fb63f6f6037f573691db047 to your computer and use it in GitHub Desktop.
Plot grayscale wordclouds with ease.
# Import tqdm, re, pandas, random and matplotlib
import tqdm, re, pandas as pd, random
import matplotlib.pyplot as plt
# Import wordcloud library
from wordcloud import WordCloud, STOPWORDS
# Module-level copy of the wordcloud library's STOPWORDS collection, stored as a
# set; plotWordCloud passes it to WordCloud through the `stopwords` argument.
stopwords= set(STOPWORDS)
# Define the colors for grayscale
def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random dark-grey HSL colour string for one word.

    The signature follows the keyword arguments a WordCloud ``color_func``
    callback receives; only ``random_state`` influences the result.

    Args:
        word: Unused; accepted for callback compatibility.
        font_size: Unused; accepted for callback compatibility.
        position: Unused; accepted for callback compatibility.
        orientation: Unused; accepted for callback compatibility.
        random_state: Optional generator exposing ``randint(a, b)`` (for
            example a ``random.Random`` instance). When given, the lightness
            is drawn from it so that a seeded recolour is reproducible. When
            ``None``, the module-level ``random`` generator is used.
        **kwargs: Any further callback arguments; ignored.

    Returns:
        A string of the form ``"hsl(0, 0%, L%)"`` where ``L`` is a random
        integer lightness between 15 and 45 (inclusive for ``random.Random``).
    """
    # Honour the caller's generator instead of always using global randomness.
    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(15, 45)
# Preprocess the data (strip mentions, links and non-alphanumeric symbols such as '#' from the text, then lowercase it)
def preprocess(data):
    """Clean every string in ``data`` and concatenate the results.

    For each string item the following are replaced by a space:
      * ``@`` followed by ASCII letters/digits (mentions),
      * any character that is not an ASCII letter, digit, space or tab
        (so the ``#`` of a hashtag is dropped but the tag word is kept),
      * ``scheme://...`` up to the next whitespace (links).
    The remaining whitespace-separated tokens are lower-cased and joined with
    single spaces. Items that are not ``str`` are skipped.

    Args:
        data: Iterable of items; iteration is wrapped in a tqdm progress bar.

    Returns:
        One string holding the cleaned text of every string item, each item's
        text followed by a single space (empty string if there are no string
        items).
    """
    # Raw string avoids invalid-escape warnings; compiled once, outside the loop.
    noise = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
    pieces = []
    for item in tqdm.tqdm(data):
        if not isinstance(item, str):
            continue
        tokens = [token.lower() for token in noise.sub(" ", item).split()]
        # Trailing space keeps consecutive items separated (and matches the
        # historical output format exactly).
        pieces.append(" ".join(tokens) + " ")
    return "".join(pieces)
# Plotting the wordcloud
def plotWordCloud(data):
    """Show a grayscale word cloud built from ``data`` in a matplotlib figure.

    ``data`` is passed through ``preprocess``; the resulting text is given to
    ``WordCloud.generate``, the cloud is recoloured with ``grey_color_func``
    and the image is displayed with ``plt.show()``. Nothing is returned.
    """
    # WordCloud settings: 1000x1000 canvas on a white background.
    cloud_options = dict(
        collocations=False,  # count single words only, no two-word collocations
        width=1000,
        height=1000,
        background_color='#ffffff',
        stopwords=stopwords,
        min_font_size=10,
    )
    cloud = WordCloud(**cloud_options)
    rendered = cloud.generate(preprocess(data))

    # Swap the default colours for random dark greys.
    rendered.recolor(color_func=grey_color_func)

    # Draw the cloud on a borderless 8x8 inch figure without axes.
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(rendered)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment