Last active
August 29, 2015 14:22
-
-
Save icaoberg/324d00717efbf383bbd4 to your computer and use it in GitHub Desktop.
Generates a word cloud from Little Women by Louisa May Alcott
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Generates a word cloud from Little Women by Louisa May Alcott.
#
# Creates ./little_women, sets up a virtualenv inside it, installs the Python
# dependencies, fetches the silhouette used as the mask, writes
# little_women.py and runs it. The picture is saved as little_women.png.
# Uses virtualenv, pip, wget and convert, which must be on the PATH.

# Stop if the working directory cannot be created or entered, so nothing
# below ends up in the caller's directory by accident.
mkdir -p little_women || exit 1
cd little_women || exit 1

virtualenv --system-site-packages .
source ./bin/activate

pip install Cython
pip install Pillow
# https instead of git://: GitHub no longer serves the unauthenticated
# git protocol, so the git+git:// form cannot be cloned any more.
pip install git+https://github.com/amueller/word_cloud.git
pip install numpy
pip install scipy

# -nc: do not download the stencil again when it is already present.
wget -nc https://openclipart.org/download/1196/liftarn-Kamma-Rahbek-silhouette.svg
convert -density 300 liftarn-Kamma-Rahbek-silhouette.svg liftarn-Kamma-Rahbek-silhouette.png

# The delimiter is quoted so the shell copies the Python source verbatim
# (no escaping of quotes, no variable or backslash expansion).
cat > little_women.py <<'PYEOF'
from os import path
import os
import matplotlib.pyplot as plt
import random

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread is gone from SciPy >= 1.2; read the image with
    # Pillow and numpy instead (both are installed by the wrapper script).
    # NOTE(review): for palette-mode PNGs this may return palette indices
    # rather than RGB values - confirm the mask still looks right.
    import numpy as np
    from PIL import Image

    def imread(name):
        return np.array(Image.open(name))

try:
    from urllib import urlretrieve  # Python 2
except ImportError:
    from urllib.request import urlretrieve  # Python 3

from wordcloud import WordCloud, STOPWORDS

#change this to your taste
dpi = 300

#i used wc to count the number of words in little_women.txt
number_of_words = 188986


def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    # Returns a random light grey as an hsl() string.
    # Not applied below: the image is saved with the default colors.
    return "hsl(0, 0%%, %d%%)" % random.randint(60, 100)


d = os.getcwd()

#you can find the original stencil in
#https://openclipart.org/detail/1196/kamma-rahbek-silhouette
filename = 'liftarn-Kamma-Rahbek-silhouette.png'
mask = imread(path.join(d, filename))

# little women by louisa may alcott
filename = 'little_women.txt'
gutenberg_url = 'http://www.gutenberg.org/cache/epub/514/pg514.txt'
# Skip the download when the book is already here (same idea as wget -nc).
if not path.exists(filename):
    urlretrieve(gutenberg_url, filename)

# Close the file as soon as the text has been read.
with open(filename) as book:
    text = book.read()

# start from the default stopword list (copy, so it can be extended here
# without touching the library's own set)
stopwords = STOPWORDS.copy()

wc = WordCloud(max_words=number_of_words, mask=mask, stopwords=stopwords, margin=0,
               random_state=1).generate(text)

# store default colored image
img = wc.to_image()
img.save( 'little_women.png', 'png' )
PYEOF

python little_women.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment