Skip to content

Instantly share code, notes, and snippets.

@leighst
Last active March 19, 2016 19:32
Show Gist options
  • Save leighst/2acff73c0b4e70457333 to your computer and use it in GitHub Desktop.
Save leighst/2acff73c0b4e70457333 to your computer and use it in GitHub Desktop.
import pandas as pd
import nltk
def count_words(text, words):
    """Count the tokens of *text* whose lowercase form is in *words*.

    The text is split with ``nltk.word_tokenize`` and each token is
    lowercased before the membership test, so *words* is expected to hold
    lowercase entries.

    Args:
        text: The string to tokenize.
        words: A container supporting ``in`` (a set gives O(1) lookups).

    Returns:
        The number of matching tokens, or -1 if tokenizing failed. If an
        error occurs after tokenizing, the tally reached so far is returned.
    """
    count = -1
    try:
        tokens = nltk.word_tokenize(text)
        count = 0
        for token in tokens:
            if token.lower() in words:
                count += 1
    except Exception:
        # Deliberately best-effort: one bad row (e.g. a non-string value)
        # must not abort a whole-column apply. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit
        # propagate so a long run can still be stopped.
        print("error")
    return count
def count_swears(text):
    """Return ``count_words(text, sw)`` for the module-level word list ``sw``.

    NOTE(review): ``sw`` is not defined in the visible part of this file;
    presumably it is a collection of lowercase swear words -- confirm.
    """
    swear_total = count_words(text, sw)
    return swear_total
# Load the tweet export and index it by the parsed timestamp.
tw = pd.read_csv('/Users/lestew/Downloads/7983102_29842bf08e6351e434c2774a693cb64f8167d531/tweets.csv')
tw['datetime'] = pd.to_datetime(tw.timestamp)
tw = tw.set_index('datetime')

# pandas numbers weekdays 0 (Monday) through 6 (Sunday).
tw['day'] = tw.index.dayofweek
tw['swears'] = tw.text.apply(count_swears)

# Keep tweets with at least one match; rows where counting failed carry -1
# and are excluded by the > 0 test.
sw_days = tw[tw.swears > 0]
# Keep only rows with no retweeted_status_id (presumably original tweets
# rather than retweets -- confirm against the export format).
d = sw_days[sw_days.retweeted_status_id.isnull()].day

# Reindex to all seven weekdays so a day with no matching tweets still gets
# a (zero-height) bar; otherwise the fixed labels below would be attached to
# the wrong bars whenever a weekday is missing from the counts.
p = d.value_counts().reindex(range(7), fill_value=0).plot.bar()
p.set_xticklabels(["mon", "tue", "wed", "thu", "fri", "sat", "sun"])
# NOTE(review): `pylab` is not imported in the visible part of this file;
# presumably it is supplied by the environment (e.g. IPython's %pylab) --
# confirm, or import it explicitly.
pylab.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment