Last active
March 9, 2018 08:49
-
-
Save dogterbox/82f5dd9708554e26b2a90eab3bc30965 to your computer and use it in GitHub Desktop.
Twitter streaming to Word cloud
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

from twitter import Twitter, OAuth
from wordcloud import WordCloud, STOPWORDS
def to_file(text, filename="output.txt"):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing content.

    Args:
        text: The string to write.
        filename: Destination path; defaults to ``"output.txt"``.
    """
    # Explicit UTF-8 so tweets with emoji / non-ASCII characters are written
    # the same way regardless of the platform's default encoding.
    with open(filename, "w", encoding="utf-8") as text_file:
        text_file.write(text)
# Search Twitter for `key_word`, collect up to `num_results` tweets, render a
# word cloud from their text, and save both the raw text and the raw API
# responses to disk for later inspection.

key_word = "#happy"
num_results = 500  # number of tweets to collect

# User credentials used to access the Twitter API.
# Replace the placeholders locally; never commit real credentials.
access_token = 'XXXX'
access_secret = 'XXXX'
consumer_key = 'XXXX'
consumer_secret = 'XXXX'

oauth = OAuth(access_token, access_secret, consumer_key, consumer_secret)
twitter = Twitter(auth=oauth)

stopwords = set(STOPWORDS)
tweet_texts = []   # full text of every collected tweet
pages = []         # raw search responses, one entry per request
result_count = 0
last_id = None     # lowest tweet id seen so far (pagination cursor)

# **** twitter streaming ****
# search    : https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets.html
# full_text : https://developer.twitter.com/en/docs/tweets/tweet-updates
print("--- twitter streaming processing ---")
while result_count < num_results:
    params = {
        "q": key_word,
        "result_type": "mixed",
        "lang": "en",
        # The search endpoint returns at most 100 tweets per request.
        "count": min(num_results - result_count, 100),
        "tweet_mode": "extended",
    }
    if last_id is not None:
        # max_id is inclusive, so step one below the oldest tweet already
        # seen; otherwise that tweet is returned again on every page.
        params["max_id"] = last_id - 1

    query = twitter.search.tweets(**params)
    statuses = query['statuses']
    if not statuses:
        # No more matching tweets: stop instead of looping forever.
        break

    pages.append(query)
    for result in statuses:
        if 'full_text' in result:  # only count tweets that carry text
            tweet_texts.append(result['full_text'])
            result_count += 1
    # Advance the cursor past everything on this page, whatever its order.
    last_id = min(result["id"] for result in statuses)

# One tweet per line, each terminated by a newline.
text_tweets = "".join(tweet + "\n" for tweet in tweet_texts)
# A single JSON array of response pages, so the saved file is valid JSON.
json_dumps = json.dumps(pages, indent=4) + "\n"

# **** word cloud ****
print("--- world cloud processing ---")
# add stop words
stopwords.update({
    "RT",     # RT is Re Tweets
    "https",  # text in URL
    "co",     # text in URL -- Ex: https://t.co/yF3CRPrNF2
    "amp",    # "&amp;" is the HTML character reference for an ampersand
})

if text_tweets.strip():
    # colormap : https://matplotlib.org/examples/color/colormaps_reference.html
    wc = WordCloud(background_color="black", colormap="hsv", width=1600, height=800,
                   max_font_size=800, min_font_size=10, max_words=1000,
                   stopwords=stopwords)
    wc.generate(text_tweets)     # generate word cloud
    wc.to_file("wordcloud.png")  # save image file
    wc.to_image().show()         # show word cloud image
else:
    # Nothing to draw from when no tweet text was collected.
    print("--- no tweets collected, skipping word cloud ---")

# save result from Twitter streaming (query) -- For viewing
to_file(text=text_tweets, filename="text_tweets.txt")      # save full text tweets
to_file(text=json_dumps, filename="query_json_form.json")  # save queries in JSON form
Author
dogterbox
commented
Mar 9, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment