Created
December 5, 2012 23:51
-
-
Save drewconway/4220612 to your computer and use it in GitHub Desktop.
Retweets random people's first ever tweet -- at random. Fun!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
ur1twt.py | |
Description: Retweets random people's first ever tweet. Fun! | |
Created by ([email protected]) on | |
# Copyright (c) , under the Simplified BSD License. | |
# For more information on FreeBSD see: http://www.opensource.org/licenses/bsd-license.php | |
# All rights reserved. | |
""" | |
from tweepy.streaming import StreamListener | |
from tweepy import OAuthHandler | |
from tweepy import Stream | |
from tweepy import API | |
from scipy import random | |
import pickle | |
import json | |
import re | |
""" | |
Credentials saved locally using a pickled dictionary of the form: | |
{"username" : , | |
"password" : , | |
"access_token" : , | |
"access_token_secret" : , | |
"consumer_secret" : , | |
"consumer_key" : } | |
""" | |
# Load the locally stored API credentials.
# The file is opened in binary mode (required by pickle on Python 3, harmless on
# Python 2) and closed as soon as the dictionary has been read.
# NOTE: unpickling can execute arbitrary code -- only load a creds.pickle that
# you created yourself.
with open("creds.pickle", "rb") as creds_file:
    creds = pickle.load(creds_file)

# Go to http://dev.twitter.com and create an app.
# The consumer key and secret will be generated for you after the app is created.
consumer_key = creds["consumer_key"]
consumer_secret = creds["consumer_secret"]

# After the step above, you will be redirected to your app's page.
# Create an access token under the "Your access token" section.
access_token = creds["access_token"]
access_token_secret = creds["access_token_secret"]
class PostFirstTweet(StreamListener):
    """
    Stream listener that re-posts a small random sample of first-ever tweets.

    Every message handed to ``on_data`` is decoded from JSON and run through a
    chain of rule-based filters; the text of a message that passes all of them
    is posted through the authenticated API client with probability
    ``POST_PROBABILITY``.
    """

    # Newline-delimited list of American English words used by ``any_word``.
    WORD_FILE = "/usr/share/dict/words"

    # Fraction of qualifying tweets that actually get posted
    # ...we don't want to annoy people!
    POST_PROBABILITY = 0.01

    # The text must start with an ASCII word character. The class is spelled
    # out (instead of ``\w``) so it means the same thing on Python 2 and 3.
    _LEADING_WORD_CHAR = re.compile(r"[A-Za-z0-9_]")

    # Link prefixes rejected by the text-level re-check.
    _LINK_PREFIXES = ("http://", "https://")

    def __init__(self):
        """Authenticate against the API and load the dictionary word list."""
        # Authenticate with the object so tweets from stream can be posted.
        self.auth = OAuthHandler(consumer_key, consumer_secret)
        self.auth.set_access_token(access_token, access_token_secret)
        self.api = API(self.auth)

        # Add a dictionary of American English words.
        with open(self.WORD_FILE, "r") as word_doc:
            self.word_list = [line.strip("\n") for line in word_doc]
        # Hashed copy so each membership test in ``any_word`` is O(1) instead
        # of a scan over the whole list.
        self._word_set = frozenset(self.word_list)

    def on_data(self, data):
        """
        Handle one raw stream message.

        ``data`` is a JSON string. Always returns True; a message that lacks
        any field read by the filters is skipped silently.
        """
        # Convert the JSON object into a Python dictionary
        parse_data = json.loads(data)
        try:
            # The random draw happens only after every filter has passed.
            if self._is_candidate(parse_data) and random.random() < self.POST_PROBABILITY:
                self.post_tweet(parse_data["text"])
        except KeyError:
            # Deliberate best-effort: a message missing an expected key is
            # simply not a candidate.
            pass
        return True

    def _is_candidate(self, parse_data):
        """
        Return True when ``parse_data`` passes every content filter.

        Filters run in a fixed order and stop at the first failure. Raises
        KeyError when a field being inspected is absent.
        """
        # First test that this is a user's first ever tweet
        if parse_data["user"]["statuses_count"] != 1:
            return False
        if self.is_spam(parse_data):
            return False
        # Check to see if it is in English
        if parse_data["user"]["lang"] != "en":
            return False
        if self.is_retweet(parse_data):
            return False
        # Finally, re-check -- via text contents -- that it contains no link
        # and starts with an ASCII word character.
        text = parse_data["text"]
        if any(prefix in text for prefix in self._LINK_PREFIXES):
            return False
        if not self._LEADING_WORD_CHAR.match(text):
            return False
        # At least one of the words must be in the dictionary.
        return self.any_word(text)

    def on_error(self, status):
        """Print the status passed to the error callback."""
        print(status)

    def any_word(self, text):
        """
        Return True if at least one space-separated token of ``text`` is an
        entry of the loaded word list. This is done to avoid tweeting
        gobbledygook!
        """
        return any(word in self._word_set for word in text.split(" "))

    def is_spam(self, parse_data):
        """
        Rule-based spam detection.

        A message counts as spam when it carries at least one URL entity, is a
        reply to another user, or contains an "@" anywhere in its text. The
        checks short-circuit in that order.
        """
        return (
            len(parse_data["entities"]["urls"]) >= 1
            or parse_data["in_reply_to_user_id_str"] is not None
            or "@" in parse_data["text"]
        )

    def is_retweet(self, parse_data):
        """Return True when the message has a "retweeted_status" key (rule-based bot detection)."""
        return "retweeted_status" in parse_data

    def post_tweet(self, text):
        """Post ``text`` as a status update through ``self.api``."""
        self.api.update_status(text)
if __name__ == '__main__':
    # Script entry point: build the listener, authenticate a separate handler
    # for the stream connection, then start consuming the sample stream.
    listener = PostFirstTweet()

    stream_auth = OAuthHandler(consumer_key, consumer_secret)
    stream_auth.set_access_token(access_token, access_token_secret)

    Stream(stream_auth, listener).sample()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment