Skip to content

Instantly share code, notes, and snippets.

@drewconway
Created December 5, 2012 23:51
Show Gist options
  • Save drewconway/4220612 to your computer and use it in GitHub Desktop.
Save drewconway/4220612 to your computer and use it in GitHub Desktop.
Retweets random people's first ever tweet -- at random. Fun!
#!/usr/bin/env python
# encoding: utf-8
"""
ur1twt.py
Description: Retweets random people's first ever tweet. Fun!
Created by ([email protected]) on
# Copyright (c) , under the Simplified BSD License.
# For more information on the Simplified BSD (FreeBSD) License see: http://www.opensource.org/licenses/bsd-license.php
# All rights reserved.
"""
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
from scipy import random
import pickle
import json
import re
"""
Credentials saved locally using a pickled dictionary of the form:
{"username" : <username>,
"password" : <password>,
"access_token" : <access_token>,
"access_token_secret" : <access_token_secret>,
"consumer_secret" : <consumer_secret>,
"consumer_key" : <consumer_key>}
"""
# Load the locally stored credentials dictionary.
# Pickle data is binary, so the file is opened in "rb" mode (required on
# Python 3, harmless on Python 2) and closed deterministically by `with`.
# NOTE: unpickling can execute arbitrary code -- only load a creds.pickle
# file that you created yourself.
with open("creds.pickle", "rb") as creds_file:
    creds = pickle.load(creds_file)

# Go to http://dev.twitter.com and create an app.
# The consumer key and secret will be generated for you after the app
# has been created.
consumer_key = creds["consumer_key"]
consumer_secret = creds["consumer_secret"]

# After the step above, you will be redirected to your app's page.
# Create an access token under the "Your access token" section.
access_token = creds["access_token"]
access_token_secret = creds["access_token_secret"]
class PostFirstTweet(StreamListener):
    """
    Stream listener that re-posts a small random sample of "first ever" tweets.

    Every message received from the stream is parsed as JSON and run through
    a chain of filters: it must be the author's first status, must not look
    like spam, must come from an account whose language is "en", must not be
    a retweet, must not contain a link, must start with a word character and
    must contain at least one dictionary word.  Messages that pass every
    filter are re-posted with probability ``POST_PROBABILITY``.
    """

    # Word list used to decide whether a tweet contains a real word.
    DICTIONARY_PATH = "/usr/share/dict/words"

    # Fraction of qualifying tweets that actually get re-posted
    # ...we don't want to annoy people!
    POST_PROBABILITY = 0.01

    # Matches only when the text *starts* with a word character.
    _LEADING_WORD_CHAR = re.compile(r"[\w]")

    def __init__(self):
        """Authenticate against the API and load the dictionary words."""
        # Authenticate with the object so tweets from stream can be posted.
        self.auth = OAuthHandler(consumer_key, consumer_secret)
        self.auth.set_access_token(access_token, access_token_secret)
        self.api = API(self.auth)

        # Load the dictionary of American English words, one word per line.
        with open(self.DICTIONARY_PATH, "r") as word_doc:
            self.word_list = [line.strip("\n") for line in word_doc]

        # Hashed copy of the word list so that each membership test in
        # any_word() is constant-time instead of a scan of the whole list.
        self._word_set = frozenset(self.word_list)

    def on_data(self, data):
        """
        Handle one raw message from the stream.

        ``data`` is the JSON text of the message.  Always returns True
        (presumably the signal tweepy uses to keep the stream open -- confirm
        against the tweepy version in use).
        """
        # Convert the JSON object into a Python dictionary; a payload that is
        # not valid JSON is skipped rather than allowed to kill the stream.
        try:
            parse_data = json.loads(data)
        except ValueError:
            return True

        # Messages that lack the expected keys (or are not dictionaries at
        # all) cannot be statuses we care about, so they are ignored.
        try:
            if not self._is_candidate(parse_data):
                return True
            text = parse_data["text"]
        except (KeyError, TypeError):
            return True

        # Post only a small random fraction of the qualifying tweets.
        if random.random() < self.POST_PROBABILITY:
            self.post_tweet(text)
        return True

    def _is_candidate(self, parse_data):
        """Return True if the parsed status passes every re-post filter."""
        user = parse_data["user"]

        # First test that this is a user's first ever tweet.
        if user["statuses_count"] != 1:
            return False
        if self.is_spam(parse_data):
            return False
        # Check to see if the account language is English.
        if user["lang"] != "en":
            return False
        if self.is_retweet(parse_data):
            return False

        # Finally, re-check -- via text contents -- that the tweet carries no
        # link (plain or TLS) and starts with a word character.
        text = parse_data["text"]
        if "http://" in text or "https://" in text:
            return False
        if not self._LEADING_WORD_CHAR.match(text):
            return False

        # At least one of the words must be in the dictionary.
        return self.any_word(text)

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)

    def any_word(self, text):
        """
        Return True if at least one space-separated token of ``text`` is an
        exact (case-sensitive) entry of the dictionary.

        This is done to avoid tweeting gobbledygook!
        """
        return any(word in self._word_set for word in text.split(" "))

    def is_spam(self, parse_data):
        """
        Rule-based spam detection.

        A status is treated as spam when it contains a URL entity, is a reply
        to another user, or has an "@" anywhere in its text.  The checks are
        evaluated lazily in that order.
        """
        return (
            len(parse_data["entities"]["urls"]) >= 1
            or parse_data["in_reply_to_user_id_str"] is not None
            or "@" in parse_data["text"]
        )

    def is_retweet(self, parse_data):
        """Return True if the status carries a "retweeted_status" entry
        (rule-based bot detection)."""
        return "retweeted_status" in parse_data

    def post_tweet(self, text):
        """Post ``text`` as a new status through the authenticated API."""
        self.api.update_status(text)
if __name__ == '__main__':
    # The listener decides which incoming statuses get re-posted.
    listener = PostFirstTweet()

    # The stream connection gets its own OAuth handler, built from the
    # module-level credentials.
    stream_auth = OAuthHandler(consumer_key, consumer_secret)
    stream_auth.set_access_token(access_token, access_token_secret)

    # Hand every message from the sample stream to the listener.
    first_tweet_stream = Stream(stream_auth, listener)
    first_tweet_stream.sample()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment