Skip to content

Instantly share code, notes, and snippets.

@cfrazier91
Created November 19, 2016 15:45
Show Gist options
  • Save cfrazier91/1c6708a83c6c09b0ee20e0ee6e0ec7ce to your computer and use it in GitHub Desktop.
Save cfrazier91/1c6708a83c6c09b0ee20e0ee6e0ec7ce to your computer and use it in GitHub Desktop.
Python for capturing tweets and storing into a csv
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instructions for capturing tweets\n",
"\n",
"\n",
"### Setting up Twitter API credentials\n",
"\n",
"1. Go to https://apps.twitter.com/\n",
"2. Sign In, and then follow the instructions below:\n",
"3. Press \"Create New App\"\n",
"4. Fill in the form (you can enter your own website or any other website in the 'website' field, e.g. http://google.com)\n",
"5. Press 'Create application'\n",
"6. Press 'Manage keys ...'\n",
"7. Press 'Create access tokens'\n",
"8. Find and save four values\n",
" - Consumer Key\n",
" - Consumer Secret\n",
" - Access Token Key\n",
" - Access Token Secret \n",
"9. Install tweepy: `pip install tweepy`\n",
"\n",
"\n",
"#### The following code has been adapted from GitHub user yanofsky\n",
"https://gist.github.com/yanofsky/5436496\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import csv\n",
"import os\n",
"\n",
"import tweepy #pip install tweepy"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Twitter API credentials\n",
"#Read from environment variables so the secrets are never saved inside the notebook.\n",
"#Set the four variables below before starting Jupyter; a missing one falls back to \"\".\n",
"consumer_key = os.environ.get(\"TWITTER_CONSUMER_KEY\", \"\")\n",
"consumer_secret = os.environ.get(\"TWITTER_CONSUMER_SECRET\", \"\")\n",
"access_key = os.environ.get(\"TWITTER_ACCESS_TOKEN\", \"\")\n",
"access_secret = os.environ.get(\"TWITTER_ACCESS_TOKEN_SECRET\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_all_tweets(screen_name):\n",
"    \"\"\"Download a user's recent tweets and save them to '<screen_name>_tweets.csv'.\n",
"\n",
"    The timeline is paged backwards 200 tweets at a time (using max_id) until a\n",
"    request comes back empty. One CSV row is written per tweet with the columns\n",
"    id, created_at and text (text is UTF-8 encoded).\n",
"\n",
"    screen_name -- Twitter handle whose timeline is fetched; it is also used as\n",
"                   the prefix of the output file name.\n",
"\n",
"    Returns None. If the first request returns no tweets, the CSV is still\n",
"    written and contains only the header row.\n",
"    \"\"\"\n",
"    #Twitter only exposes a user's most recent tweets (about 3,200) with this method\n",
"    #authorize twitter, initialize tweepy\n",
"    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
"    auth.set_access_token(access_key, access_secret)\n",
"    api = tweepy.API(auth)\n",
"\n",
"    #initialize a list to hold all the tweepy Tweets\n",
"    alltweets = []\n",
"\n",
"    #make initial request for most recent tweets (200 is the maximum allowed count)\n",
"    new_tweets = api.user_timeline(screen_name=screen_name, count=200)\n",
"    alltweets.extend(new_tweets)\n",
"\n",
"    #keep grabbing tweets until there are no tweets left to grab\n",
"    while len(new_tweets) > 0:\n",
"        #id of the oldest tweet so far, less one; computed inside the loop so an\n",
"        #empty timeline never indexes into an empty list\n",
"        oldest = alltweets[-1].id - 1\n",
"        print(\"getting tweets before %s\" % oldest)\n",
"\n",
"        #all subsequent requests use the max_id param to prevent duplicates\n",
"        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)\n",
"\n",
"        #save most recent tweets\n",
"        alltweets.extend(new_tweets)\n",
"\n",
"        print(\"...%s tweets downloaded so far\" % len(alltweets))\n",
"\n",
"    #transform the tweepy tweets into a 2D array that will populate the csv\n",
"    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode(\"utf-8\")] for tweet in alltweets]\n",
"\n",
"    #write the csv (binary mode and explicit encoding: this notebook runs on a Python 2 kernel)\n",
"    with open('%s_tweets.csv' % screen_name, 'wb') as f:\n",
"        writer = csv.writer(f)\n",
"        writer.writerow([\"id\",\"created_at\",\"text\"])\n",
"        writer.writerows(outtweets)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"getting tweets before 758384475491344383\n",
"...397 tweets downloaded so far\n",
"getting tweets before 711386170748370943\n",
"...593 tweets downloaded so far\n",
"getting tweets before 659126601674682367\n",
"...788 tweets downloaded so far\n",
"getting tweets before 571319343402913792\n",
"...988 tweets downloaded so far\n",
"getting tweets before 468939403608752128\n",
"...1186 tweets downloaded so far\n",
"getting tweets before 402084512030789631\n",
"...1382 tweets downloaded so far\n",
"getting tweets before 357700821938479105\n",
"...1581 tweets downloaded so far\n",
"getting tweets before 322782553276825600\n",
"...1778 tweets downloaded so far\n",
"getting tweets before 285751483650174976\n",
"...1978 tweets downloaded so far\n",
"getting tweets before 233567323192168448\n",
"...2177 tweets downloaded so far\n",
"getting tweets before 180319913846640639\n",
"...2372 tweets downloaded so far\n",
"getting tweets before 124529860273127423\n",
"...2572 tweets downloaded so far\n",
"getting tweets before 101146258466541567\n",
"...2772 tweets downloaded so far\n",
"getting tweets before 70602291551543296\n",
"...2970 tweets downloaded so far\n",
"getting tweets before 52722748161662975\n",
"...3170 tweets downloaded so far\n",
"getting tweets before 30703386953256960\n",
"...3189 tweets downloaded so far\n",
"getting tweets before 29413266476244992\n",
"...3189 tweets downloaded so far\n"
]
}
],
"source": [
"get_all_tweets(\"mcdickenson\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda env:Python2]",
"language": "python",
"name": "conda-env-Python2-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment