cfrazier91 · November 19, 2016 15:45
diff --git a/capture_twitter_tweets.ipynb b/capture_twitter_tweets.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Instructions for capturing tweets\n",
    "\n",
    "\n",
    "### Setting up Twitter API credentials\n",
    "\n",
    "1. Go to https://apps.twitter.com/\n",
    "2. Sign In, and then follow the instructions below:\n",
    "3. Press \"Create New App\"\n",
    "4. Fill in the form (you can enter your own website or any other website in the 'website' field, e.g. http://google.com)\n",
    "5. Press 'Create application'\n",
    "6. Press 'Manage keys ...'\n",
    "7. Press 'Create access tokens'\n",
    "8. Find and save four values\n",
    "    - Consumer Key\n",
    "    - Consumer Secret\n",
    "    - Access Token Key\n",
    "    - Access Token Secret   \n",
    "9. Install tweepy, the library imported by the code below: `pip install tweepy`\n",
    "\n",
    "\n",
    "#### The following code has been adapted from a gist by GitHub user yanofsky\n",
    "https://gist.github.com/yanofsky/5436496\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import tweepy #pip install tweepy\n",
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Twitter API credentials\n",
    "#Read from environment variables so the secrets are never saved in the notebook.\n",
    "#Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_KEY and\n",
    "#TWITTER_ACCESS_SECRET before starting Jupyter; a missing variable falls back\n",
    "#to an empty string.\n",
    "import os\n",
    "\n",
    "consumer_key = os.environ.get(\"TWITTER_CONSUMER_KEY\", \"\")\n",
    "consumer_secret = os.environ.get(\"TWITTER_CONSUMER_SECRET\", \"\")\n",
    "access_key = os.environ.get(\"TWITTER_ACCESS_KEY\", \"\")\n",
    "access_secret = os.environ.get(\"TWITTER_ACCESS_SECRET\", \"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_all_tweets(screen_name):\n",
    "    \"\"\"Download a user's tweets and save them to '<screen_name>_tweets.csv'.\n",
    "\n",
    "    Requests the user's timeline 200 tweets at a time, passing max_id on every\n",
    "    request after the first, until a request returns nothing. Then writes one\n",
    "    CSV row per tweet with the columns id, created_at and text (UTF-8 encoded).\n",
    "\n",
    "    Authenticates with the module-level consumer_key, consumer_secret,\n",
    "    access_key and access_secret values.\n",
    "\n",
    "    screen_name -- screen name of the account whose timeline is captured.\n",
    "\n",
    "    Returns None. If the first request returns no tweets, the CSV holds only\n",
    "    the header row.\n",
    "    \"\"\"\n",
    "    #Twitter only allows access to a user's most recent 3240 tweets with this method\n",
    "    #authorize twitter, initialize tweepy\n",
    "    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
    "    auth.set_access_token(access_key, access_secret)\n",
    "    api = tweepy.API(auth)\n",
    "\n",
    "    #make initial request for most recent tweets (200 is the maximum allowed count)\n",
    "    new_tweets = api.user_timeline(screen_name=screen_name, count=200)\n",
    "\n",
    "    #list that accumulates every tweepy Tweet fetched so far\n",
    "    alltweets = list(new_tweets)\n",
    "\n",
    "    #keep grabbing tweets until a request comes back empty; when the first\n",
    "    #request is empty the loop is never entered, so alltweets[-1] is always safe\n",
    "    while len(new_tweets) > 0:\n",
    "        #id of the last tweet collected so far, less one\n",
    "        oldest = alltweets[-1].id - 1\n",
    "        print(\"getting tweets before %s\" % oldest)\n",
    "\n",
    "        #all subsequent requests use the max_id param to prevent duplicates\n",
    "        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)\n",
    "\n",
    "        #save the newly fetched tweets\n",
    "        alltweets.extend(new_tweets)\n",
    "\n",
    "        print(\"...%s tweets downloaded so far\" % len(alltweets))\n",
    "\n",
    "    #transform the tweepy tweets into a 2D array that will populate the csv\n",
    "    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode(\"utf-8\")] for tweet in alltweets]\n",
    "\n",
    "    #write the csv (binary mode, as used with the csv module on this Python 2 kernel)\n",
    "    with open('%s_tweets.csv' % screen_name, 'wb') as f:\n",
    "        writer = csv.writer(f)\n",
    "        writer.writerow([\"id\",\"created_at\",\"text\"])\n",
    "        writer.writerows(outtweets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "getting tweets before 758384475491344383\n",
      "...397 tweets downloaded so far\n",
      "getting tweets before 711386170748370943\n",
      "...593 tweets downloaded so far\n",
      "getting tweets before 659126601674682367\n",
      "...788 tweets downloaded so far\n",
      "getting tweets before 571319343402913792\n",
      "...988 tweets downloaded so far\n",
      "getting tweets before 468939403608752128\n",
      "...1186 tweets downloaded so far\n",
      "getting tweets before 402084512030789631\n",
      "...1382 tweets downloaded so far\n",
      "getting tweets before 357700821938479105\n",
      "...1581 tweets downloaded so far\n",
      "getting tweets before 322782553276825600\n",
      "...1778 tweets downloaded so far\n",
      "getting tweets before 285751483650174976\n",
      "...1978 tweets downloaded so far\n",
      "getting tweets before 233567323192168448\n",
      "...2177 tweets downloaded so far\n",
      "getting tweets before 180319913846640639\n",
      "...2372 tweets downloaded so far\n",
      "getting tweets before 124529860273127423\n",
      "...2572 tweets downloaded so far\n",
      "getting tweets before 101146258466541567\n",
      "...2772 tweets downloaded so far\n",
      "getting tweets before 70602291551543296\n",
      "...2970 tweets downloaded so far\n",
      "getting tweets before 52722748161662975\n",
      "...3170 tweets downloaded so far\n",
      "getting tweets before 30703386953256960\n",
      "...3189 tweets downloaded so far\n",
      "getting tweets before 29413266476244992\n",
      "...3189 tweets downloaded so far\n"
     ]
    }
   ],
   "source": [
    "get_all_tweets(\"mcdickenson\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:Python2]",
   "language": "python",
   "name": "conda-env-Python2-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Instructions for capturing tweets\n",
	"\n",
	"\n",
	"### Setting up Twitter API credentials\n",
	"\n",
	"1. Go to https://apps.twitter.com/\n",
	"2. Sign In, and then follow the instructions below:\n",
	"3. Press \"Create New App\"\n",
	"4. Fill in the form (you can enter your own website or any other website in the 'website' field, e.g. http://google.com)\n",
	"5. Press 'Create application'\n",
	"6. Press 'Manage keys ...'\n",
	"7. Press 'Create access tokens'\n",
	"8. Find and save four values\n",
	" - Consumer Key\n",
	" - Consumer Secret\n",
	" - Access Token Key\n",
	" - Access Token Secret \n",
	"9. Install tweepy, the library imported by the code below: `pip install tweepy`\n",
	"\n",
	"\n",
	"#### The following code has been adapted from a gist by GitHub user yanofsky\n",
	"https://gist.github.com/yanofsky/5436496\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"import tweepy #pip install tweepy\n",
	"import csv"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"#Twitter API credentials\n",
	"#Read from environment variables so the secrets are never saved in the notebook.\n",
	"#Set TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_KEY and\n",
	"#TWITTER_ACCESS_SECRET before starting Jupyter; a missing variable falls back\n",
	"#to an empty string.\n",
	"import os\n",
	"\n",
	"consumer_key = os.environ.get(\"TWITTER_CONSUMER_KEY\", \"\")\n",
	"consumer_secret = os.environ.get(\"TWITTER_CONSUMER_SECRET\", \"\")\n",
	"access_key = os.environ.get(\"TWITTER_ACCESS_KEY\", \"\")\n",
	"access_secret = os.environ.get(\"TWITTER_ACCESS_SECRET\", \"\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"def get_all_tweets(screen_name):\n",
	"    \"\"\"Download a user's tweets and save them to '<screen_name>_tweets.csv'.\n",
	"\n",
	"    Requests the user's timeline 200 tweets at a time, passing max_id on every\n",
	"    request after the first, until a request returns nothing. Then writes one\n",
	"    CSV row per tweet with the columns id, created_at and text (UTF-8 encoded).\n",
	"\n",
	"    Authenticates with the module-level consumer_key, consumer_secret,\n",
	"    access_key and access_secret values.\n",
	"\n",
	"    screen_name -- screen name of the account whose timeline is captured.\n",
	"\n",
	"    Returns None. If the first request returns no tweets, the CSV holds only\n",
	"    the header row.\n",
	"    \"\"\"\n",
	"    #Twitter only allows access to a user's most recent 3240 tweets with this method\n",
	"    #authorize twitter, initialize tweepy\n",
	"    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
	"    auth.set_access_token(access_key, access_secret)\n",
	"    api = tweepy.API(auth)\n",
	"\n",
	"    #make initial request for most recent tweets (200 is the maximum allowed count)\n",
	"    new_tweets = api.user_timeline(screen_name=screen_name, count=200)\n",
	"\n",
	"    #list that accumulates every tweepy Tweet fetched so far\n",
	"    alltweets = list(new_tweets)\n",
	"\n",
	"    #keep grabbing tweets until a request comes back empty; when the first\n",
	"    #request is empty the loop is never entered, so alltweets[-1] is always safe\n",
	"    while len(new_tweets) > 0:\n",
	"        #id of the last tweet collected so far, less one\n",
	"        oldest = alltweets[-1].id - 1\n",
	"        print(\"getting tweets before %s\" % oldest)\n",
	"\n",
	"        #all subsequent requests use the max_id param to prevent duplicates\n",
	"        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)\n",
	"\n",
	"        #save the newly fetched tweets\n",
	"        alltweets.extend(new_tweets)\n",
	"\n",
	"        print(\"...%s tweets downloaded so far\" % len(alltweets))\n",
	"\n",
	"    #transform the tweepy tweets into a 2D array that will populate the csv\n",
	"    outtweets = [[tweet.id_str, tweet.created_at, tweet.text.encode(\"utf-8\")] for tweet in alltweets]\n",
	"\n",
	"    #write the csv (binary mode, as used with the csv module on this Python 2 kernel)\n",
	"    with open('%s_tweets.csv' % screen_name, 'wb') as f:\n",
	"        writer = csv.writer(f)\n",
	"        writer.writerow([\"id\",\"created_at\",\"text\"])\n",
	"        writer.writerows(outtweets)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"getting tweets before 758384475491344383\n",
	"...397 tweets downloaded so far\n",
	"getting tweets before 711386170748370943\n",
	"...593 tweets downloaded so far\n",
	"getting tweets before 659126601674682367\n",
	"...788 tweets downloaded so far\n",
	"getting tweets before 571319343402913792\n",
	"...988 tweets downloaded so far\n",
	"getting tweets before 468939403608752128\n",
	"...1186 tweets downloaded so far\n",
	"getting tweets before 402084512030789631\n",
	"...1382 tweets downloaded so far\n",
	"getting tweets before 357700821938479105\n",
	"...1581 tweets downloaded so far\n",
	"getting tweets before 322782553276825600\n",
	"...1778 tweets downloaded so far\n",
	"getting tweets before 285751483650174976\n",
	"...1978 tweets downloaded so far\n",
	"getting tweets before 233567323192168448\n",
	"...2177 tweets downloaded so far\n",
	"getting tweets before 180319913846640639\n",
	"...2372 tweets downloaded so far\n",
	"getting tweets before 124529860273127423\n",
	"...2572 tweets downloaded so far\n",
	"getting tweets before 101146258466541567\n",
	"...2772 tweets downloaded so far\n",
	"getting tweets before 70602291551543296\n",
	"...2970 tweets downloaded so far\n",
	"getting tweets before 52722748161662975\n",
	"...3170 tweets downloaded so far\n",
	"getting tweets before 30703386953256960\n",
	"...3189 tweets downloaded so far\n",
	"getting tweets before 29413266476244992\n",
	"...3189 tweets downloaded so far\n"
	]
	}
	],
	"source": [
	"get_all_tweets(\"mcdickenson\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [conda env:Python2]",
	"language": "python",
	"name": "conda-env-Python2-py"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.12"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}