Created
April 24, 2015 18:29
-
-
Save davclark/5d3e7ee2c86a0d9eab2a to your computer and use it in GitHub Desktop.
Example of using GNIP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "from requests import post, get, put\n", | |
| "from requests.auth import HTTPBasicAuth\n", | |
| "import yaml" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# creds.yaml must provide the 'email', 'password' and 'user' keys read by later cells\n", | |
| "with open('creds.yaml') as credfile:\n", | |
| "    # safe_load only builds plain YAML types; yaml.load without an explicit\n", | |
| "    # Loader can construct arbitrary Python objects and is deprecated.\n", | |
| "    creds = yaml.safe_load(credfile)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# HTTP Basic credentials passed along with each Gnip API request\n", | |
| "auth = HTTPBasicAuth(username=creds['email'], password=creds['password'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# All endpoints for this account share the same URL prefix\n", | |
| "url_base = ''.join(['https://historical.gnip.com/accounts/', creds['user'], '/'])\n", | |
| "post_url = ''.join([url_base, 'jobs.json'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 70, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Fields common to job requests; copied and extended for each specific query\n", | |
| "base_query = dict(\n", | |
| "    publisher=\"twitter\",\n", | |
| "    streamType=\"track\",\n", | |
| "    dataFormat=\"activity_streams\",\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Step 1\n", | |
| "\n", | |
| "Submit a job" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Query-specific fields layered on top of a copy of the shared base\n", | |
| "q = dict(\n", | |
| "    base_query,\n", | |
| "    fromDate=\"201504110000\",\n", | |
| "    toDate=\"20150417\",  # time will be inferred as 0000 if unspecified\n", | |
| "    title='gb3',  # Needs to be unique\n", | |
| "    rules=[{\"value\": \"from:CathrynCarson\"}],\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'account': 'UCBerkeleyExplore',\n", | |
| " 'format': 'activity_streams',\n", | |
| " 'fromDate': '201504110000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
| " 'requestedBy': '[email protected]',\n", | |
| " 'status': 'opened',\n", | |
| " 'statusMessage': 'Waiting on quote from Gnip.',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb3',\n", | |
| " 'toDate': '201504170000'}" | |
| ] | |
| }, | |
| "execution_count": 94, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Submitting the job asks Gnip for a quote; the reply describes the newly opened job\n", | |
| "resp = post(url=post_url, json=q, auth=auth)\n", | |
| "job_info = resp.json()\n", | |
| "job_info" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Kernel state is not persistent, so it's a good idea to copy the jobURL into text somewhere.\n", | |
| "# For a more robust solution, we'd want to write this out to a file or, even better,\n", | |
| "# something like a redis queue or mongodb.\n", | |
| "# Note that the jobURL is also included in the responses - even for requests made TO this URL!\n", | |
| "jobURL = 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Steps 2 and 4\n", | |
| "\n", | |
| "This is the general approach to checking in on the status of a job - both checking for a quote prior to acceptance, and also checking for completion of the job." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'acceptedAt': '2015-04-24T06:32:05Z',\n", | |
| " 'acceptedBy': '[email protected]',\n", | |
| " 'account': 'UCBerkeleyExplore',\n", | |
| " 'format': 'activity_streams',\n", | |
| " 'fromDate': '201504110000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
| " 'percentComplete': 100,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'quote': {'estimatedActivityCount': 100,\n", | |
| " 'estimatedDurationHours': '1.0',\n", | |
| " 'estimatedFileSizeMb': '0.0',\n", | |
| " 'expiresAt': '2015-05-01T06:31:14Z'},\n", | |
| " 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
| " 'requestedBy': '[email protected]',\n", | |
| " 'results': {'activityCount': 5,\n", | |
| " 'completedAt': '2015-04-24T06:37:30Z',\n", | |
| " 'dataURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm/results.json',\n", | |
| " 'expiresAt': '2015-05-09T06:37:12Z',\n", | |
| " 'fileCount': 5,\n", | |
| " 'fileSizeMb': '0.0'},\n", | |
| " 'status': 'delivered',\n", | |
| " 'statusMessage': 'Job delivered and available for download.',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb3',\n", | |
| " 'toDate': '201504170000'}" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Re-run until the job is ready (limit: max 5 / 5 sec across all GET requests)\n", | |
| "resp = get(url=jobURL, auth=auth)\n", | |
| "job_status = resp.json()\n", | |
| "job_status" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# The URL could be read dynamically from the delivered job status:\n", | |
| "# dataURL = job_status['results']['dataURL']\n", | |
| "# But a hard-coded copy is persistent across kernel restarts:\n", | |
| "dataURL = 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm/results.json'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Notice that the \"requestedAt\" time is in GMT. Bummer!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Step 3\n", | |
| "\n", | |
| "Accept (or reject) the quote once the job status shows one." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 105, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "'2015-04-24 06:35:54'" | |
| ] | |
| }, | |
| "execution_count": 105, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Show the current time in UTC (gmtime), presumably to compare against the\n", | |
| "# UTC timestamps that Gnip reports for the job\n", | |
| "from time import gmtime, strftime\n", | |
| "strftime(\"%Y-%m-%d %H:%M:%S\", gmtime())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 99, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'acceptedAt': '2015-04-24T06:32:05Z',\n", | |
| " 'acceptedBy': '[email protected]',\n", | |
| " 'account': 'UCBerkeleyExplore',\n", | |
| " 'format': 'activity_streams',\n", | |
| " 'fromDate': '201504110000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'quote': {'estimatedActivityCount': 100,\n", | |
| " 'estimatedDurationHours': '1.0',\n", | |
| " 'estimatedFileSizeMb': '0.0',\n", | |
| " 'expiresAt': '2015-05-01T06:31:14Z'},\n", | |
| " 'requestedAt': '2015-04-24T06:30:03Z',\n", | |
| " 'requestedBy': '[email protected]',\n", | |
| " 'status': 'accepted',\n", | |
| " 'statusMessage': 'Job accepted and ready to be queued.',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb3',\n", | |
| " 'toDate': '201504170000'}" | |
| ] | |
| }, | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Accept the quote; sending \"reject\" as the status declines it instead.\n", | |
| "# The jobURL from job_status could be used here as well.\n", | |
| "resp = put(url=job_info['jobURL'], json={\"status\": \"accept\"}, auth=auth)\n", | |
| "job_status = resp.json()\n", | |
| "job_status" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Step 5\n", | |
| "\n", | |
| "Get our results (for the sample query, this should be very small)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'expiresAt': '2015-05-09T06:37:12Z',\n", | |
| " 'totalFileSizeBytes': 5154,\n", | |
| " 'urlCount': 5,\n", | |
| " 'urlList': ['https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/14/10_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=uacttaiL2eEQmQrcMf2dxOOJF%2FA%3D',\n", | |
| " 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/14/20_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=y2vtp%2FfFN%2FAUx2KlvyKzKTRe1iE%3D',\n", | |
| " 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/11/23/50_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=NrHsR8YWf9hjD3Ks8KEwJCkPKv0%3D',\n", | |
| " 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/12/03/20_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=N4oGXFlb%2BvwXV2gaMUOjRJ%2FUcI8%3D',\n", | |
| " 'https://s3-us-west-1.amazonaws.com/archive.replay.snapshots/snapshots/twitter/track/activity_streams/UCBerkeleyExplore/2015/04/24/20150411-20150417_8wg55wk6vm/2015/04/15/13/50_activities.json.gz?AWSAccessKeyId=AKIAJMSYMREFVVJ6E7QQ&Expires=1432449450&Signature=Mj4wnN1x988HC%2F4R%2FxgEUq%2FOxdo%3D']}" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# The results listing should just be ready once the job has been delivered\n", | |
| "resp = get(url=dataURL, auth=auth)\n", | |
| "job_status = resp.json()\n", | |
| "job_status" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Download every result file listed in job_status['urlList'] into the working directory.\n", | |
| "# NOTE(review): the local name is only the last URL path component, and the same\n", | |
| "# basename occurs under several hour folders in urlList, so a later download\n", | |
| "# overwrites an earlier file. The naming is left as-is because a later cell\n", | |
| "# hard-codes these names.\n", | |
| "ofnames = []\n", | |
| "for url in job_status['urlList']:\n", | |
| "    # Drop the directory part and the signed query string to get the file name\n", | |
| "    ofname = url.rsplit('/', 1)[1].split('?', 1)[0]\n", | |
| "    resp = get(url, stream=True)\n", | |
| "    try:\n", | |
| "        # Fail loudly instead of saving an HTTP error body as a .gz file\n", | |
| "        resp.raise_for_status()\n", | |
| "        with open(ofname, 'wb') as ofile:\n", | |
| "            # iter_content() defaults to 1-byte chunks; request larger ones\n", | |
| "            for chunk in resp.iter_content(chunk_size=8192):\n", | |
| "                ofile.write(chunk)\n", | |
| "    finally:\n", | |
| "        resp.close()  # release the streamed connection\n", | |
| "    ofnames.append(ofname)  # This again isn't persistent" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "['10_activities.json.gz',\n", | |
| " '20_activities.json.gz',\n", | |
| " '50_activities.json.gz',\n", | |
| " '20_activities.json.gz',\n", | |
| " '50_activities.json.gz']" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Names of the files written by the download loop; a repeated name means that\n", | |
| "# file was written more than once\n", | |
| "ofnames" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Hard-coded copy of the downloaded file names, so they survive a kernel restart.\n", | |
| "# NOTE(review): two names appear twice, so those files are read twice when\n", | |
| "# every entry of ofnames is loaded.\n", | |
| "ofnames = ['10_activities.json.gz',\n", | |
| " '20_activities.json.gz',\n", | |
| " '50_activities.json.gz',\n", | |
| " '20_activities.json.gz',\n", | |
| " '50_activities.json.gz']\n", | |
| "# or, after `from glob import glob`: ofnames = glob('*.json.gz')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Step 6\n", | |
| "\n", | |
| "Read the downloaded files back into Python." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import json, gzip" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Show the json.load documentation. This replaces the IPython-only `json.load?`\n", | |
| "# query, which was written as an assignment to `data`.\n", | |
| "help(json.load)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def read_tweet_data(fname):\n", | |
| "    '''Parse a gzipped file that holds one JSON document per line.\n", | |
| "    \n", | |
| "    These files include tweet packages, as well as some general info at the end.\n", | |
| "    Blank and whitespace-only lines between the records are skipped.\n", | |
| "    \n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    fname : str\n", | |
| "        Path of the gzip-compressed file to read.\n", | |
| "    \n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    list\n", | |
| "        The decoded JSON objects, in file order.\n", | |
| "    \n", | |
| "    Raises\n", | |
| "    ------\n", | |
| "    ValueError\n", | |
| "        If a non-blank line is not valid JSON.\n", | |
| "    '''\n", | |
| "    tweet_data = []\n", | |
| "    # Decode explicitly as UTF-8 instead of relying on the platform default encoding\n", | |
| "    with gzip.open(fname, 'rt', encoding='utf-8') as datafile:\n", | |
| "        for line in datafile:\n", | |
| "            record = line.strip()\n", | |
| "            if record:  # skip separator lines, including whitespace-only ones\n", | |
| "                tweet_data.append(json.loads(record))\n", | |
| "    \n", | |
| "    return tweet_data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'actor': {'displayName': 'Cathryn Carson',\n", | |
| " 'favoritesCount': 45,\n", | |
| " 'followersCount': 260,\n", | |
| " 'friendsCount': 212,\n", | |
| " 'id': 'id:twitter.com:2594018798',\n", | |
| " 'image': 'https://pbs.twimg.com/profile_images/526971310090358784/eWx-JEup_normal.jpeg',\n", | |
| " 'languages': ['en'],\n", | |
| " 'link': 'http://www.twitter.com/CathrynCarson',\n", | |
| " 'links': [{'href': 'http://history.berkeley.edu/people/cathryn-carson',\n", | |
| " 'rel': 'me'}],\n", | |
| " 'listedCount': 17,\n", | |
| " 'location': {'displayName': 'Berkeley, CA', 'objectType': 'place'},\n", | |
| " 'objectType': 'person',\n", | |
| " 'postedTime': '2014-06-29T01:59:33.000Z',\n", | |
| " 'preferredUsername': 'CathrynCarson',\n", | |
| " 'statusesCount': 384,\n", | |
| " 'summary': 'Historian of science, ethnographer of contemporary research institutions. Once upon a time @DLabAtBerkeley.',\n", | |
| " 'twitterTimeZone': None,\n", | |
| " 'utcOffset': None,\n", | |
| " 'verified': False},\n", | |
| " 'body': 'RT @abuaardvark: Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2',\n", | |
| " 'favoritesCount': 0,\n", | |
| " 'generator': {'displayName': 'Twitter Web Client',\n", | |
| " 'link': 'http://twitter.com'},\n", | |
| " 'gnip': {'matching_rules': [{'tag': None, 'value': 'from:CathrynCarson'}],\n", | |
| " 'urls': [{'expanded_status': 200,\n", | |
| " 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
| " 'url': 'http://t.co/grLC6eRwO2'}]},\n", | |
| " 'id': 'tag:search.twitter.com,2005:586896169319600128',\n", | |
| " 'link': 'http://twitter.com/CathrynCarson/statuses/586896169319600128',\n", | |
| " 'object': {'actor': {'displayName': 'Marc Lynch',\n", | |
| " 'favoritesCount': 97,\n", | |
| " 'followersCount': 35939,\n", | |
| " 'friendsCount': 666,\n", | |
| " 'id': 'id:twitter.com:18267544',\n", | |
| " 'image': 'https://pbs.twimg.com/profile_images/566579490063937536/MYo9iE8U_normal.jpeg',\n", | |
| " 'languages': ['en'],\n", | |
| " 'link': 'http://www.twitter.com/abuaardvark',\n", | |
| " 'links': [{'href': 'http://www.marclynch.com', 'rel': 'me'}],\n", | |
| " 'listedCount': 1865,\n", | |
| " 'location': {'displayName': 'ÜT: 37.892943,-122.270439',\n", | |
| " 'objectType': 'place'},\n", | |
| " 'objectType': 'person',\n", | |
| " 'postedTime': '2008-12-20T15:33:59.000Z',\n", | |
| " 'preferredUsername': 'abuaardvark',\n", | |
| " 'statusesCount': 29649,\n", | |
| " 'summary': 'Abu Aardvark. GWU. Monkey Cage. POMEPS. CNAS. Go Brewers and Packers!',\n", | |
| " 'twitterTimeZone': 'Eastern Time (US & Canada)',\n", | |
| " 'utcOffset': '-14400',\n", | |
| " 'verified': False},\n", | |
| " 'body': 'Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2',\n", | |
| " 'favoritesCount': 10,\n", | |
| " 'generator': {'displayName': 'Twitter for Mac',\n", | |
| " 'link': 'http://itunes.apple.com/us/app/twitter/id409789998?mt=12'},\n", | |
| " 'id': 'tag:search.twitter.com,2005:586855911601348608',\n", | |
| " 'link': 'http://twitter.com/abuaardvark/statuses/586855911601348608',\n", | |
| " 'object': {'id': 'object:search.twitter.com,2005:586855911601348608',\n", | |
| " 'link': 'http://twitter.com/abuaardvark/statuses/586855911601348608',\n", | |
| " 'objectType': 'note',\n", | |
| " 'postedTime': '2015-04-11T11:38:47.000Z',\n", | |
| " 'summary': 'Big Data in Social Science - new Annals packed w/interesting articles http://t.co/grLC6eRwO2'},\n", | |
| " 'objectType': 'activity',\n", | |
| " 'postedTime': '2015-04-11T11:38:47.000Z',\n", | |
| " 'provider': {'displayName': 'Twitter',\n", | |
| " 'link': 'http://www.twitter.com',\n", | |
| " 'objectType': 'service'},\n", | |
| " 'twitter_entities': {'hashtags': [],\n", | |
| " 'symbols': [],\n", | |
| " 'trends': [],\n", | |
| " 'urls': [{'display_url': 'ann.sagepub.com/content/659/1.…',\n", | |
| " 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
| " 'indices': [71, 93],\n", | |
| " 'url': 'http://t.co/grLC6eRwO2'}],\n", | |
| " 'user_mentions': []},\n", | |
| " 'twitter_filter_level': 'low',\n", | |
| " 'twitter_lang': 'en',\n", | |
| " 'verb': 'post'},\n", | |
| " 'objectType': 'activity',\n", | |
| " 'postedTime': '2015-04-11T14:18:45.000Z',\n", | |
| " 'provider': {'displayName': 'Twitter',\n", | |
| " 'link': 'http://www.twitter.com',\n", | |
| " 'objectType': 'service'},\n", | |
| " 'retweetCount': 6,\n", | |
| " 'twitter_entities': {'hashtags': [],\n", | |
| " 'symbols': [],\n", | |
| " 'trends': [],\n", | |
| " 'urls': [{'display_url': 'ann.sagepub.com/content/659/1.…',\n", | |
| " 'expanded_url': 'http://ann.sagepub.com/content/659/1.toc',\n", | |
| " 'indices': [88, 110],\n", | |
| " 'url': 'http://t.co/grLC6eRwO2'}],\n", | |
| " 'user_mentions': [{'id': 18267544,\n", | |
| " 'id_str': '18267544',\n", | |
| " 'indices': [3, 15],\n", | |
| " 'name': 'Marc Lynch',\n", | |
| " 'screen_name': 'abuaardvark'}]},\n", | |
| " 'twitter_filter_level': 'low',\n", | |
| " 'twitter_lang': 'en',\n", | |
| " 'verb': 'share'},\n", | |
| " {'info': {'activity_count': 1,\n", | |
| " 'message': 'Replay Request Completed',\n", | |
| " 'sent': '2015-04-24T06:36:32+00:00'}}]" | |
| ] | |
| }, | |
| "execution_count": 57, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Peek at the parsed contents of the first downloaded file\n", | |
| "read_tweet_data(ofnames[0])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Parse every listed file; data[i] holds the records of ofnames[i]\n", | |
| "data = list(map(read_tweet_data, ofnames))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 74, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[2, 2, 2, 2, 2]" | |
| ] | |
| }, | |
| "execution_count": 74, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Number of records per file (the trailing info record counts too).\n", | |
| "# Cathryn never tweets twice in 10 minutes\n", | |
| "list(map(len, data))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Step 0\n", | |
| "\n", | |
| "Did you forget about your job status? (Or want to spy on the others?)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# General form: https://historical.gnip.com/accounts/<account_name>/jobs.json\n", | |
| "jobs_url = ''.join((url_base, 'jobs.json'))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'delivered': {'activityCount': 74,\n", | |
| " 'jobCount': 3,\n", | |
| " 'jobDaysRun': 10,\n", | |
| " 'period': 'trial',\n", | |
| " 'since': '2015-04-16T21:44:19Z'},\n", | |
| " 'jobs': [{'expiresAt': '2015-05-08T05:47:10Z',\n", | |
| " 'fromDate': '201201010000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/9mkbvaz928.json',\n", | |
| " 'percentComplete': 100,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'delivered',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'my_job',\n", | |
| " 'toDate': '201201010001',\n", | |
| " 'uuid': '9mkbvaz928'},\n", | |
| " {'expiresAt': '2015-05-01T06:14:35Z',\n", | |
| " 'fromDate': '201404230000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/g75dcp53nh.json',\n", | |
| " 'percentComplete': 0,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'quoted',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb1',\n", | |
| " 'toDate': '201504230000',\n", | |
| " 'uuid': 'g75dcp53nh'},\n", | |
| " {'expiresAt': '2015-05-01T06:28:19Z',\n", | |
| " 'fromDate': '201501010000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/w9r3bb41z2.json',\n", | |
| " 'percentComplete': 0,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'quoted',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb2',\n", | |
| " 'toDate': '201504230000',\n", | |
| " 'uuid': 'w9r3bb41z2'},\n", | |
| " {'expiresAt': '2015-05-09T06:37:12Z',\n", | |
| " 'fromDate': '201504110000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/8wg55wk6vm.json',\n", | |
| " 'percentComplete': 100,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'delivered',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'gb3',\n", | |
| " 'toDate': '201504170000',\n", | |
| " 'uuid': '8wg55wk6vm'},\n", | |
| " {'expiresAt': '2015-05-01T16:07:26Z',\n", | |
| " 'fromDate': '201501010000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/80b1dnt6q.json',\n", | |
| " 'percentComplete': 0,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'rejected',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'mike j',\n", | |
| " 'toDate': '201504230001',\n", | |
| " 'uuid': '80b1dnt6q'},\n", | |
| " {'expiresAt': '2015-05-09T16:23:16Z',\n", | |
| " 'fromDate': '201504210000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/pvnzbnwf0b.json',\n", | |
| " 'percentComplete': 100,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'delivered',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'mike j',\n", | |
| " 'toDate': '201504230001',\n", | |
| " 'uuid': 'pvnzbnwf0b'},\n", | |
| " {'expiresAt': '2015-05-01T18:20:13Z',\n", | |
| " 'fromDate': '201301010000',\n", | |
| " 'jobURL': 'https://historical.gnip.com:443/accounts/UCBerkeleyExplore/publishers/twitter/historical/track/jobs/ea88dwtjve.json',\n", | |
| " 'percentComplete': 0,\n", | |
| " 'publisher': 'twitter',\n", | |
| " 'status': 'quoted',\n", | |
| " 'streamType': 'track',\n", | |
| " 'title': 'my historical job python',\n", | |
| " 'toDate': '201301010001',\n", | |
| " 'uuid': 'ea88dwtjve'}]}" | |
| ] | |
| }, | |
| "execution_count": 71, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# List the jobs on the account together with their current status\n", | |
| "resp = get(url=jobs_url, auth=auth)\n", | |
| "jobs_info = resp.json()\n", | |
| "jobs_info" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.4.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment