Created
January 9, 2015 14:58
-
-
Save revox/2657e80f7fa857fc6043 to your computer and use it in GitHub Desktop.
Get Twitter user details
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import twitter | |
| # http://mike.verdone.ca/twitter/ and Russel, MTSW, second edition, 2013 | |
| import json, csv, time, sys | |
# == OAuth Authentication ==
# The consumer keys can be found on your application's Details
# page located at https://dev.twitter.com/apps (under "OAuth settings")
consumer_key = ""
consumer_secret = ""

# After the step above, you will be redirected to your app's page.
# Create an access token under the "Your access token" section
access_token = ""
access_token_secret = ""

# Authenticated client used for every API request made by this script.
auth = twitter.oauth.OAuth(access_token, access_token_secret,
                           consumer_key, consumer_secret)
twitter_api = twitter.Twitter(auth=auth)

# http://www.tutorialspoint.com/python/python_files_io.htm
# Output CSV of user details. This is a Python 2 script: the Python 2 csv
# module emits its own line terminators, so the file is opened in binary mode
# ('wb'); text mode ('w') would produce doubled '\r' characters on Windows.
outfile = open('users_data.csv', 'wb')
csvwriter = csv.writer(outfile)
# Encode a value as UTF-8 when there is one; when there is not, return the
# empty string (instead of null).
def clean(val):
    """Return ``val`` encoded as UTF-8, or "" when ``val`` is falsy."""
    if not val:
        return ""
    return val.encode('utf-8')
# Read the tweets file from disk; each row should have a timestamp in
# position 0 and a user name in position 1. The user names are looked up in
# batches and one CSV row of details is written per user returned.
# NB: the input is opened in universal newline mode as tweets can have
# newlines - see http://stackoverflow.com/questions/6726953/open-the-file-in-universal-newline-mode-using-csv-module-django

# Number of screen names sent in a single users/lookup request.
BATCH_SIZE = 100
# 180 requests per 15 minutes equals one each 15 seconds, add a second for
# good luck!
SECONDS_BETWEEN_REQUESTS = 16


def _write_user_details(screen_names):
    """Look up one batch of screen names and write a CSV row per user.

    Uses the module-level ``twitter_api`` client, ``csvwriter`` and
    ``outfile``. The output file is flushed after the batch so completed
    batches survive a later failure.

    Any error is printed and then re-raised, which stops the script.
    """
    try:
        resp = twitter_api.users.lookup(screen_name=','.join(screen_names))
        # print json.dumps(resp, indent=1)
        for user in resp:
            csvwriter.writerow([
                clean(user['screen_name']),
                user['created_at'],
                user['profile_image_url'],
                clean(user['location']),
                user['geo_enabled'],
                user['lang'],
                clean(user['time_zone']),
            ])
        outfile.flush()
    except Exception as e:
        print("error ::  %s" % (e,))  # all other errors should get printed here
        raise


try:
    with open('tweetdata_one_day.csv', 'rU') as inputfile:
        processed_total = 0
        users = []
        for row in csv.reader(inputfile):
            # Blank or truncated rows have no user name column; skip them
            # rather than crash with an IndexError.
            if len(row) < 2:
                continue
            print(row[1])
            processed_total += 1
            users.append(row[1])
            if len(users) >= BATCH_SIZE:
                print("Processed total =  %d" % processed_total)
                _write_user_details(users)
                users = []
                # Throttle between requests to stay under the rate limit.
                time.sleep(SECONDS_BETWEEN_REQUESTS)
        # The input rarely holds an exact multiple of BATCH_SIZE rows: look up
        # whatever is left so the trailing users are not silently dropped.
        if users:
            print("Processed total =  %d" % processed_total)
            _write_user_details(users)
except KeyboardInterrupt:
    sys.exit(0)  # this allows the keyboard CTRL-C to work to break this loop
finally:
    # Always release the output file, including on errors and CTRL-C.
    outfile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment