-
-
Save jokull/b87ceceb4ffcb5db807bd27a00bb13a4 to your computer and use it in GitHub Desktop.
Find follower accounts that have profile pics with faces in them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OAUTH_CONSUMER_KEY=
OAUTH_CONSUMER_SECRET=
OAUTH_TOKEN=
OAUTH_TOKEN_SECRET=
IMGIX_TOKEN=
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
imgix==1.0.0
requests==2.9.1
requests-oauthlib==0.6.0
tablib==0.11.2
python-dotenv==0.3.0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding=utf-8 | |
import sys | |
import shelve | |
import os | |
import json | |
from os.path import join, dirname | |
from clint.textui import progress | |
from dotenv import load_dotenv | |
load_dotenv(join(dirname(__file__), '.env')) | |
import requests | |
from requests_oauthlib import OAuth1Session | |
import imgix | |
import tablib | |
# imgix URL builder for the "takumi-twitter-dp" source; the signing key is
# read from the IMGIX_TOKEN environment variable (a KeyError is raised at
# import time if it is missing).
builder = imgix.UrlBuilder("takumi-twitter-dp.imgix.net", sign_key=os.environ['IMGIX_TOKEN'])
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    Args:
        l: A sliceable sequence with a length (list, tuple, str, ...).
        n: Chunk size; must be a non-zero integer (``range`` rejects a
            step of 0 with ``ValueError``).

    Yields:
        Consecutive slices ``l[i:i + n]``; the last one may be shorter
        than ``n``. Nothing is yielded for an empty sequence.
    """
    # ``range`` instead of ``xrange``: works on both Python 2 and Python 3,
    # whereas ``xrange`` is a NameError on Python 3.
    for start in range(0, len(l), n):
        yield l[start:start + n]
# OAuth1 session used for all Twitter API calls; the four credentials come
# from the environment (loaded from .env by load_dotenv above).
twitter = OAuth1Session(
    os.environ['OAUTH_CONSUMER_KEY'],
    client_secret=os.environ['OAUTH_CONSUMER_SECRET'],
    resource_owner_key=os.environ['OAUTH_TOKEN'],
    resource_owner_secret=os.environ['OAUTH_TOKEN_SECRET'],
)

# Base URL of the Twitter REST API and the user lookup endpoint built on it.
api_url = 'https://api.twitter.com/1.1'
url = api_url + '/users/lookup.json'
def main(path, size=None):
    """Record followers whose profile picture contains a face in a CSV file.

    Reads a JSON document from stdin and takes the follower IDs found under
    ``['data']['followers']``. The IDs are looked up through the Twitter
    ``users/lookup`` endpoint in batches of 100, and each returned user's
    profile image is sent through imgix with ``faces=1, fm='json'``. Users
    whose imgix response has a truthy ``'Faces'`` entry are appended to the
    dataset; the others are remembered in the ``.scratch`` shelf so later
    runs skip them.

    Args:
        path: CSV file to write. If it already exists it is loaded first and
            the IDs it contains are not looked up again.
        size: Optional limit on how many follower IDs to process. Passed
            through ``int()``, so a command-line string is accepted.

    Raises:
        requests.HTTPError: If the Twitter lookup fails with a status other
            than 404.
    """
    book = tablib.Dataset(headers=['ID', 'Name', 'Handle', 'Bio', 'Image', 'Location', 'Verified', 'URL'])
    scratch = shelve.open('.scratch', writeback=True)
    try:
        scratch.setdefault('skippable', [])

        # A real list (not a lazy ``map`` object) so it can be sliced below
        # on Python 3 as well as Python 2.
        followers = [str(follower) for follower in json.load(sys.stdin)['data']['followers']]

        # stdin has been consumed as a data pipe; point it at the terminal
        # again. NOTE(review): presumably so the interactive debugger started
        # by the entry point can read keyboard input -- confirm.
        sys.stdin = open('/dev/tty')

        if os.path.exists(path):
            with open(path) as fp:
                book.load(fp.read())

        # Bound unconditionally: on a first run (no CSV yet) the column is
        # simply empty instead of the name being undefined.
        booked_ids = book['ID']

        # One set for O(1) "already handled" checks, kept in step with the
        # dataset and the skippable list as the run progresses.
        seen = set(booked_ids)
        seen.update(scratch['skippable'])

        if size is not None:
            followers = followers[:int(size)]

        # Materialise the batches so the progress bar knows the total.
        for batch in progress.bar(list(chunks(followers, 100))):
            pending = [follower for follower in batch if follower not in seen]
            if not pending:
                continue

            response = twitter.get(
                url + '?user_id=' + ','.join(pending),
                headers={'content-type': 'application/json'},
            )
            # NOTE(review): Twitter documents a 404 for users/lookup when
            # none of the requested IDs resolve to a user; treat that as an
            # empty batch -- confirm against the API reference.
            if response.status_code == 404:
                continue
            # Any other failure (rate limit, auth, ...) returns an error
            # object rather than a list of users; fail loudly here instead
            # of with a confusing TypeError in the loop below.
            response.raise_for_status()

            for user in response.json():
                user_id = user['id_str']
                if user_id in seen:
                    continue

                imgix_url = builder.create_url(user['profile_image_url'], opts=dict(faces=1, fm='json'))
                imgix_response = requests.get(imgix_url)
                if imgix_response.status_code != 200:
                    # Not recorded anywhere, so the user is retried next run.
                    continue

                if imgix_response.json().get('Faces'):
                    book.append((
                        user_id,
                        user['name'],
                        user['screen_name'],
                        user['description'],
                        user['profile_image_url'],
                        user['location'],
                        user['verified'],
                        user['url'],
                    ))
                else:
                    scratch['skippable'].append(user_id)
                seen.add(user_id)

            # Checkpoint after every batch so an interrupted run loses at
            # most one batch of work.
            with open(path, 'w') as fp:
                fp.write(book.csv)
            scratch.sync()
    finally:
        # Always release the shelf, even when a request or parse step raises.
        scratch.close()
if __name__ == "__main__":
    # Script entry point: forward the command-line arguments (path and the
    # optional size) to main(), running it under ipdb's context manager so
    # an uncaught exception opens a debugger session.
    from ipdb import launch_ipdb_on_exception

    cli_args = sys.argv[1:]
    with launch_ipdb_on_exception():
        main(*cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment