Skip to content

Instantly share code, notes, and snippets.

@vuori
Created February 21, 2021 13:07
Show Gist options
  • Save vuori/250d0b560d8ba5a11dcaf44c3f8bb513 to your computer and use it in GitHub Desktop.
Save vuori/250d0b560d8ba5a11dcaf44c3f8bb513 to your computer and use it in GitHub Desktop.
Get a user's Twitter likes (favorites) in twint JSON format using the official API
#!/usr/bin/env python3
# pylint: disable=invalid-name,line-too-long,broad-except
"""
Returns as many liked tweets from the given Twitter user
as the API allows. Output format is compatible with twint
(https://github.com/twintproject/twint) JSON output
(i.e. one JSON object per line, _not_ wrapped into an array).
This uses the official API since likes seem to be inaccessible
without logging in at the moment.
First run "pip3 install tweepy" or similar to install the tweepy
library.
Next fill in TW_API_* values below with your Twitter API credentials.
Fill in the forms at https://developer.twitter.com/ to get new ones.
See https://gist.github.com/vuori/67ed39b85785e273ff051a69e26874d8
for a simple tool to grab images from tweets based on
the output of this tool (or twint).
"""
import logging
import argparse
import json
import urllib.parse
import sys
import pprint
from typing import Optional, Dict, Any
import tweepy
# Twitter API credentials: replace both 'FILLME' placeholders with your own
# API key and secret. get_api() passes them to tweepy.AppAuthHandler.
TW_API_KEY = 'FILLME'
TW_API_SECRET = 'FILLME'
def get_api() -> tweepy.API:
    """Build a tweepy API handle authenticated as the application.

    The credentials are taken from the module-level TW_API_KEY and
    TW_API_SECRET constants.
    """
    app_auth = tweepy.AppAuthHandler(TW_API_KEY, TW_API_SECRET)
    return tweepy.API(app_auth)
def format_status(s: tweepy.Status) -> Dict[str, Any]:
    """Format a Status object into a dict compatible with twint output.

    Args:
        s: The status (tweet) to convert.

    Returns:
        A dict using twint's JSON field names. Fields that are not filled
        in from the status are set to fixed placeholder values
        (None or 0).
    """
    # Local import so this function does not depend on a new file-level
    # import being added alongside it.
    from datetime import timezone  # pylint: disable=import-outside-toplevel

    # The output declares 'timezone': 'UTC', so a naive created_at is taken
    # to be UTC. Calling .timestamp() directly on a naive datetime would
    # interpret it in the machine's local timezone and skew 'created_at'
    # by the local UTC offset. Aware values are converted to UTC so that
    # 'date' and 'time' agree with the declared timezone.
    created = s.created_at
    if created.tzinfo is None:
        created = created.replace(tzinfo=timezone.utc)
    else:
        created = created.astimezone(timezone.utc)

    reply_to = []
    if s.in_reply_to_user_id:
        reply_to = [{
            'user_id': s.in_reply_to_user_id,
            'username': s.in_reply_to_screen_name
        }]

    item = {
        'id': s.id,
        'created_at': int(created.timestamp()*1000),  # epoch milliseconds
        'date': created.strftime('%Y-%m-%d'),
        'time': created.strftime('%H:%M:%S'),
        'timezone': 'UTC',
        'user_id': s.user.id,
        'username': s.user.screen_name,
        'name': s.user.name,
        'tweet': s.text,
        'urls': s.entities.get('urls'),
        'mentions': s.entities.get('user_mentions'),
        'photos': [x.get('media_url_https') for x in s.entities.get('media', [])],
        'likes_count': s.favorite_count,
        'retweets_count': s.retweet_count,
        'replies_count': 0,  # not available through official API
        'hashtags': [x.get('text') for x in s.entities.get('hashtags', [])],
        'cashtags': [x.get('text') for x in s.entities.get('symbols', [])],
        'link': f'https://twitter.com/{urllib.parse.quote(s.user.screen_name)}/status/{s.id}',
        'retweet': s.retweeted,
        'source': s.source,
        'reply_to': reply_to,
        # TBD, some not available through official API
        'place': None,
        'quote_url': None,
        'video': 0,
        'near': None,
        'geo': None,
        'user_rt_id': None,
        'user_rt': None,
        'retweet_id': None,
        'retweet_date': None,
        'translate': None,
        'trans_src': None,
        'trans_dest': None
    }
    return item
def get_likes(username: str,
              up_to: int = -1,
              api: Optional[tweepy.API] = None) -> int:
    """Print the given user's likes (favorites), one JSON object per line.

    Args:
        username: Name of the user whose likes are fetched.
        up_to: Stop as soon as a tweet with this ID is encountered; that
            tweet itself is not printed.
        api: API handle to use. When None, one is created with get_api().

    Returns:
        Program exit code: 0 on success, 2 if the user lookup failed.
    """
    log = logging.getLogger('get_likes')
    if api is None:
        api = get_api()

    try:
        user = api.get_user(username)
    except tweepy.TweepError as exc:
        # The lookup below assumes args[0] is a sequence of dicts with a
        # 'message' key. When args[0] is a plain string instead, indexing
        # it with 'message' raises TypeError, which must be caught too or
        # the str(exc) fallback is never reached and the script crashes.
        try:
            reason = exc.args[0][0]['message']
        except (IndexError, KeyError, TypeError):
            reason = str(exc)
        log.fatal('could not find user: %s', reason)
        return 2

    log.debug('found user %s for name %r', user.id, username)

    count = 0
    for s in tweepy.Cursor(api.favorites, id=user.id).items():
        if s.id == up_to:
            log.info('encountered up_to id %d, stopping', up_to)
            break

        # Counts every tweet received, including ones that fail to format.
        count += 1
        try:
            item = format_status(s)
        except (TypeError, KeyError):
            # Log the raw payload and carry on with the next tweet.
            # pylint: disable=protected-access
            log.exception('failed to parse %s, tweet content: %s',
                          s.id, pprint.pformat(s._json))
            continue

        json.dump(item, sys.stdout, ensure_ascii=False)
        sys.stdout.write('\n')

    log.info('retrieved %d liked tweets', count)
    return 0
def read_up_to(up_to_name: str) -> int:
    """Try to read an up_to value from the named file.

    Only the first line of the file is examined; it must be a JSON object
    with an 'id' member.

    Args:
        up_to_name: Path of the file to read.

    Returns:
        The tweet ID from the first line, or -2 if the file is missing or
        its first line could not be parsed.
    """
    log = logging.getLogger('read_up_to')
    try:
        # Only the numeric 'id' is needed, so decode leniently: a byte that
        # is not valid UTF-8 inside the tweet text is replaced rather than
        # making the whole file unreadable under the locale's default
        # encoding.
        with open(up_to_name, 'r', encoding='utf-8', errors='replace') as fp:
            line = fp.readline()
        data = json.loads(line)
        value = int(data['id'])
        log.info('found up-to id %d in %r', value, up_to_name)
        return value
    except FileNotFoundError:
        log.error('up-to file %r not found', up_to_name)
    except Exception:
        # Deliberately broad: any unreadable or malformed file is reported
        # through the same -2 sentinel.
        log.exception('up-to file %r did not contain a tweet', up_to_name)
    return -2
def main() -> int:
    """CLI entrypoint.

    Parses the command line, configures logging to stderr, resolves the
    optional up-to tweet ID (given directly or read from a file) and
    fetches the likes.

    Returns:
        Program exit code: 3 if the up-to value resolved to -2 (the
        sentinel read_up_to() uses for an unreadable file), otherwise
        whatever get_likes() returns.
    """
    parser = argparse.ArgumentParser(description='Fetch Twitter user likes')
    parser.add_argument('user', metavar='USER', nargs=1,
                        help='User name to fetch likes for')
    parser.add_argument('-u', dest='up_to', metavar='ID/FILE', action='store',
                        help='Fetch tweets up to this ID. If given a file name, '
                        'attempts to read the newest tweet ID from that file '
                        '(default: fetch all)')
    # default=0 is required: without it argparse leaves the count as None
    # when -v is not given, and the integer comparisons below raise
    # TypeError.
    parser.add_argument('-v', dest='verbose', action='count', default=0,
                        help='verbose output (repeat for more)')
    args = parser.parse_args()

    log_level = logging.WARNING
    if args.verbose > 1:
        log_level = logging.DEBUG
    elif args.verbose > 0:
        log_level = logging.INFO
    logging.basicConfig(level=log_level, stream=sys.stderr)
    log = logging.getLogger('main')

    # nargs=1 makes argparse store the positional as a one-element list.
    username = args.user[0]
    log.info('reading from user %r', username)

    up_to = args.up_to
    up_to_id = -1
    if up_to:
        # A value that is not an integer is treated as a file name.
        try:
            up_to_id = int(up_to)
        except ValueError:
            up_to_id = read_up_to(up_to)

        if up_to_id >= 0:
            log.info('reading up to tweet %d', up_to_id)
        elif up_to_id == -2:
            return 3

    return get_likes(username, up_to=up_to_id)
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment