Last active
January 10, 2018 03:57
-
-
Save drench/75bd6bbbcd75ec72768e6355e82d172a to your computer and use it in GitHub Desktop.
Twitter: Get user timeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# This grabs all tweets from the given user (or as many as Twitter will give
# you; the docs say 3200) and stores them as a JSON array in the file
# ${screen_name}-timeline.json in the current directory.
#
# It requires jq and twurl, and assumes you have set up twurl with working API
# credentials.
#
# It doesn't clean up after itself, so make sure the current directory is clear
# of any files matching ${screen_name}-*.json before running.
#
# Ref: https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline
# The single positional argument is the Twitter screen name whose timeline is
# fetched. ${1:-} keeps the check working even if the script is run with -u.
screen_name=${1:-}
if [ -z "$screen_name" ]; then
  # A usage error is a diagnostic, so it goes to stderr like every other
  # message this script prints.
  echo "usage: $0 screen_name" >&2
  exit 111
fi
# Request path and query string shared by every page fetch: replies and
# retweets are included, 200 tweets are requested per page, and user objects
# are trimmed. Built with plain concatenation because `+=` is not available
# in POSIX sh, which the shebang asks for.
base_url="/1.1/statuses/user_timeline.json?exclude_replies=false"
base_url="${base_url}&include_rts=true&count=200&trim_user=true"
base_url="${base_url}&screen_name=${screen_name}"
# start_id
# Print the largest high id found among the page files
# ${screen_name}-<low_id>-<high_id>.json in the current directory, or "0"
# when no such file exists.
# Globals:  screen_name (read)
# Outputs:  the id (or 0) on stdout
#
# The body is a subshell so that `set --` cannot disturb the caller's
# positional parameters.
start_id() (
  # Let the shell expand the glob instead of parsing `ls` output. When nothing
  # matches, the pattern stays literal and fails the -e test below.
  set -- "${screen_name}"-*-*.json
  if [ -e "$1" ]; then
    # Field 3 (split on "-") is "<high_id>.json"; sort numerically on it, keep
    # the largest, then strip the name down to the bare id.
    printf '%s\n' "$@" |
      sort -t- -k3 -n |
      tail -n1 |
      cut -d- -f3 |
      cut -d. -f1
  else
    echo "0"
  fi
)
# Paging cursor for the fetch loop. start_id prints 0 when no page files
# exist; the loop sends no max_id for 0, so the first request returns the
# newest tweets.
max_id=$(start_id)
# dot_sleep SECONDS
# Wait SECONDS seconds, printing one "." to stderr per second as a progress
# indicator, then finish the line with a newline on stderr.
# Arguments: $1 - whole number of seconds to wait (defaults to 0)
# Outputs:   progress dots on stderr; nothing on stdout
#
# The body is a subshell so the countdown variable does not leak into the
# caller's scope (POSIX sh has no `local`).
dot_sleep() (
  remaining=${1:-0}
  while [ "$remaining" -gt 0 ]; do
    # printf is used because `echo ".\c"` only suppresses the newline in some
    # shells; others print the backslash sequence literally.
    printf '.' >&2
    sleep 1
    # POSIX arithmetic expansion replaces the bash-only ((c--)).
    remaining=$((remaining - 1))
  done
  echo >&2
)
# Fetch the timeline one page at a time, walking from the newest tweets to the
# oldest. Each page is stored as ${screen_name}-${low_id}-${high_id}.json.
# The loop stops on an empty page or when a page's file already exists, and
# exits the script with status 111 on any unexpected response.
while true; do
  raw_out=$(mktemp) || {
    echo "mktemp failed" >&2
    exit 111
  }
  url=$base_url
  # A cursor of 0 means "no cursor yet": leave max_id off so the API starts
  # from the newest tweet.
  if [ "$max_id" -gt 0 ]; then
    url="${url}&max_id=${max_id}"
  fi
  echo "calling $url and sending output to $raw_out" >&2
  # Quoted because the URL contains "?", which is a glob character.
  twurl "$url" > "$raw_out"
  json_type=$(jq -r type < "$raw_out")
  case "$json_type" in
    object)
      # An object instead of an array is an API error envelope.
      error_code=$(jq '.errors[0].code' < "$raw_out")
      # String comparison: the code may be "null" or empty, which would make
      # a numeric -eq test print its own error.
      if [ "$error_code" = "88" ]; then
        echo "Rate limited. Let's wait a minute." >&2
        dot_sleep 60
        rm -v "$raw_out"
        continue
      else
        # The raw response is kept on disk because the message points at it.
        echo "Unexpected response in $raw_out:" >&2
        cat "$raw_out" >&2
        exit 111
      fi
      ;;
    array)
      if [ "$(jq length < "$raw_out")" -eq 0 ]; then
        echo "Empty results: done." >&2
        # Nothing worth keeping in an empty page; do not leak the temp file.
        rm -v "$raw_out"
        break
      fi
      ;;
    *)
      echo "Unknown type '$json_type' for $raw_out:" >&2
      cat "$raw_out" >&2
      exit 111
      ;;
  esac
  # tweets come back newest to oldest, so sort by id ascending
  sorted_out=$(mktemp) || {
    echo "mktemp failed" >&2
    exit 111
  }
  if jq 'sort_by(.id)' < "$raw_out" > "$sorted_out"; then
    rm -v "$raw_out"
  else
    # Going on with an empty sorted file would produce a bogus file name and
    # reset the cursor, so stop here and keep the raw response for inspection.
    echo "Could not sort $raw_out" >&2
    rm -f "$sorted_out"
    exit 111
  fi
  # Prefer id_str: tweet ids exceed the integer range a double represents
  # exactly, so printing the numeric .id can round it. Fall back to .id if
  # id_str is absent.
  low_id=$(jq -r '.[0] | .id_str // (.id | tostring)' < "$sorted_out")
  high_id=$(jq -r '.[-1] | .id_str // (.id | tostring)' < "$sorted_out")
  outfile="${screen_name}-${low_id}-${high_id}.json"
  if [ -e "$outfile" ]; then
    echo "$outfile already exists! Exiting." >&2
    # The page is already on disk; drop the duplicate temp copy.
    rm -v "$sorted_out"
    break
  fi
  mv -iv "$sorted_out" "$outfile"
  # Continue from the oldest tweet seen. NOTE(review): the API documents
  # max_id as inclusive, so consecutive pages presumably share one tweet; the
  # overlap is removed when the pages are merged.
  max_id=$low_id
  sleep 2
done
# Merge every saved page into one JSON array, drop tweets that appear in more
# than one page, and write the result to ${screen_name}-timeline.json.
# jq reads the page files directly, and a single slurp + add concatenates
# them, so no `cat` or second `jq -s add` pass is needed.
if ls "${screen_name}"-*-*.json > /dev/null 2>&1; then
  jq -s 'add | unique_by(.id)' "${screen_name}"-*-*.json \
    > "${screen_name}-timeline.json"
else
  # No pages were saved (for example, the first response was empty): emit a
  # valid empty array rather than a zero-byte file.
  echo "No ${screen_name}-*-*.json page files found; writing an empty timeline." >&2
  echo '[]' > "${screen_name}-timeline.json"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment