Last active
April 17, 2019 21:39
-
-
Save ibnesayeed/03ff191bf8c7c796d155d0314ebc82de to your computer and use it in GitHub Desktop.
A Shell script to download TimeMaps of a Twitter handle in various language variants using MemGator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# A script to download TimeMaps of a Twitter handle in various language variants.
#
# Usage:
#   ./twitter-timemap-lang-downloader.sh <TwitterHandle> [<OutputDir>]
#
# Arguments:
#   TwitterHandle  Twitter handle (a leading @ sign is tolerated and stripped)
#   OutputDir      Directory to save TimeMaps in (default: /tmp/<TwitterHandle>)
#
# For the default timeline and for every language variant (?lang=<code>) a
# TimeMap is fetched in CDXJ format and saved as
# <OutputDir>/<TwitterHandle>-<variant>.cdxj. Per-language, per-archive, and
# per-year memento counts are then printed to stdout.
#
# Author: Sawood Alam <@ibnesayeed>

# Treat references to unset variables as errors. "set -e" and "pipefail" are
# deliberately NOT enabled: a failed fetch or an empty TimeMap must simply be
# reported as a count of 0 instead of aborting the whole run.
set -u

# The fetch command is kept as an array (never as a flat string) so that it is
# not re-split on whitespace and so that the "?" of "?lang=xx" in the URI is
# never treated as a glob character. The URI of the timeline is appended to
# $uri_prefix and passed as the last argument of the command.
#
# Use these if MemGator is installed locally
# fetcher=(memgator -f cdxj)
# uri_prefix=""
#
# Use these if Docker is installed
# fetcher=(docker container run --rm -it ibnesayeed/memgator -f cdxj)
# uri_prefix=""
#
# Use these to utilize ODU's MemGator service
fetcher=(curl -sf)
uri_prefix="https://memgator.cs.odu.edu/timemap/cdxj/"

# Twitter supports the following 47 languages
readonly tlangs=(
  ar bg bn ca cs da de el en en-gb es eu fa fi fil fr ga gl gu he hi hr hu id
  it ja kn ko mr ms nl no pl pt ro ru sk sr sv ta th tr uk ur vi zh-cn zh-tw
)

# Prints a message to stderr and terminates the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Prints the usage message to stderr and terminates the script with status 1.
usage() {
  printf 'Usage:\n %s <TwitterHandle> [<OutputDir>]\n' "$0" >&2
  exit 1
}

#######################################
# Fetches the TimeMap of a URI and saves it in a file.
# Globals:   fetcher (read), uri_prefix (read)
# Arguments: $1 - URI to fetch the TimeMap of
#            $2 - path of the file to save the TimeMap in
# Outputs:   number of lines saved in the file, to stdout
#######################################
save_timemap() {
  local uri=$1
  local outfile=$2
  # Lines starting with "@" are dropped so that only the remaining lines are
  # counted (presumably the "@" lines are CDXJ metadata, not mementos).
  # If the fetch fails the file ends up empty and the count is 0.
  "${fetcher[@]}" "${uri_prefix}${uri}" | grep -v '^@' > "$outfile"
  # The arithmetic expansion strips the padding some "wc" variants print.
  echo "$(( $(wc -l < "$outfile") ))"
}

main() {
  (( $# > 0 )) || usage

  # First argument: Twitter handle (with or without the @ sign)
  local thandle=${1#@}
  [[ -n "$thandle" ]] || usage
  local timeline="https://twitter.com/$thandle"
  printf 'Downloading TimeMaps of \e[4m%s\e[0m in various languages\n' "$timeline"

  # Second argument: Output directory to save TimeMaps in
  local opdir=${2:-/tmp/$thandle}
  mkdir -p -- "$opdir" || die "Cannot create output directory: $opdir"
  printf 'Saving TimeMaps in \e[4m%s\e[0m directory\n' "$opdir"

  echo "=================== Mementos in Languages =================="
  local lang mcount totalc
  mcount=$(save_timemap "$timeline" "$opdir/$thandle-default.cdxj")
  printf 'default\t%s\n' "$mcount"
  totalc=$mcount
  for lang in "${tlangs[@]}"; do
    mcount=$(save_timemap "$timeline?lang=$lang" "$opdir/$thandle-$lang.cdxj")
    printf '%s\t%s\n' "$lang" "$mcount"
    totalc=$((totalc + mcount))
  done
  echo "------------------------------------------------------------"
  echo "TOTAL: $totalc mementos"

  echo "=================== Mementos in Archives ==================="
  # The third "/"-separated field of each saved line is tallied (the host
  # part of the first URL on the line), most frequent first.
  cat -- "$opdir/$thandle"-*.cdxj \
    | cut -d'/' -f 3 \
    | sort \
    | uniq -c \
    | sort -nr \
    | awk '{print $2"\t"$1}'

  echo "=================== Yearly Mementos ==================="
  # The first four characters of each saved line are tallied (the year,
  # given that lines start with a datetime), in ascending order.
  cat -- "$opdir/$thandle"-*.cdxj \
    | cut -c -4 \
    | sort \
    | uniq -c \
    | awk '{print $2"\t"$1}'
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment