Last active
December 19, 2022 12:10
-
-
Save Wikinaut/39b2be7a5570a6cd41181f11c2577e30 to your computer and use it in GitHub Desktop.
patch-parsed-twitter-archive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Patch a parsed Twitter archive.
#
# Post-processor for the files generated by
# https://github.com/timhutton/twitter-archive-parser
#
# To clarify: the script does not patch the parser, and it does not modify the
# parsed *.html files themselves. It only reads them and writes one combined
# output file.
#
# init 18.12.2022
#
# Usage: in the parsed twitter-archive directory with the numerous *.html, run
#   ./patch-parsed-twitter-archive-sh
#
# What it does:
#   - for each existing monthly (or so) *Tweet-Archive*.html page, extracts the
#     page body and patches/strips several html tags
#   - adds jQuery and the lazy-images loader
#   - adds links to the images to facilitate the original view; the original
#     image opens in a new tab
#   - concatenates all patched page bodies into a single all.html file
#
# TODO:
#   the sorting order in each monthly block is downwards (most recent tweet: at
#   the end) whereas the concatenated monthly files are added with the "most
#   recent on top".
#   Example: NOV 1, 2, ... 30, OCT 1, 2, ... 31
#
# License: WTFPL

set -euo pipefail

# Name of the combined page written into the current directory.
readonly OUTFILE="all.html"

#######################################
# Print the fixed HTML prologue of the combined page: doctype, <head> with the
# jQuery / jquery.lazy script tags and the stylesheet, the opening <body>, and
# the inline script that activates lazy loading and wraps every lazy image in
# a link to its "data-src" target (opened in a new tab).
# Outputs: the prologue on stdout
#######################################
print_header() {
  # Quoted delimiter: the text is emitted literally, so the "$(" of the inline
  # JavaScript is not treated as a command substitution.
  cat <<'EOF'
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="viewport" content="initial-scale=1.0, user-scalable=no" />
<script type="text/javascript" src="https://code.jquery.com/jquery-3.6.2.min.js"></script>
<script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/jquery.lazy/1.7.9/jquery.lazy.min.js"></script>
<style>
.container {
position: relative;
padding-left: 33%;
width: 33%;
font-size: small;
font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif;
font-size: 15px;
}
blockquote {
}
img.lazy {
max-height: 33vh; /* percentage of viewport height */
display: flex;
margin-right: auto;
margin-left: auto;
}
video {
display: flex;
margin-right: auto;
margin-left: auto;
max-height: 33vh;
}
</style>
</head>
<body>
<script>
$(function($) {
$("img.lazy").Lazy();
$("img.lazy").each( function() {
$(this).wrap("<a href='"+$(this).attr("data-src")+"' target='_blank'></a>");
});
});
</script>
EOF
}

#######################################
# Print the fixed HTML epilogue that closes <body> and <html>.
# Outputs: the epilogue on stdout
#######################################
print_footer() {
  cat <<'EOF'
</body>
</html>
EOF
}

#######################################
# Print the patched body of one parser-generated page.
# Arguments: $1 - path of the *.html page to read (the file is not modified)
# Outputs:   the patched page body on stdout
# Returns:   the exit status of sed (non-zero e.g. if the page is unreadable)
#######################################
patch_page() {
  local page=$1

  # The file name is spliced into the replacement part of an s/// command, so
  # the characters that are special there (backslash, the "/" delimiter and
  # "&") must be escaped. The replacement strings are expanded from quoted
  # variables so that "&" stays literal regardless of the bash version.
  local title=$page
  local esc_backslash='\\' esc_slash='\/' esc_amp='\&'
  title=${title//\\/"$esc_backslash"}
  title=${title//\//"$esc_slash"}
  title=${title//&/"$esc_amp"}

  # Expressions, in order:
  #  1. drop everything from line 1 through the opening body tag
  #  2. drop everything from the closing body tag onwards; the empty regex
  #     "//" repeats the previous regex, so the range only ends at another
  #     "</body" line or at end of input
  #  3. drop the page's own <main class="container"> line ...
  #  4. ... because the <h1> title line is replaced by a fresh <main> opener
  #     followed by the file name as heading and a rule (relies on sed turning
  #     "\n" in the replacement into a newline, as GNU sed does)
  #  5. turn media images into lazy-loaded ones (src -> data-src)
  #  6. remove the <br> between two directly adjacent images
  sed \
    -e '1,/<body*/d' \
    -e '/<\/body/,//d' \
    -e '/<main class="container">/d' \
    -e "s/^ *<h1>Your twitter archive<\/h1>/<main class=\"container\">\n<h3>${title}<\/h3>\n<hr>\n/g" \
    -e "s/<img src=\"media/<img class='lazy' data-src=\"media/g" \
    -e 's/\/><br><img/\/><img/g' \
    < "$page"
}

#######################################
# Build $OUTFILE in the current directory from all *Tweet-Archive*.html pages.
# Globals:   OUTFILE (read)
# Outputs:   writes $OUTFILE; prints a notice on stderr if there is no input
# Returns:   0 on success or when there is nothing to do, 1 if a page could
#            not be processed
#######################################
main() {
  local -a pages=(*Tweet-Archive*.html)
  local i

  # An unmatched glob stays as the literal pattern, which does not exist as a
  # file; in that case do not produce an empty archive page.
  if [[ ! -e "${pages[0]-}" ]]; then
    printf '%s: no *Tweet-Archive*.html files found in %s; nothing to do\n' \
      "${0##*/}" "$PWD" >&2
    return 0
  fi

  {
    print_header
    # Glob results are sorted ascending; walking the list backwards emits the
    # pages in reverse name order (presumably newest month first -- this
    # assumes the file names sort chronologically).
    for ((i = ${#pages[@]} - 1; i >= 0; i--)); do
      patch_page "${pages[i]}" || return 1
    done
    print_footer
  } > "$OUTFILE"
}

main
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment