Last active
July 16, 2024 10:36
-
-
Save ctrlcctrlv/fcf67309c519139d460f97d74bcd6850 to your computer and use it in GitHub Desktop.
JSTOR remove PDF download information (your IP and the date)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# RIP Aaron Swartz, killed by corporate greed. (Not JSTOR itself, they are a charity;
# I mean the publishers they must contract with
# to offer the limited public service that they do.)
#
# “There is no justice in following unjust laws.
# It’s time to come into the light and, in the grand tradition of civil disobedience,
# declare our opposition to this private theft of public culture.” ~ Aaron Swartz
#
# Requires:
# * pdftk (Java edition)
# * gawk (GNU awk)
# * mat2 (Metadata anonymisation toolkit 2 [by Tails project])
# * qpdf (https://qpdf.readthedocs.io [by Jay Berkenbilt])
#
# Usage:
#   source jstor_unmetadata.bash # perhaps add me to $HOME/.bashrc
#   jstor_unmetadata "$INPUT" "$OUTPUT"
#######################################
# Spool stdin into a temporary file, run a command against that file, then
# write the (possibly modified) file to stdout.
#
# Intended for tools that edit a file in place and therefore cannot be used
# directly as a pipeline filter.
#
# Globals:
#   T - read. If set and non-empty it is used as the spool file path;
#       otherwise a fresh "<mktemp>.pdf" path is used. The caller's T is
#       never modified.
# Arguments:
#   $@ - shell code, joined and run with `eval`. It can refer to the spool
#        file as "$T". NOTE: this is `eval`; pass trusted strings only.
# Outputs:
#   On success of the command: the contents of "$T" on stdout.
#   On failure of the command: nothing on stdout, a diagnostic on stderr.
# Returns:
#   0 whatever the command's status was (historical, best-effort contract
#   kept for existing callers); 1 only if the spool file cannot be
#   created or written.
#######################################
mktemp_eval() {
  # Shadow T locally so it is visible to the eval'd code but does not leak
  # into (or linger in) the shell that sourced this file.
  local T="${T:-}"
  # Prefixed names: these locals are also visible to the eval'd code and must
  # not collide with variables it may use.
  local __mte_base='' __mte_rc=0

  if [[ -z "$T" ]]; then
    __mte_base=$(mktemp) || {
      printf 'mktemp_eval: mktemp failed\n' >&2
      return 1
    }
    # The ".pdf" suffix is appended by hand; the suffix-less file created
    # by mktemp is removed again at the end.
    T="${__mte_base}.pdf"
  fi

  if ! cat > "$T"; then
    printf 'mktemp_eval: cannot write %s\n' "$T" >&2
    [[ -n "$__mte_base" ]] && rm -f -- "$T" "$__mte_base"
    return 1
  fi

  if eval "$@"; then
    # Remove the spool file only after it has been emitted successfully.
    cat -- "$T" && rm -f -- "$T"
  else
    __mte_rc=$?
    printf 'mktemp_eval: command failed with status %d; no output produced\n' \
      "$__mte_rc" >&2
    # A file we created ourselves is of no further use; a caller-supplied
    # path is left in place, as before.
    [[ -n "$__mte_base" ]] && rm -f -- "$T"
  fi

  [[ -n "$__mte_base" ]] && rm -f -- "$__mte_base"
  return 0
}
#######################################
# Filter that blanks out JSTOR download stamps.
#
# Reads lines on stdin and copies them to stdout. Any line matching one of
# the two patterns below is replaced by NUL bytes, one per original byte, so
# the length of every line is unchanged.
#
# Patterns (passed to gawk as PAT1 / PAT2):
#   'This content downloaded from'  - the stamp text itself
#   'on .*[\000]'                   - "on " followed later by a NUL byte
#                                     (gawk turns \000 in a -v value into NUL)
#
# Globals:   none
# Arguments: none
# Outputs:   the filtered stream on stdout
# Returns:   0 always (historical contract; gawk's status is not propagated)
#######################################
__jstor_unmetadata_inner() {
  local -r stamp_pat='This content downloaded from'
  local -r date_pat='on .*[\000]'
  local -r awk_prog='
# Return one NUL byte for every character of "line".
function DOIT(line,    blank, i, n) {
  n = length(line)
  blank = ""
  for (i = 0; i < n; i++)
    blank = blank "\0"
  return blank
}
$0 ~ PAT1 || $0 ~ PAT2 { print DOIT($0); next }
{ print }
'

  # LC_ALL=C: the input is binary data, so length() and the regex engine
  # must work on bytes rather than on locale-dependent multibyte characters.
  LC_ALL=C gawk -v PAT1="$stamp_pat" -v PAT2="$date_pat" "$awk_prog"
  return 0
}
#######################################
# Remove JSTOR's download stamp from a PDF.
#
# Pipeline, in order:
#   1. pdftk  ... uncompress       - uncompress the page streams
#   2. __jstor_unmetadata_inner    - blank out the stamp lines
#   3. mat2 -L --inplace           - clean metadata in place
#   4. qpdf --linearize            - write the final, linearized file
#
# Arguments:
#   $1 - input PDF (must be readable)
#   $2 - output PDF (created or overwritten; must not be the input file)
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   2 on missing arguments, 1 if the input is unreadable or input and
#   output are the same file, otherwise the status of the final stage.
#######################################
jstor_unmetadata() {
  if (( $# < 2 )); then
    printf 'Usage: jstor_unmetadata INPUT OUTPUT\n' >&2
    return 2
  fi
  if [[ ! -r "$1" ]]; then
    printf 'jstor_unmetadata: cannot read %s\n' "$1" >&2
    return 1
  fi
  # The output redirection truncates its target before anything is read,
  # which would destroy the input if both names refer to the same file.
  if [[ -e "$2" && "$1" -ef "$2" ]]; then
    printf 'jstor_unmetadata: INPUT and OUTPUT are the same file\n' >&2
    return 1
  fi

  (
    # Each stage must allocate its own spool file; an inherited T would make
    # the three concurrently running stages share a single path.
    unset T

    # mktemp_eval writes "$T" to stdout after the command has run, so every
    # tool has to leave its result in "$T" rather than print it itself.
    # Tools that cannot edit in place write next to "$T" and are moved over.
    # qpdf exits with 3 when it only had warnings and still wrote its output.
    mktemp_eval 'pdftk "$T" output "${T}.new.pdf" uncompress && mv -f -- "${T}.new.pdf" "$T"' \
      | __jstor_unmetadata_inner \
      | mktemp_eval 'mat2 -L --inplace "$T"' \
      | mktemp_eval '{ qpdf "$T" --linearize "${T}.new.pdf" || (( $? == 3 )); } && mv -f -- "${T}.new.pdf" "$T"'
  ) < "$1" > "$2"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NB: GitHub is not as good at Bash as I am. The heredoc delimiter ' EOF' is perfectly valid; their syntax highlighter is wrong, and this script does run.