Last active
October 18, 2017 20:47
-
-
Save iamlemec/fd0ae0bd09a65179b1336369c3dd21fc to your computer and use it in GitHub Desktop.
Diff between two PDFs. Crude, but useful for revisions. Requires wdiff and pdftotext. See diff.sh for usage.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# Use this instead of diff[1] to get colored[2] word-based diffs. | |
# Useful for text documents that have reflowed paragraphs. | |
# Requires that wdiff is installed in your $PATH. | |
# | |
# [1] All diff options are ignored. Only replaces simplest usage. | |
# [2] Colors are always emitted. If piping into less, use "-R" or set LESS=-R | |
# Iain Murray, February 2009, Tweaked in June 2011 | |
if [ "$#" -lt 2 ] ; then | |
echo 'Usage: cwdiff FILE1 FILE2' | |
echo ' cwdiff [any options will be discarded] FILE1 FILE2' | |
echo | |
echo 'cwdiff emits colored, word-based diffs. Requires wdiff(1).' | |
echo 'If piping into less use -R option or set LESS=-R' | |
echo | |
exit | |
fi | |
# Discard any options, for example SVN attempts to set a bunch of GNU diff | |
# options | |
shift $(($# - 2)) | |
# Color commands adapted from cdiff alias by | |
# Paul Warren <[email protected]> 12/01/2001 | |
# http://ex-parrot.com/pdw/cdiff.html | |
# colour for added lines (bright blue) | |
# (yellow, used in cdiff, alias is less robust to terminal background colour) | |
diffnew=`tput setf 1``tput bold` | |
# colour for removed lines (bright red) | |
diffold=`tput setf 4``tput bold` | |
# reset - original pair, unset all attributes | |
reset=`tput op``tput sgr0` | |
# The -3 option to wdiff gives no context, so I'm getting wdiff to spit out the | |
# whole document and using grep to trim it down. The regex for grep could be | |
# improved, but for text documents is probably ok, and at worst will include | |
# some extra output that can be ignored.. | |
wdiff \ | |
--start-delete "${diffold}[-" \ | |
--end-delete "-]${reset}" \ | |
--start-insert "${diffnew}{+" \ | |
--end-insert "+}${reset}" \ | |
-n "$1" "$2" \ | |
| grep -C2 '\[' | |
# CAREFUL: The line above contains an escape character. | |
# Don't copy the grep command by typing ^[ instead of | |
# This grep line hits false positives more often for me: | |
# | grep -C2 '[{[][-+].*[-+][]}]' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash | |
# Copyright (C) 2009-2015 Christoph Junghans | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
# | |
#version 0.1 04.08.10 -- initial version | |
#version 0.1.1, 05.08.10 -- added -D and -o | |
#version 0.1.2, 04.10.10 -- make -D work again and better help | |
#version 0.1.3, 10.05.11 -- added --text to diff_opts to allow diff of binary files (issue #3) | |
#version 0.1.4, 10.05.11 -- removed --text again and handle case of binary files | |
#version 0.2.0, 15.04.12 -- clean up + bugfix thanks to Arne | |
#version 0.2.1, 15.04.12 -- clean up + new bug report address | |
#version 0.2.2, 20.04.12 -- fixed a bug if argument were dirs | |
#version 0.2.3, 18.12.12 -- replace usage of mktemp with bash built-ins | |
#version 0.2.4, 31.08.13 -- fixed deleted leading spaces | |
#version 0.2.5, 06.09.13 -- do not expand escape sequences in diff | |
#version 0.2.6, 18.09.13 -- allow 1st argument begin a file with --filter | |
#version 0.2.7, 09.03.15 -- included a patch from NetBSD | |
#version 0.2.8, 13.03.15 -- moved issue tracker to github | |
#version 0.3.0, 15.03.15 -- print help2man compliant help and version | |
#version 0.3.1, 30.06.15 -- fix --diffopts option (fixes #4) | |
#version 0.3.2, 12.11.15 -- make output processable by less -r/-R | |
#version 0.4.0, 12.11.15 -- dropped a2ps and out option, clean up | |
FAT_GREEN="[32;01m" | |
GREEN="[32m" | |
FAT_RED="[31;01m" | |
RED="[31m" #also used as indicator | |
MAGENTA="[35m" | |
FAT_BLACK="[1m" | |
OFF="[0m" | |
NL=" | |
" | |
usage="Usage: ${0##*/} [OPTIONS] FILE1 FILE2" | |
diff="no" | |
filter= | |
diff_opts='--new-file --unified --show-c-function --recursive' | |
ab= | |
color_filter() { | |
#use RED as an indicator if colors are on | |
if [[ ${RED} ]]; then | |
sed -e "s/\[-/$RED/g" -e "s/-\]/$OFF/g" -e "s/{+/$GREEN/g" -e "s/+}/$OFF/g" $1 | |
else | |
cat $1 | |
fi | |
} | |
die() { | |
[[ $* ]] && echo "$*" | |
exit 1 | |
} | |
show_help () { | |
cat << eof | |
A colorized version of (w)diff, which can be used a wrapper script, too. | |
$usage | |
Options: | |
-f, --filter Act as a wdiff color filter only and don't execute diff/wdiff | |
internally, just colorize input (no ARGS = read from stdin) | |
-d, --diff Preprocess input with diff and before giving it to wdiff | |
(very useful for dirs). Option can be used in combination | |
with --filter option meaning input is a patch. | |
-D, --diffonly Process input with diff, but NOT with wdiff, so ${0##*/} | |
basically acts like a colorized version of diff. Option | |
can be used in combination with --filter option meaning | |
input is a patch. | |
--diffopts XXX Change options given to diff. The default is '$diff_opts'. | |
--ab Replace common trunc of dirname by 'a' and 'b' | |
--no-color Disable color | |
-- Stop parsing options | |
-h, --help Show this help | |
-v, --version Show version | |
Examples: | |
${0##*/} -d dir1 dir2 Compare two directories | |
${0##*/} file1 file2 Compare two files | |
${0##*/} --ab -D dir1 dir2 VCS like output from compaing two directories | |
wdiff file1 file2 | ${0##*/} -f Colorize the output of wdiff | |
diff -u file1 file2 | ${0##*/} -D -f Colorize the output of diff | |
Report bugs and comments at https://github.com/junghans/cwdiff/issues or [email protected] | |
eof | |
} | |
shopt -s extglob | |
while [[ ${1} = -?* ]]; do | |
if [[ ${1} = --??*=* ]]; then # case --xx=yy | |
set -- "${1%%=*}" "${1#*=}" "${@:2}" # --xx=yy to --xx yy | |
elif [[ ${1} = -[^-]?* ]]; then # case -xy split | |
if [[ ${1} = -[o]* ]]; then #short opts with arguments | |
set -- "${1:0:2}" "${1:2}" "${@:2}" # -xy to -x y | |
else #short opts without arguments | |
set -- "${1:0:2}" "-${1:2}" "${@:2}" # -xy to -x -y | |
fi | |
fi | |
case $1 in | |
--ab) | |
ab="yes" | |
shift;; | |
--no-color) | |
unset FAT_GREEN GREEN FAT_RED RED MAGENTA OFF FAT_BLACK | |
shift;; | |
-f | --filter) | |
filter="yes" | |
shift ;; | |
-d | --diff) | |
diff="yes" | |
shift ;; | |
-D | --diffonly) | |
diff="only" | |
shift ;; | |
--diffopts) | |
diff_opts="$2" | |
shift 2;; | |
-h | --help) | |
show_help | |
exit 0;; | |
--vcs) | |
echo "${0##*/}: $(sed -ne 's/^#version.* -- \(.*$\)/\1/p' $0 | sed -n '$p')" | |
exit 0;; | |
-v | --version) | |
cat << eov | |
${0##*/} $(sed -ne 's/^#version \(.*\) -- .*$/\1/p' "$0" | sed -n '$p') | |
$(sed -ne 's/^# \(Copyright.*\)/\1/p' "$0") | |
This is free software: you are free to change and redistribute it. | |
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl2.html> | |
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
Written by C. Junghans <[email protected]> | |
eov | |
exit 0;; | |
--) | |
shift | |
break;; | |
*) | |
die "Unknown option '$1'";; | |
esac | |
done | |
[[ $1 = - ]] && filter="yes" && shift | |
if [[ ! $filter ]]; then | |
[[ ! $1 || ! $2 ]] && die "Please specify two files/dirs or add --filter option" | |
#use -e as it could be file or dir | |
[[ -e $1 ]] || die "Could not read file/dir '$1'" | |
[[ -e $2 ]] || die "Could not read file/dir '$2'" | |
[[ $3 ]] && die "I don't know what to do with arguments '${@:3}'" | |
else | |
[[ -n $1 && ! -f $1 ]] && die "Could not read file '$1'" | |
[[ $2 ]] && die "I don't know what to do with arguments '$*' together --filter option" | |
fi | |
if [[ $diff != no ]]; then | |
if [[ ! $filter ]]; then | |
exec 3< <(diff $diff_opts "$1" "$2") | |
#don't die here, because diff of binary files give exit code = 2 | |
else | |
[[ $1 ]] && exec 3<$1 || exec 3<&0 | |
fi | |
# don't do this if we have not files ;-) | |
if [[ $ab && $1 && $2 ]]; then | |
#find the longest equal part in $1 and $2 from the end | |
for ((i=1;i<=(${#1}<${#2}?${#1}:${#2});i++)); do | |
[[ ${1:0-$i} != ${2:0-$i} ]] && break | |
done | |
((i--)) | |
a="${1:0:${#1}-$i}" | |
b="${2:0:${#2}-$i}" | |
else | |
a=a; b=b | |
fi | |
# -r to not expand escape sequences in diff | |
while read -r -u 3; do | |
#small trick, because "read -u 3 i" would split the line and | |
#leading space would be lost. | |
i="${REPLY}" | |
if [[ $i = "Files "*" and "*" differ" ]]; then # binary case | |
i="${i/$a/a}" | |
i="${i/$b/b}" | |
echo "$i" | |
elif [[ $i = diff* ]]; then # diff header line | |
i="${i/ $diff_opts}" | |
i="${i/$a/a}" | |
echo "$FAT_BLACK${i/$b/b}$OFF" | |
elif [[ $i = ---* ]]; then | |
echo "${FAT_RED}${i/$a/a}${OFF}" | |
elif [[ $i = +++* ]]; then | |
echo "${FAT_GREEN}${i/$b/b}${OFF}" | |
elif [[ $i = @@* ]]; then | |
echo "${MAGENTA}${i}${OFF}" | |
elif [[ $i = -* ]]; then | |
[[ $diff = only ]] && echo "${RED}${i}${OFF}" || t1+="${i#-}$NL" | |
elif [[ $i = +* ]]; then | |
[[ $diff = only ]] && echo "${GREEN}${i}${OFF}" || t2+="${i#+}$NL" | |
else | |
# only true for diff != only | |
# cut the last newline do avoid an empty line at the end (echo append newline) | |
# echo -n would also work, but wdiff has strange behaviour if the 2nd file is | |
# empty, it will not append newline, which make the output look strange | |
[[ $t1 || $t2 ]] && { wdiff ${RED:+-n} <(echo "${t1%$NL}") <(echo "${t2%$NL}") | color_filter; } | |
t1= | |
t2= | |
[[ $diff = only ]] && echo "${i}" || echo "${i## }" | |
fi | |
done | |
# thanks to Arne Babenhauserheide for pointing out this case is missing | |
# if there was + or - lines at the end, which has not been printed yet | |
[[ $t1 || $t2 ]] && { wdiff ${RED:+-n} <(echo "${t1%$NL}") <(echo "${t2%$NL}") | color_filter; } | |
elif [[ $filter ]]; then | |
color_filter $1 | |
else | |
wdiff ${RED:+-n} "$1" "$2" | color_filter | |
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# sample usage | |
# cwdiff gives {+added+} and {-removed-} in highlighted text | |
# cwdiff2 gives add words in green and removed words in red | |
pdftotext old.pdf old.txt | |
pdftotext new.pdf new.txt | |
cwdiff old.txt new.txt > diff.txt | |
less -N diff.txt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment