Last active
August 25, 2020 15:21
-
-
Save mcculley/c726bbb58e96f3e3b712ca2840f28994 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Show the differences between two Microsoft Word (.docx) files.
#
# Usage: <this-script> FILE1.docx FILE2.docx
#
# Side effects:
#   - creates FILE1.docx.contents/ and FILE2.docx.contents/ (left in place so
#     the unpacked, pretty-printed XML can be inspected afterwards);
#   - writes the unified, recursive diff to ./differences.txt.
# Exit status:
#   - 2 on a usage error (wrong argument count, input is not a file);
#   - 1 if unpacking fails;
#   - otherwise the status of diff (0 = identical, 1 = differences, >1 = error).

if [ "$#" -ne 2 ]; then
  printf 'Usage: %s FILE1.docx FILE2.docx\n' "${0##*/}" >&2
  exit 2
fi

for doc in "$1" "$2"; do
  if [ ! -f "$doc" ]; then
    printf '%s: not a regular file: %s\n' "${0##*/}" "$doc" >&2
    exit 2
  fi
done

# printf with quoted arguments keeps file names with spaces or glob
# characters intact in the message.
printf 'Detecting differences between %s and %s.\n' "$1" "$2"
echo MD5 hashes:
# md5 is the BSD/macOS tool; md5sum is its GNU coreutils counterpart.
if command -v md5 >/dev/null 2>&1; then
  md5 "$1" "$2"
elif command -v md5sum >/dev/null 2>&1; then
  md5sum "$1" "$2"
else
  echo 'Neither md5 nor md5sum found; skipping hashes.' >&2
fi

# Unpack the contents of the documents. The Word .docx format is a ZIP file
# containing XML files.
for doc in "$1" "$2"; do
  # A plain mkdir (no -p) fails on a directory left over from an earlier run;
  # stop there, because stale files would end up in the diff.
  if ! mkdir "$doc".contents; then
    printf '%s: cannot create %s (remove it if it is left over from a previous run)\n' \
      "${0##*/}" "$doc.contents" >&2
    exit 1
  fi
  unzip -d "$doc".contents "$doc"
  unzip_status=$?
  # Info-ZIP unzip uses status 1 for warnings where extraction still
  # completed; only higher values indicate a real failure.
  if [ "$unzip_status" -gt 1 ]; then
    printf '%s: failed to unpack %s (unzip exit status %s)\n' \
      "${0##*/}" "$doc" "$unzip_status" >&2
    exit 1
  fi
done

# Run an XML formatter on each XML file in the document, replacing the file
# with its pretty-printed form so the diff is line-oriented. The file names
# are handed to an inner sh as positional parameters, so this does not depend
# on find substituting a "{}" embedded in a longer word.
for doc in "$1" "$2"; do
  find "$doc".contents -type f -name '*.xml' -exec sh -c '
    for xml do
      if xmllint --format --output "$xml.formatted" "$xml"; then
        mv "$xml.formatted" "$xml"
      else
        # Keep the original file and drop any partial output so it does not
        # show up in the recursive diff.
        rm -f "$xml.formatted"
      fi
    done' sh {} +
done

# Recursively find the differences. This is intentionally the last command:
# its exit status becomes the status of the script.
diff --unified --recursive "$1".contents "$2".contents > differences.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment