Created
February 20, 2017 15:41
-
-
Save jn0/e17b4aa728532976e9dd9cc4aacf8acf to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# A tool to extract data from those MS Windows "web archives" that arrive as a ".DOC" file.
# Run as `splitmime.sh < /tmp/where-it-is.doc` and then check the "./C_/" directory.
# Parser state used by the main read loop below.
div=''              # part delimiter ("--" + boundary); empty until the boundary is found
typeset -i nl=0     # number of input lines read so far (used in progress messages)
typeset -i ol=0     # number of payload lines spooled for the current part
file_header='yes'   # 'yes' until the first delimiter is seen; such lines are skipped
header='no'         # 'yes' while reading a part's headers (delimiter .. blank line)
file=''             # output path of the current part; empty when nothing is being saved
cte=''              # Content-Transfer-Encoding value seen in the part headers
qpd() { | |
perl -MMIME::QuotedPrint -pe '$_=MIME::QuotedPrint::decode($_);' | |
} | |
#######################################
# Turn the spooled payload "<name>.xxx" into the final file "<name>".
# The spool file is removed only when decoding succeeded, so a failed decode
# leaves the raw data behind for inspection.
# Arguments:
#   $1 - Content-Transfer-Encoding of the part. Compared case-insensitively;
#        an empty value is treated like 7bit (the MIME default when the header
#        is absent). 7bit/8bit/binary are identity encodings: the spool file
#        is simply renamed.
#   $2 - output path (without the ".xxx" suffix)
# Outputs: a diagnostic on stderr for an unsupported encoding
# Returns: 0 on success, non-zero if the encoding is unsupported or the
#          decoder / rename failed
#######################################
decode() {
  local cte name
  # Encoding names are case-insensitive ("Base64", "Quoted-Printable", ...).
  cte=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  name=$2
  case "$cte" in
    base64)
      base64 -d < "${name}.xxx" > "${name}" && rm -- "${name}.xxx"
      ;;
    quoted-printable)
      qpd < "${name}.xxx" > "${name}" && rm -- "${name}.xxx"
      ;;
    '' | 7bit | 8bit | binary)
      # Nothing to decode: the payload already is the content.
      mv -- "${name}.xxx" "${name}"
      ;;
    *)
      # stderr instead of /dev/tty so this also works without a terminal.
      printf "Unsupported '%s' for '%s'.\n" "$1" "$name" >&2
      return 1
      ;;
  esac
}
# Print a progress or diagnostic message on stderr.
# (stderr rather than /dev/tty, so the script also runs without a terminal.)
_note() {
  printf '%s\n' "$*" >&2
}

# Decode the spooled payload of the part that just ended, if there is one.
#   $1 - Content-Transfer-Encoding of the part (empty means the MIME default, 7bit)
#   $2 - output path of the part (empty when the part is not being saved)
_finish_part() {
  local enc=$1 target=$2
  [[ -n $target && -f "${target}.xxx" ]] || return 0
  decode "${enc:-7bit}" "$target"
}

#######################################
# Split a multipart/related MIME stream read from stdin into files.
#
# Expected input: first line "MIME-Version: ...", then a
# "Content-Type: multipart/related; boundary=..." header on a single line,
# then parts separated by "--<boundary>" and closed by "--<boundary>--".
# A part is saved only when it has a "Content-Location: file:///..." header;
# the path after "file:///" is written below the current directory with every
# ":" replaced by "_" (so "C:/x/y" becomes "./C_/x/y"). The payload is spooled
# to "<path>.xxx" and handed to decode() when the part ends.
#
# Parts whose location is not a file:/// URL, is empty, names a directory, or
# contains a ".." component are skipped (never written outside the current
# directory); the remaining parts are still processed.
#
# Arguments: none (reads stdin)
# Outputs:   progress and diagnostics on stderr; extracted files on disk
# Returns:   0 when the stream was processed; exits the shell with status 1
#            when the input is not MIME, has no boundary, or the output
#            directory / spool file cannot be created
#######################################
split_mime_stream() {
  local line value path dirn flnm
  local div=''         # "--" + boundary; empty until the boundary header is found
  local file=''        # output path of the current part; empty = do not save
  local cte=''         # Content-Transfer-Encoding of the current part
  local header='no'    # 'yes' between a delimiter and the blank line after the headers
  local -i nl=0 ol=0   # input line counter / payload lines of the current part
  # Capture group 2 holds a quoted boundary, group 3 an unquoted one.
  local boundary_re='^Content-Type:[[:space:]]*multipart/related;[[:space:]]*boundary=("([^"]+)"|([^;[:space:]"]+))'

  # IFS= and -r keep leading whitespace and backslashes of payload lines intact;
  # the "|| [[ -n ... ]]" also accepts a last line without a trailing newline.
  while IFS= read -r line || [[ -n $line ]]; do
    nl=$((nl + 1))
    line=${line//$'\r'/}   # input normally has CRLF line ends

    if ((nl == 1)); then
      [[ $line == MIME-Version:* ]] || { _note "Not a MIME file."; exit 1; }
      _note "$nl: $line"
      continue
    fi

    # Top-level headers: everything is ignored until the first Content-Type.
    if [[ -z $div ]]; then
      if [[ $line == Content-Type:* ]]; then
        # Content-Type: multipart/related; boundary="----=_NextPart_01D28B89"
        [[ $line =~ $boundary_re ]] || { _note "No boundary."; exit 1; }
        div="--${BASH_REMATCH[2]}${BASH_REMATCH[3]}"
        _note "$nl: Divider=[$div]"
      fi
      continue
    fi

    if [[ $line == "${div}--" ]]; then
      _note "$nl: Last divider ($file:$ol)."
      _finish_part "$cte" "$file"
      file=''; cte=''; ol=0
      break
    elif [[ $line == "$div" ]]; then
      _note "$nl: Divider ($file:$ol)."
      _finish_part "$cte" "$file"
      # Reset per-part state so a part without its own headers does not
      # inherit the file name or encoding of the previous one.
      file=''; cte=''; ol=0
      header='yes'
      continue
    fi

    if [[ $header == yes ]]; then
      case $line in
        '')
          header='no'
          ;;
        Content-Location:*)
          value=${line#*:}
          # Trim surrounding whitespace only; the path itself is kept as is.
          value=${value#"${value%%[![:space:]]*}"}
          value=${value%"${value##*[![:space:]]}"}
          path=${value#file:///}
          path=${path//:/_}
          if [[ $value != file:///* || -z $path || $path == */ \
            || "/$path/" == */../* ]]; then
            _note "$nl: Cannot save to '$value'"
            file=''; ol=0
          else
            file="./$path"
            dirn=$(dirname -- "$file")
            flnm=$(basename -- "$file")
            mkdir -p -- "$dirn" || { _note "Cannot mkdir '$dirn'."; exit 1; }
            _note "$nl: Will save '$flnm' to '$dirn/'"
            # Truncate instead of touch: a stale spool file left by an earlier
            # run must not be appended to.
            true > "${file}.xxx" || { _note "Cannot touch '${file}.xxx'."; exit 1; }
          fi
          ;;
        Content-Transfer-Encoding:*)
          cte=${line#*:}
          cte=${cte//[[:space:]]/}
          _note "$nl: Content-Transfer-Encoding: [$cte]"
          ;;
      esac
      continue
    fi

    # Payload line. Before the first delimiter, and for skipped parts, no file
    # is set and the line is dropped.
    if [[ -n $file ]]; then
      printf '%s\n' "$line" >> "${file}.xxx"
      ol=$((ol + 1))
    fi
  done

  # Input ended without the closing delimiter: still decode what was spooled.
  _finish_part "$cte" "$file"
  return 0
}

split_mime_stream
# EOF #
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment