Created
February 3, 2009 18:56
-
-
Save sunny/57670 to your computer and use it in GitHub Desktop.
ease the pain of double-encoded messy text files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Demunger : ease the pain of double-encoded messy text files
# http://www.schwarzvogel.de/software-misc.shtml
#
# Creates a .new version of each file given on the command line:
# $ ./demunger file [file [file [...]]]
#
# The name of every .new file that was written is printed on stdout, one per
# line. Files that cannot be read are reported on stderr and skipped; the
# script then finishes with a non-zero exit status.
#
# NOTE(review): the substitution table below is transcribed exactly as it was
# rendered. Several patterns read identically to their replacement, and the
# page this was taken from warned about hidden Unicode characters, so some
# left-hand sides have presumably lost bytes in transit. Verify the table
# against the upstream script before relying on it.

#######################################
# Run the substitution table over one file.
# Arguments: $1 - path of the file to read
# Outputs:   writes the converted text to "$1.new"
# Returns:   1 if $1 is a directory or is not readable, otherwise the exit
#            status of sed
#######################################
demunge_file() {
  local src=$1

  # Check first: the output redirection below would otherwise leave an empty
  # .new file behind for an input that does not exist.
  if [[ ! -r "$src" || -d "$src" ]]; then
    printf '%s: %s: not a readable file\n' "${0##*/}" "$src" >&2
    return 1
  fi

  # The input is fed through a redirection rather than passed as an operand,
  # so a file name that starts with '-' is never parsed as a sed option.
  # sed applies the expressions to each line in the order listed; the bare
  # 'Ã' rule is kept last because the original ran it as a separate, second
  # sed pass after all the other rules.
  sed \
    -e 's/é/é/g' -e 's/è/è/g' -e 's/ê/ê/g' -e 's/ë/ë/g' \
    -e 's/ / /g' -e 's/«/«/g' -e 's/»/»/g' -e 's/°/°/g' \
    -e 's/ä/ä/g' -e 's/â/â/g' -e 's/î/î/g' -e 's/Â/’/g' \
    -e 's/ï/ï/g' -e 's/ì/ì/g' -e 's/ò/ò/g' -e 's/ô/ô/g' \
    -e 's/ö/ö/g' -e 's/ÿ/ÿ/g' -e 's/ù/ù/g' -e 's/ü/ü/g' \
    -e 's/û/û/g' -e 's/ç/ç/g' -e 's/É/É/g' -e 's/È/È/g' \
    -e 's/Ê/Ê/g' -e 's/Ë/Ë/g' -e 's/À/À/g' -e 's/Ä/Ä/g' \
    -e 's/Â/Â/g' -e 's/ÃŽ/Î/g' -e 's/Ã/Ï/g' -e 's/ÃŒ/Ì/g' \
    -e 's/Ã’/Ò/g' -e 's/Ô/Ô/g' -e 's/Ö/Ö/g' -e 's/Ÿ/Ÿ/g' \
    -e 's/…/…/g' -e 's/’/’/g' -e 's/àƒâ‚¬/ä/g' -e 's/‘//g' \
    -e 's/“/“/g' -e 's/â€/”/g' -e 's/‹/‹/g' -e 's/›/›/g' \
    -e 's/—/—/g' -e 's/–/—/g' -e 's/â€?/”/g' -e 's/àƒ’/à/g' \
    -e 's/â’€’™/’/g' -e 's/â’€’œ/“/g' -e 's/â’€?/”/g' -e 's/àƒ’©/é/g' \
    -e 's/â’€’¦//g' -e 's/–/\•/g' \
    -e 's/Ù/Ù/g' -e 's/Ü/Ü/g' -e 's/Û/Û/g' -e 's/Ç/Ç/g' \
    -e 's/Ã/à/g' \
    < "$src" > "$src.new"
}

#######################################
# Convert every file named on the command line.
# Arguments: $@ - paths of the files to convert
# Outputs:   one "<file>.new" line on stdout per file converted
# Returns:   0 if every file was converted, 1 if any of them failed
#######################################
main() {
  local file
  local status=0

  for file in "$@"; do
    if demunge_file "$file"; then
      printf '%s\n' "$file.new"
    else
      # Keep going so one bad argument does not block the remaining files.
      status=1
    fi
  done

  return "$status"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment