Created
May 29, 2012 10:03
-
-
Save PiotrNowicki/2823742 to your computer and use it in GitHub Desktop.
Polish letters to UTF-8 translator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##################################################################
## Polish letters to UTF-8 translator
##
## www.PiotrNowicki.com
##################################################################
##
## The purpose of this script is to translate Polish letters (with
## diacritical marks) into their Unicode escape sequences (\uXXXX).
##
## Some files need to be encoded using standards that don't allow
## Polish letters (like ISO-8859-1 / Java Properties files).
##
## Requires: BASH 4 (associative arrays)
##
## Usage: sed_pl.sh FILE
## Input: file to be transformed
## Output: transformed file printed to stdout
##         (diagnostics are printed to stderr)
## Exit status: 1 on usage error, missing file or unsupported BASH;
##              otherwise the exit status of sed
##
#################################################################

#######################################
# Replace every Polish diacritic in a file with its \uXXXX escape.
# Arguments: $1 - path of the file to transform
# Outputs:   transformed file contents on stdout
# Returns:   exit status of sed
#######################################
translate_polish() {
  local input=$1

  # Character -> replacement text. The doubled backslash is deliberate:
  # sed reads '\\' in the replacement part of s/// as one literal '\'.
  local -A swaps=(
    ['ą']='\\u0105'
    ['Ą']='\\u0104'
    ['ć']='\\u0107'
    ['Ć']='\\u0106'
    ['ę']='\\u0119'
    ['Ę']='\\u0118'
    ['ł']='\\u0142'
    ['Ł']='\\u0141'
    ['ń']='\\u0144'
    ['Ń']='\\u0143'
    ['ó']='\\u00F3'
    ['Ó']='\\u00D3'
    ['ś']='\\u015B'
    ['Ś']='\\u015A'
    ['ż']='\\u017C'
    ['Ż']='\\u017B'
    ['ź']='\\u017A'
    ['Ź']='\\u0179'
  )

  # Collect the sed expressions in an array so that each one reaches sed
  # as exactly one argument, with no word-splitting or globbing involved.
  # Iteration order of an associative array is unspecified, which is fine
  # here: no replacement text contains a character that is itself a key.
  local -a sed_args=()
  local key
  for key in "${!swaps[@]}"; do
    sed_args+=(-e "s/${key}/${swaps[$key]}/g")
  done

  # For debugging purposes
  #printf '%q ' sed "${sed_args[@]}" -- "$input"; echo

  # '--' stops option parsing so a file name starting with '-' is safe.
  sed "${sed_args[@]}" -- "$input"
}

#######################################
# Validate the command line and print the transformed file to stdout.
# Arguments: exactly one - input file to be transformed
# Outputs:   transformed file on stdout, diagnostics on stderr
# Returns:   1 on invalid usage, missing file or BASH older than 4;
#            otherwise the status of translate_polish
#######################################
main() {
  # Associative arrays are only available since BASH 4.0
  if (( BASH_VERSINFO[0] < 4 )); then
    echo 'sed_pl.sh requires BASH 4 or newer' >&2
    return 1
  fi

  # We accept only one argument - input file to be transformed
  if (( $# != 1 )); then
    echo 'Usage: sed_pl.sh FILE' >&2
    return 1
  fi

  if [[ ! -f "$1" ]]; then
    echo "File '$1' cannot be found" >&2
    return 1
  fi

  translate_polish "$1"
}

# Must stay the last command: its status becomes the script's exit status.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment