Skip to content

Instantly share code, notes, and snippets.

@amake
Last active November 10, 2015 05:39
Show Gist options
  • Save amake/5d13af41cd2b6d0cdc85 to your computer and use it in GitHub Desktop.
Save amake/5d13af41cd2b6d0cdc85 to your computer and use it in GitHub Desktop.
Okapi Rainbow pipeline for converting Android strings to TMX
<?xml version="1.0" encoding="UTF-8"?>
<!--
Pipeline for converting pairs of raw Android SDK values*/strings.xml
files into TMX (for translation reference, etc.).
Load this pipeline into Okapi Rainbow and set the input files, e.g.:
Input List 1: $ANDROID_HOME/.../values/strings.xml
Input List 2: $ANDROID_HOME/.../values-ja/strings.xml
Use the okf_xml@AndroidStringsImproved.fprm filter config included
in this gist as the configuration for both files. Be sure to set
languages and encodings appropriately, e.g.:
Source: en-us, UTF-8
Target: ja-jp, UTF-8
Set the TMX output location in the Format Conversion step.
Note that surrounding quotes are stripped.
-->
<!-- Steps run in the order listed: raw document to filter events, ID-based copy,
     search and replace, then format conversion to TMX. -->
<rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
<!-- ID-based copy step. NOTE(review): presumably this pairs entries of the second
     input list with entries of the first by unit ID; confirm against the Okapi step
     documentation. Both marking options (translate-no, approved) are switched off. -->
<step class="net.sf.okapi.steps.idbasedcopy.IdBasedCopyStep">#v1
markAsTranslateNo.b=false
markAsApproved.b=false</step>
<!-- Regex search and replace on both source and target text. The single enabled rule
     (search0 with an empty replace0) deletes a double quote at the very start or very
     end of the text, whether it appears as a literal " or as the text &quot;.
     Log saving is disabled (saveLog.b=false). -->
<step class="net.sf.okapi.steps.searchandreplace.SearchAndReplaceStep">#v1
regEx.b=true
dotAll.b=false
ignoreCase.b=false
multiLine.b=false
target.b=true
source.b=true
replaceALL.b=true
replacementsPath=
logPath=${rootDir}/replacementsLog.txt
saveLog.b=false
count.i=1
use0=true
search0=^"|^&amp;quot;|"$|&amp;quot;$
replace0=</step>
<!-- Format conversion to TMX (outputFormat=tmx), written as a single output file.
     Edit outputPath to choose where the TMX file is written. -->
<step class="net.sf.okapi.steps.formatconversion.FormatConversionStep">#v1
singleOutput.b=true
autoExtensions.b=false
targetStyle.i=0
outputPath=Android.tmx
outputFormat=tmx
useGenericCodes.b=false
skipEntriesWithoutText.b=true
approvedEntriesOnly.b=false
overwriteSameSource.b=false</step>
</rainbowPipeline>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
An improved Okapi Rainbow filter configuration for Android strings.xml files.
This config will extract all translatable text AND produce unique unit IDs
for all strings, plural items, and string-array items.
-->
<its:rules version="1.0"
           xmlns:its="http://www.w3.org/2005/11/its"
           xmlns:itsx="http://www.w3.org/2008/12/its-extensions"
           xmlns:a="http://schemas.android.com/apk/res/android"
           xmlns:okp="okapi-framework:xmlfilter-options">

  <!-- Baseline: the root element is marked non-translatable; the rules below
       opt specific nodes back in. -->
  <its:translateRule selector="/*"
                     translate="no"/>

  <!-- android:text attributes, except values that start with '@'. -->
  <its:translateRule selector="//*/@a:text[not(starts-with(.,'@'))]"
                     translate="yes"
                     itsx:whiteSpaces="preserve"/>

  <!-- <string> without a product attribute: the unit ID is the name attribute. -->
  <its:translateRule selector="//string[not(@product)]"
                     translate="yes"
                     itsx:idValue="./@name"
                     itsx:whiteSpaces="preserve"/>

  <!-- <string> with a product attribute: the unit ID is name_product. -->
  <its:translateRule selector="//string[@product]"
                     translate="yes"
                     itsx:idValue="concat(@name, '_', @product)"
                     itsx:whiteSpaces="preserve"/>

  <!-- <item> with a quantity attribute: the unit ID is parentName_quantity. -->
  <its:translateRule selector="//item[@quantity]"
                     translate="yes"
                     itsx:idValue="concat(../@name, '_', @quantity)"
                     itsx:whiteSpaces="preserve"/>

  <!-- <item> without a quantity attribute: the unit ID is parentName_index, where
       index is the number of preceding sibling <item> elements (zero-based). -->
  <its:translateRule selector="//item[not(@quantity)]"
                     translate="yes"
                     itsx:idValue="concat(../@name, '_', count(preceding-sibling::item))"
                     itsx:whiteSpaces="preserve"/>

  <!-- Any element nested inside a <string> or <item> is treated as part of the text. -->
  <its:withinTextRule selector="//string/descendant::*|//item/descendant::*"
                      withinText="yes"/>

  <!-- Inline-code patterns. The element text below is parameter data and is left
       unindented on purpose.
       rule0: printf-style placeholders ending in s, d or f (e.g. %s, %1$s, %,d, %.2f)
       rule1: brace placeholders with at most one digit (e.g. {0}, {})
       rule2: "[[" + digit + "$", optionally followed by "]]", or a lone "]]" -->
  <okp:codeFinder useCodeFinder="yes">#v1
count.i=3
rule0=%\d?\$?,?\.?\d?[sdf]
rule1=\{\d?\}
rule2=\[\[\d\$(?:\]\])?|\]\]
</okp:codeFinder>
</its:rules>
#!/bin/sh
# Sample script to generate Android strings TMX files
# Preparing the environment ($ANDROID_HOME, $OKAPI_HOME, etc.)
# is an exercise left to the reader.
# Require exactly three positional arguments. The usage text goes to stderr so
# it never mixes with output that a caller may be capturing.
if [ "$#" -ne 3 ]; then
    echo "Usage: android-gen.sh API_LEVEL SRC_LANG TRG_LANG" >&2
    exit 1
fi

API_LEVEL=$1   # Android API level, used to locate platforms/android-<level>
SRC_LANG=$2    # source language tag, e.g. en-us
TRG_LANG=$3    # target language tag, e.g. ja-jp

echo "Extracting ${SRC_LANG}-to-${TRG_LANG} TMX of Android API ${API_LEVEL} resources"

# -XstartOnFirstThread is a macOS-only JVM option; JVMs on other platforms
# reject it as unrecognized and refuse to start, so only pass it on Darwin.
case "$(uname -s)" in
    Darwin) JVM_OPTS="-XstartOnFirstThread " ;;
    *)      JVM_OPTS="" ;;
esac

# Command line used to launch Rainbow. It is kept as a plain string and is
# meant to be expanded unquoted so that it splits into command + arguments.
RAINBOW="java ${JVM_OPTS}-jar ${OKAPI_HOME}/lib/rainbow.jar"

# Directory passed to Rainbow via -pd, and the filter configuration ID passed via -fc.
FILTERS_DIR="${OKAPI_FILTERS_HOME}"
FILTER_CONFIG="okf_xml@AndroidStringsImproved"

# Resource directory of the requested platform, and the TMX file name to produce.
RES_DIR="${ANDROID_HOME}/platforms/android-${API_LEVEL}/data/res"
OUT_FILE="Android_${API_LEVEL}_${SRC_LANG}_${TRG_LANG}.tmx"
# Print the suffix that turns "values" into the Android resource directory
# name for a language tag.
#
#   $1  language tag: "lang" or "lang-region" (e.g. "ja", "ja-jp")
#
# Output (stdout):
#   en-us or empty -> nothing (the unqualified values/ directory)
#   lang-region    -> "-lang-rREGION" (region upper-cased), e.g. "-ja-rJP"
#   lang           -> "-lang", e.g. "-ja"
#
# Only the first two dash-separated components are used. Written as a POSIX
# function (no "function" keyword) because the script runs under /bin/sh.
android_langify() {
    case "$1" in
        en-us|'')
            # Default resources: no qualifier suffix.
            ;;
        *-*)
            # POSIX sh has no "local"; prefixed names avoid clobbering globals.
            _al_lang=${1%%-*}
            _al_rest=${1#*-}
            _al_region=$(printf '%s' "${_al_rest%%-*}" | tr '[:lower:]' '[:upper:]')
            printf '%s\n' "-${_al_lang}-r${_al_region}"
            ;;
        *)
            # Language-only tag: still needs the leading dash ("values-ja").
            printf '%s\n' "-$1"
            ;;
    esac
}
# Print the path of the strings.xml file to use for a language tag.
#
#   $1  language tag, e.g. "en-us", "ja-jp" or "ja"
#
# The fully qualified directory (values<suffix from android_langify>) is tried
# first. If no such file exists, the language-only directory (values-<lang>)
# is printed instead, whether or not that file exists.
# Reads the global RES_DIR. Written as a POSIX function because the script
# runs under /bin/sh.
get_strings_file() {
    strings_path="${RES_DIR}/values$(android_langify "$1")/strings.xml"
    if [ ! -f "$strings_path" ]; then
        # Drop everything from the first dash on: "ja-jp" -> "ja".
        strings_path="${RES_DIR}/values-${1%%-*}/strings.xml"
    fi
    echo "$strings_path"
}
# Resolve the source and target strings.xml files.
FILE1=$(get_strings_file "$SRC_LANG")
FILE2=$(get_strings_file "$TRG_LANG")
echo "File 1: ${FILE1}"
echo "File 2: ${FILE2}"
echo "Out: ${OUT_FILE}"

# Fail early with a clear message instead of launching Rainbow on a missing file.
for input_file in "$FILE1" "$FILE2"; do
    if [ ! -f "$input_file" ]; then
        echo "Error: strings file not found: ${input_file}" >&2
        exit 1
    fi
done

# Write the pipeline definition into the current directory. The heredoc is
# deliberately unquoted so that $(pwd) and ${OUT_FILE} are expanded into outputPath.
PIPELINE="$(pwd)/android-gen-pipeline.pln"
cat <<EOF > "$PIPELINE"
<?xml version="1.0" encoding="UTF-8"?>
<rainbowPipeline version="1"><step class="net.sf.okapi.steps.common.RawDocumentToFilterEventsStep"></step>
<step class="net.sf.okapi.steps.idbasedcopy.IdBasedCopyStep">#v1
markAsTranslateNo.b=false
markAsApproved.b=false</step>
<step class="net.sf.okapi.steps.searchandreplace.SearchAndReplaceStep">#v1
regEx.b=true
dotAll.b=false
ignoreCase.b=false
multiLine.b=false
target.b=true
source.b=true
replaceALL.b=true
replacementsPath=
logPath=
saveLog.b=false
count.i=1
use0=true
search0=^"|^&amp;quot;|"$|&amp;quot;$
replace0=</step>
<step class="net.sf.okapi.steps.formatconversion.FormatConversionStep">#v1
singleOutput.b=true
autoExtensions.b=false
targetStyle.i=0
outputPath=$(pwd)/${OUT_FILE}
outputFormat=tmx
useGenericCodes.b=false
skipEntriesWithoutText.b=true
approvedEntriesOnly.b=false
overwriteSameSource.b=false</step>
</rainbowPipeline>
EOF

# Run Rainbow on both files with the same filter configuration and UTF-8 for
# both encodings. $RAINBOW is intentionally unquoted: it holds the command plus
# its JVM arguments and must be word-split. Everything else is quoted so that
# paths or tags containing spaces stay intact.
$RAINBOW -sl "$SRC_LANG" -tl "$TRG_LANG" \
-se utf-8 -te utf-8 \
-pln "$PIPELINE" \
-pd "$FILTERS_DIR" \
-np \
"$FILE1" -fc "$FILTER_CONFIG" \
"$FILE2" -fc "$FILTER_CONFIG"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment