Created
August 28, 2013 12:07
-
-
Save mishin/6365277 to your computer and use it in GitHub Desktop.
run perl from java in OmegaT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2010 Alex Buloichik, Didier Briel
               2011 Briac Pilpre, Alex Buloichik
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 **************************************************************************/
package org.omegat.core.machinetranslators; | |
import java.io.IOException; | |
import java.util.Map; | |
import java.util.TreeMap; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import org.omegat.util.Language; | |
import org.omegat.util.OStrings; | |
import org.omegat.util.PatternConsts; | |
import org.omegat.util.Preferences; | |
import org.omegat.util.WikiGet; | |
import java.io.BufferedInputStream; | |
import java.io.BufferedReader; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
/** | |
* Support of Google Translate API v.2 machine translation. | |
* https://code.google.com/apis/language/translate/v2/getting_started.html | |
* | |
* @author Alex Buloichik ([email protected]) | |
* @author Didier Briel | |
* @author Briac Pilpre | |
* @author Nikolay Mishin ([email protected]) | |
*/ | |
public class Google2Translate extends BaseTranslate { | |
protected static final String GT_URL = "https://www.googleapis.com/language/translate/v2"; | |
protected static final Pattern RE_UNICODE = Pattern.compile("\\\\u([0-9A-Fa-f]{4})"); | |
protected static final Pattern RE_HTML = Pattern.compile("&#([0-9]+);"); | |
@Override | |
protected String getPreferenceName() { | |
return Preferences.ALLOW_GOOGLE2_TRANSLATE; | |
} | |
public String getName() { | |
return OStrings.getString("MT_ENGINE_GOOGLE2"); | |
} | |
@Override | |
protected String translate(Language sLang, Language tLang, String text) throws Exception { | |
String trText = text.length() > 5000 ? text.substring(0, 4997) + "..." : text; | |
String targetLang = tLang.getLanguageCode(); | |
// Differentiate in target between simplified and traditional Chinese | |
if ((tLang.getLanguage().compareToIgnoreCase("zh-cn") == 0) | |
|| (tLang.getLanguage().compareToIgnoreCase("zh-tw") == 0)) | |
targetLang = tLang.getLanguage(); | |
else if ((tLang.getLanguage().compareToIgnoreCase("zh-hk") == 0)) | |
targetLang = "ZH-TW"; // Google doesn't recognize ZH-HK | |
String googleKey = System.getProperty("google.api.key"); | |
if (googleKey == null) { | |
return perl_translate(sLang,tLang,text); | |
//OStrings.getString("GOOGLE_API_KEY_NOTFOUND"); | |
} | |
Map<String, String> params = new TreeMap<String, String>(); | |
params.put("key", googleKey); | |
params.put("source", sLang.getLanguageCode()); | |
params.put("target", targetLang); | |
params.put("q", trText); | |
Map<String, String> headers = new TreeMap<String, String>(); | |
headers.put("X-HTTP-Method-Override", "GET"); | |
String v; | |
try { | |
v = WikiGet.post(GT_URL, params, headers); | |
} catch (IOException e) { | |
return e.getLocalizedMessage(); | |
} | |
while (true) { | |
Matcher m = RE_UNICODE.matcher(v); | |
if (!m.find()) { | |
break; | |
} | |
String g = m.group(); | |
char c = (char) Integer.parseInt(m.group(1), 16); | |
v = v.replace(g, Character.toString(c)); | |
} | |
v = v.replace(""", """); | |
v = v.replace(" ", " "); | |
v = v.replace("&", "&"); | |
while (true) { | |
Matcher m = RE_HTML.matcher(v); | |
if (!m.find()) { | |
break; | |
} | |
String g = m.group(); | |
char c = (char) Integer.parseInt(m.group(1)); | |
v = v.replace(g, Character.toString(c)); | |
} | |
Pattern pattern = java.util.regex.Pattern.compile("\\{\\s*\"translatedText\"\\s*:\\s*\"(.*?)\"\\s*\\s*\\}\\s*]"); | |
Matcher matcher = pattern.matcher(v); | |
boolean matchFound = matcher.find(); | |
String tr = ""; | |
if (matchFound) { | |
tr = matcher.group(1); | |
} | |
// Attempt to clean spaces added by GT | |
// Spaces after | |
Matcher tag = PatternConsts.OMEGAT_TAG_SPACE.matcher(tr); | |
while (tag.find()) { | |
String searchTag = tag.group(); | |
if (text.indexOf(searchTag) == -1) { // The tag didn't appear with a | |
// trailing space in the source text | |
String replacement = searchTag.substring(0, searchTag.length() - 1); | |
tr = tr.replace(searchTag, replacement); | |
} | |
} | |
// Spaces before | |
tag = PatternConsts.SPACE_OMEGAT_TAG.matcher(tr); | |
while (tag.find()) { | |
String searchTag = tag.group(); | |
if (text.indexOf(searchTag) == -1) { // The tag didn't appear with a | |
// leading space in the source text | |
String replacement = searchTag.substring(1, searchTag.length()); | |
tr = tr.replace(searchTag, replacement); | |
} | |
} | |
return tr; | |
} | |
public static String perl_translate (Language sLang, Language tLang, String text) { | |
String trText = text.length() > 5000 ? text.substring(0, 4997) + "..." : text; | |
String perl_command=OStrings.getString("GOOGLE_PATH_TO_PERL")+" "+OStrings.getString("GOOGLE_PATH_TO_SCRIPT")+ | |
" --to " + tLang.getLanguageCode()+ " --from " + sLang.getLanguageCode() + " \"" + trText+ "\""; | |
String Out=""; | |
Runtime r = Runtime.getRuntime(); | |
try { | |
/* | |
* Here we are executing the UNIX command ls for directory listing. | |
* The format returned is the long format which includes file | |
* information and permissions. | |
*/ | |
// Process p = r.exec("ls -l"); | |
//Process p = r.exec("c:\\Share\\Dwimperl\\perl\\bin\\perl.exe c:\\TCPU59\\scripts\\google_trnslate.pl --to russian --from english \"Old awk always has a line loop, even if there are no line actions, whereas new awk does not.\""); | |
Process p = r.exec(perl_command); | |
InputStream in = p.getInputStream(); | |
BufferedInputStream buf = new BufferedInputStream(in); | |
InputStreamReader inread = new InputStreamReader(buf); | |
BufferedReader bufferedreader = new BufferedReader(inread); | |
// Read the ls output | |
String line; | |
while ((line = bufferedreader.readLine()) != null) { | |
//System.out.println(line); | |
Out+=line; | |
//return line; | |
} | |
// Check for ls failure | |
try { | |
if (p.waitFor() != 0) { | |
System.err.println("exit value = " + p.exitValue()); | |
} | |
} catch (InterruptedException e) { | |
System.err.println(e); | |
} finally { | |
// Close the InputStream | |
bufferedreader.close(); | |
inread.close(); | |
buf.close(); | |
in.close(); | |
} | |
} catch (IOException e) { | |
System.err.println(e.getMessage()); | |
} | |
return Out; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment