Created
August 10, 2016 22:04
-
-
Save mayhewsw/9d14bd5218bce1b57f050ccc79ce08e5 to your computer and use it in GitHub Desktop.
Google API Word Translation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import codecs
import HTMLParser
import os
import shelve

from googleapiclient.discovery import build
# As of Aug 1 2016
# The key may be supplied through the GOOGLE_API_KEY environment variable so a
# real credential never has to be written into this file. When the variable is
# unset the placeholder is used, exactly as before.
API_KEY = os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY_HERE")
def _memo_key(word):
    """Return ``word`` as a native ``str`` usable as a shelf key.

    Text decoded by ``codecs.open`` is ``unicode`` on Python 2, and calling
    ``str()`` on it raises ``UnicodeEncodeError`` for any non-ASCII word.
    Encoding to UTF-8 gives the same key ``str()`` produced for ASCII words,
    so entries already stored in an existing shelf are still found.
    """
    if isinstance(word, str):
        return word
    return word.encode("utf-8")


def translatefile(fname, outfname, source, target, batch_size=75):
    """
    Translate the first word of each tab-separated row of a file.

    The first column of every row in ``fname`` is translated from ``source``
    to ``target`` and one ``origword<TAB>transword`` row per input row is
    written to ``outfname`` (UTF-8). Only the first whitespace-separated token
    of each translation is kept, after HTML entities are unescaped. Language
    codes are Google two letter codes (en, uz, tr, de, etc.)

    Translations are cached in a shelf named
    ``translatedict-<source>-<target>.shelf`` in the current directory, so
    words already present there are not requested again.

    Args:
        fname: path of the UTF-8 input file.
        outfname: path of the output file to (over)write.
        source: source language code.
        target: target language code.
        batch_size: maximum number of words sent per API request.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    service = build('translate', 'v2', developerKey=API_KEY)
    h = HTMLParser.HTMLParser()

    # Read the input before touching the shelf, so an unreadable input file
    # does not leave an open (or freshly created) shelf behind.
    with codecs.open(fname, "r", "utf-8") as f:
        srcwords = [line.split("\t")[0].strip() for line in f]

    memo = shelve.open("translatedict-" + source + "-" + target + ".shelf")
    try:
        # Each uncached word is requested once, however often it occurs in
        # the input. Empty first columns are never sent to the service.
        pending = []
        seen = set()
        for srcword in srcwords:
            if not srcword or srcword in seen or _memo_key(srcword) in memo:
                continue
            seen.add(srcword)
            pending.append(srcword)

        for i in range(0, len(pending), batch_size):
            iwords = pending[i:i + batch_size]
            print("size of request: %d" % len(iwords))
            # Best effort: a failed batch is reported and skipped; its words
            # are then flagged as "not in memo" in the output.
            try:
                response = service.translations().list(
                    source=source, target=target, q=iwords).execute()
                translations = response["translations"]
                if translations:
                    for w, t in zip(iwords, translations):
                        memo[_memo_key(w)] = t["translatedText"]
                else:
                    print("WHAAAAT")
            except Exception as e:
                print("Whoops... exception")
                print(e)

        outlines = []
        for srcword in srcwords:
            key = _memo_key(srcword)
            if key in memo:
                # An empty or whitespace-only translation has no first token;
                # emit an empty translation instead of raising IndexError.
                parts = h.unescape(memo[key]).split()
                trans = parts[0] if parts else ""
            else:
                trans = "not in memo" + srcword
            outlines.append(srcword + "\t" + trans + "\n")
    finally:
        # Always release the shelf, even when something above raised.
        memo.close()

    with codecs.open(outfname, "w", "utf-8") as out:
        out.writelines(outlines)
if __name__ == "__main__":
    import argparse

    # Positional command-line arguments as (name, help text), in the order
    # they must be given on the command line.
    positionals = (
        ("fname", "Input file name (first word of each row is translated)"),
        ("outfname", "Output file. Format: origword transword"),
        ("source", "Source language code (2 letter)"),
        ("target", "Target language code (2 letter)"),
    )

    cli = argparse.ArgumentParser(description="")
    for argname, helptext in positionals:
        cli.add_argument(argname, help=helptext)

    opts = cli.parse_args()
    translatefile(opts.fname, opts.outfname, opts.source, opts.target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment