mayhewsw · August 10, 2016 22:04
diff --git a/translate.py b/translate.py
 #!/usr/bin/python
 from googleapiclient.discovery import build
 import codecs
 import HTMLParser
 import shelve

 # Google API developer key; translatefile() passes it to build() as developerKey.
 # The value below is a placeholder and must be replaced with a real key.
 # NOTE(review): the original note here read "As of Aug 1 2016" - presumably the
 # date the key was last known to be valid; confirm.
 API_KEY = "YOUR_API_KEY_HERE"


 def translatefile(fname, outfname, source, target):
     """
     Translate the first tab-separated field of every row of fname.

     fname is read as UTF-8. Every first field that is not cached yet is sent
     to the Google Translate v2 service in batches of 75, and the raw results
     are cached in a shelve file named
     "translatedict-<source>-<target>.shelf" (relative to the current working
     directory), so a rerun only requests words that are still missing.

     One row per input row is written to outfname (UTF-8, overwritten):
     the source word, a tab, and the first whitespace-separated token of the
     HTML-unescaped translation. Words that have no cached translation get
     "not in memo" + word in the translation column instead.

     fname    -- path of the tab-separated input file
     outfname -- path of the output file
     source   -- Google source language code (en, uz, tr, de, etc.)
     target   -- Google target language code, same format
     """
     batch_size = 75  # number of words sent per API request

     def memo_key(word):
         # shelve keys must be native strings. On Python 2 the decoded words
         # are unicode, and str() on a non-ASCII word raises
         # UnicodeEncodeError, so encode explicitly. ASCII words map to the
         # same key as before, which keeps existing shelf files usable.
         if isinstance(word, str):
             return word
         return word.encode("utf-8")

     service = build('translate', 'v2', developerKey=API_KEY)
     html_parser = HTMLParser.HTMLParser()

     # Read the input before opening the shelf so that a missing input file
     # does not leave an open shelf behind.
     with codecs.open(fname, "r", "utf-8") as f:
         srcwords = [line.split("\t")[0].strip() for line in f.readlines()]

     memo = shelve.open("translatedict-" + source + "-" + target + ".shelf")
     try:
         # Collect the words that still need a request: each word once, and
         # never a blank one (blank rows would only waste a request slot).
         pending = []
         seen = set()
         for word in srcwords:
             if word and word not in seen and memo_key(word) not in memo:
                 seen.add(word)
                 pending.append(word)

         for start in range(0, len(pending), batch_size):
             iwords = pending[start:start + batch_size]
             print("size of request: %d" % len(iwords))
             try:
                 response = service.translations().list(
                     source=source, target=target, q=iwords).execute()
                 translations = response.get("translations", [])
                 if translations:
                     for w, t in zip(iwords, translations):
                         memo[memo_key(w)] = t["translatedText"]
                 else:
                     print("WHAAAAT")
             except Exception as e:
                 # Deliberate best effort: a failed batch is reported and
                 # skipped; its words end up as "not in memo" in the output.
                 print("Whoops... exception")
                 print(e)

         outlines = []
         for srcword in srcwords:
             key = memo_key(srcword)
             if key in memo:
                 unescaped = html_parser.unescape(memo[key])
                 tokens = unescaped.split()
                 # An empty translation has no first token; emit an empty
                 # column rather than raising IndexError.
                 trans = tokens[0] if tokens else ""
             else:
                 # otherwise, just flag it
                 trans = "not in memo" + srcword
             outlines.append(srcword + "\t" + trans + "\n")
     finally:
         # Always release the shelf, even when a lookup or write fails.
         memo.close()

     with codecs.open(outfname, "w", "utf-8") as out:
         out.writelines(outlines)


 if __name__ == "__main__":
     import argparse

     # Four required positionals, declared in the order translatefile() takes them.
     cli = argparse.ArgumentParser(description="")
     positionals = (
         ("fname", "Input file name (first word of each row is translated)"),
         ("outfname", "Output file. Format: origword  transword"),
         ("source", "Source language code (2 letter)"),
         ("target", "Target language code (2 letter)"),
     )
     for arg_name, arg_help in positionals:
         cli.add_argument(arg_name, help=arg_help)

     opts = cli.parse_args()

     translatefile(opts.fname, opts.outfname, opts.source, opts.target)
	#!/usr/bin/python
	from googleapiclient.discovery import build
	import codecs
	import HTMLParser
	import shelve

	# Google API developer key; translatefile() passes it to build() as developerKey.
	# The value below is a placeholder and must be replaced with a real key.
	# NOTE(review): the original note here read "As of Aug 1 2016" - presumably the
	# date the key was last known to be valid; confirm.
	API_KEY = "YOUR_API_KEY_HERE"


	def translatefile(fname, outfname, source, target):
	    """
	    Translate the first tab-separated field of every row of fname.

	    fname is read as UTF-8. Every first field that is not cached yet is sent
	    to the Google Translate v2 service in batches of 75, and the raw results
	    are cached in a shelve file named
	    "translatedict-<source>-<target>.shelf" (relative to the current working
	    directory), so a rerun only requests words that are still missing.

	    One row per input row is written to outfname (UTF-8, overwritten):
	    the source word, a tab, and the first whitespace-separated token of the
	    HTML-unescaped translation. Words that have no cached translation get
	    "not in memo" + word in the translation column instead.

	    fname    -- path of the tab-separated input file
	    outfname -- path of the output file
	    source   -- Google source language code (en, uz, tr, de, etc.)
	    target   -- Google target language code, same format
	    """
	    batch_size = 75  # number of words sent per API request

	    def memo_key(word):
	        # shelve keys must be native strings. On Python 2 the decoded words
	        # are unicode, and str() on a non-ASCII word raises
	        # UnicodeEncodeError, so encode explicitly. ASCII words map to the
	        # same key as before, which keeps existing shelf files usable.
	        if isinstance(word, str):
	            return word
	        return word.encode("utf-8")

	    service = build('translate', 'v2', developerKey=API_KEY)
	    html_parser = HTMLParser.HTMLParser()

	    # Read the input before opening the shelf so that a missing input file
	    # does not leave an open shelf behind.
	    with codecs.open(fname, "r", "utf-8") as f:
	        srcwords = [line.split("\t")[0].strip() for line in f.readlines()]

	    memo = shelve.open("translatedict-" + source + "-" + target + ".shelf")
	    try:
	        # Collect the words that still need a request: each word once, and
	        # never a blank one (blank rows would only waste a request slot).
	        pending = []
	        seen = set()
	        for word in srcwords:
	            if word and word not in seen and memo_key(word) not in memo:
	                seen.add(word)
	                pending.append(word)

	        for start in range(0, len(pending), batch_size):
	            iwords = pending[start:start + batch_size]
	            print("size of request: %d" % len(iwords))
	            try:
	                response = service.translations().list(
	                    source=source, target=target, q=iwords).execute()
	                translations = response.get("translations", [])
	                if translations:
	                    for w, t in zip(iwords, translations):
	                        memo[memo_key(w)] = t["translatedText"]
	                else:
	                    print("WHAAAAT")
	            except Exception as e:
	                # Deliberate best effort: a failed batch is reported and
	                # skipped; its words end up as "not in memo" in the output.
	                print("Whoops... exception")
	                print(e)

	        outlines = []
	        for srcword in srcwords:
	            key = memo_key(srcword)
	            if key in memo:
	                unescaped = html_parser.unescape(memo[key])
	                tokens = unescaped.split()
	                # An empty translation has no first token; emit an empty
	                # column rather than raising IndexError.
	                trans = tokens[0] if tokens else ""
	            else:
	                # otherwise, just flag it
	                trans = "not in memo" + srcword
	            outlines.append(srcword + "\t" + trans + "\n")
	    finally:
	        # Always release the shelf, even when a lookup or write fails.
	        memo.close()

	    with codecs.open(outfname, "w", "utf-8") as out:
	        out.writelines(outlines)


	if __name__ == "__main__":
	    import argparse

	    # Four required positionals, declared in the order translatefile() takes them.
	    cli = argparse.ArgumentParser(description="")
	    positionals = (
	        ("fname", "Input file name (first word of each row is translated)"),
	        ("outfname", "Output file. Format: origword transword"),
	        ("source", "Source language code (2 letter)"),
	        ("target", "Target language code (2 letter)"),
	    )
	    for arg_name, arg_help in positionals:
	        cli.add_argument(arg_name, help=arg_help)

	    opts = cli.parse_args()

	    translatefile(opts.fname, opts.outfname, opts.source, opts.target)
No results found