Skip to content

Instantly share code, notes, and snippets.

@Nodraak
Created January 19, 2016 16:25
Show Gist options
  • Save Nodraak/dcc68522ce9011a31b1c to your computer and use it in GitHub Desktop.
Save Nodraak/dcc68522ce9011a31b1c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import codecs
import subprocess
import time
from googleapiclient.discovery import build
# Developer key handed to the Google API client; empty placeholder here.
API_KEY = ''

# How many words are sent per translation request
# (the Google API limitation noted by the author is 100/sec).
CHUNK_SIZE = 100

# Directory the input .xls files are read from.
DIR_XLS_IN = 'some_directory_to_read_xls_from'

# Directory the translated .xls files are written to.
DIR_XLS_OUT = 'some_directory_to_write_xls_to'

# Workbooks to translate, looked up inside DIR_XLS_IN; add more entries as needed.
FILENAMES = (
    'some_file.xls',
    'some_other_file.xls',
)
def translate(service, words):
    """Translate a batch of French strings to English in one API request.

    Args:
        service: Translate API client exposing ``translations().list(...)``
            (in this file it is created with ``build('translate', 'v2', ...)``).
        words: list of strings to translate.

    Returns:
        A list holding the ``translatedText`` value of every entry in the
        response's ``translations``, in response order. An empty ``words``
        yields an empty list without contacting the API.
    """
    if not words:
        # Nothing to translate: avoid issuing a request with no `q` values.
        return []
    response = service.translations().list(source='fr', target='en', q=words).execute()
    return [item['translatedText'] for item in response['translations']]
def translate_file(service, filename):
    """Translate the second column of one spreadsheet from French to English.

    ``DIR_XLS_IN/filename`` is converted to CSV with ``ssconvert``; the second
    comma-separated field of every row after the header is sent to
    ``translate`` in batches of ``CHUNK_SIZE``; each original row is then
    written back with its translation appended, and the result is converted
    to ``DIR_XLS_OUT/<filename>.xls``. The two intermediate CSV files are
    created in the current working directory and removed afterwards, even
    when a step fails.

    Args:
        service: Translate API client, passed through to ``translate``.
        filename: name of the workbook inside ``DIR_XLS_IN``.

    Raises:
        subprocess.CalledProcessError: if an ``ssconvert`` run exits non-zero.
        IndexError: if a data row has no second comma-separated field.
    """
    import os  # local import: only needed for the temp-file cleanup below

    print('Translating file %s...' % filename)

    csv_in = '%s.csv' % filename
    csv_out = '%s.out.csv' % filename

    def _parse_line(line):
        # extract the word to translate from the csv line (second field,
        # double quotes turned into spaces, surrounding whitespace dropped)
        return line.split(',')[1].replace('"', ' ').strip()

    try:
        # convert xls to csv; fail loudly instead of ignoring the exit status
        subprocess.check_call(['ssconvert', '%s/%s' % (DIR_XLS_IN, filename), csv_in])

        # read the csv file, skipping the first line (column header)
        with codecs.open(csv_in, 'r', 'utf-8') as f:
            lines = [line.strip('\n') for line in f.readlines()[1:]]
        print('\t%d lines to translate.' % len(lines))

        # one API request per chunk; stepping the range avoids the `/`
        # division that yields a float (and breaks range()) on Python 3
        translated = []
        for start in range(0, len(lines), CHUNK_SIZE):
            end = min(start + CHUNK_SIZE, len(lines))
            print('\tTranslating lines %d to %d...' % (start, end))
            cur_words = [_parse_line(line) for line in lines[start:end]]
            translated.extend(translate(service, cur_words))
            time.sleep(1)  # api's rate limit

        # save words to file: each original row followed by its translation
        print('\tWriting translated words to output...')
        with codecs.open(csv_out, 'w', 'utf-8') as f:
            f.write('Clef,"Message Français","Message Anglais"\n')
            for row, word in zip(lines, translated):
                f.write('%s"%s",\n' % (row, word))

        # convert back to xls
        # NOTE(review): when `filename` already ends in .xls the output is
        # named "<name>.xls.xls" - kept as in the original, confirm intent.
        subprocess.check_call(['ssconvert', csv_out, '%s/%s.xls' % (DIR_XLS_OUT, filename)])
    finally:
        # best-effort cleanup: a temp file may not exist if an earlier step failed
        for path in (csv_in, csv_out):
            try:
                os.remove(path)
            except OSError:
                pass
def main():
    """Create the Translate v2 API client and translate each file in FILENAMES."""
    client = build('translate', 'v2', developerKey=API_KEY)
    for name in FILENAMES:
        translate_file(client, name)


# Run the translation only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment