Created
January 19, 2016 16:25
-
-
Save Nodraak/dcc68522ce9011a31b1c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import unicode_literals, print_function | |
import codecs | |
import subprocess | |
import time | |
from googleapiclient.discovery import build | |
# Google API key handed to the client as ``developerKey``; empty in this file.
API_KEY = ''

# How many words are sent per translation request
# (the original author cites a Google API limitation of 100/sec).
CHUNK_SIZE = 100

# Directory the .xls files are read from, and directory the results go to.
DIR_XLS_IN = 'some_directory_to_read_xls_from'
DIR_XLS_OUT = 'some_directory_to_write_xls_to'

# Spreadsheets to translate; add more file names here if wanted.
FILENAMES = (
    'some_file.xls',
    'some_other_file.xls',
)
def translate(service, words):
    """Translate a batch of French strings to English in one API request.

    Args:
        service: Translate v2 client object; it must expose
            ``translations().list(...).execute()``.
        words: list of strings to translate.

    Returns:
        A list holding the ``translatedText`` of every entry of the
        response's ``translations`` list, in the order returned.
    """
    if not words:
        # Nothing to translate: do not issue a request without any ``q``.
        return []

    # format='text' asks for plain-text handling; per the Translate v2
    # documentation the default is HTML, which yields HTML-escaped output
    # (e.g. ``&#39;`` instead of an apostrophe).
    request = service.translations().list(
        source='fr', target='en', format='text', q=words)
    ret = request.execute()
    return [t['translatedText'] for t in ret['translations']]
def _extract_word(line):
    """Return the second CSV column of ``line``, stripped of whitespace.

    The line is parsed with the ``csv`` module so that a quoted field
    containing commas is returned whole instead of being cut at the first
    comma. Raises IndexError when the line has fewer than two columns.
    """
    import csv
    import sys

    if sys.version_info[0] < 3:
        # Python 2's csv module only works on byte strings.
        rows = [[cell.decode('utf-8') for cell in row]
                for row in csv.reader([line.encode('utf-8')])]
    else:
        rows = list(csv.reader([line]))
    fields = rows[0] if rows else []
    return fields[1].strip()


def translate_file(service, filename):
    """Translate the second column of one .xls file and write a new .xls.

    Steps: convert ``DIR_XLS_IN/filename`` to CSV with ``ssconvert``, read
    every line but the header, translate the second column in batches of
    ``CHUNK_SIZE`` through ``translate()``, write a CSV with the translation
    appended to each original line, convert it to
    ``DIR_XLS_OUT/<filename>.xls`` and remove the intermediate CSV files.

    NOTE(review): the output name is ``filename`` plus ``.xls``, so an input
    name already ending in ``.xls`` gets a doubled extension; kept unchanged
    for compatibility.

    Args:
        service: Translate client passed through to ``translate()``.
        filename: name of the spreadsheet inside ``DIR_XLS_IN``.

    Raises:
        subprocess.CalledProcessError: if an ``ssconvert`` run fails.
        IndexError: if a data line has fewer than two columns.
    """
    print('Translating file %s...' % filename)

    csv_in = '%s.csv' % filename
    csv_out = '%s.out.csv' % filename

    try:
        # convert xls to csv (fail loudly instead of reading a missing file)
        subprocess.check_call(
            ['ssconvert', '%s/%s' % (DIR_XLS_IN, filename), csv_in])

        # read the csv file, skipping the first line (column header)
        with codecs.open(csv_in, 'r', 'utf-8') as f:
            lines = [line.strip('\n') for line in f.readlines()[1:]]
        print('\t%d lines to translate.' % len(lines))

        # split lines into chunks (or batches), one API request per chunk;
        # stepping the range avoids the float division that breaks
        # range() on Python 3
        translated = []
        for start in range(0, len(lines), CHUNK_SIZE):
            end = min(start + CHUNK_SIZE, len(lines))
            print('\tTranslating lines %d to %d...' % (start, end))

            cur_words = [_extract_word(line) for line in lines[start:end]]
            translated.extend(translate(service, cur_words))

            time.sleep(1)  # api's rate limit

        # save words to file: each original line followed by its translation
        print('\tWriting translated words to output...')
        with codecs.open(csv_out, 'w', 'utf-8') as f:
            f.write('Clef,"Message Français","Message Anglais"\n')
            for line, word in zip(lines, translated):
                # double embedded quotes so the quoted field stays valid CSV
                f.write('%s"%s",\n' % (line, word.replace('"', '""')))

        # convert back to xls
        subprocess.check_call(
            ['ssconvert', csv_out, '%s/%s.xls' % (DIR_XLS_OUT, filename)])
    finally:
        # clean the intermediate files even when a step above failed;
        # -f keeps rm quiet about files that were never created
        subprocess.call(['rm', '-f', csv_in, csv_out])
def main():
    """Build the Translate v2 client and translate each file of FILENAMES."""
    translate_service = build('translate', 'v2', developerKey=API_KEY)
    for xls_name in FILENAMES:
        translate_file(translate_service, xls_name)
# Run the translation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment