Skip to content

Instantly share code, notes, and snippets.

@YieldNull
Created November 17, 2015 13:47
Show Gist options
  • Save YieldNull/f49b8037049e54df4b26 to your computer and use it in GitHub Desktop.
Save YieldNull/f49b8037049e54df4b26 to your computer and use it in GitHub Desktop.
Translate English and Chinese mutually using https://translate.google.com.hk
#!/usr/bin/env python
# coding:utf-8
"""
Translate English and Chinese mutually using https://translate.google.com.hk
Created on '11/5/15 5:07 PM'
"""
import gzip
import httplib
import sys
import urllib
import cStringIO
import re
import codecs
__author__ = 'hejunjie'

# Host name of the translation service; requests are sent to it over HTTPS.
URL = 'translate.google.com.hk'

# Browser-like request headers sent with every translation request.
HEADERS = {
    'accept': '*/*',
    # Only gzip is advertised: the response body is only ever gunzipped, so
    # offering 'deflate' or 'sdch' would invite replies that cannot be decoded.
    'accept-encoding': 'gzip',
    'accept-language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4',
    # The scheme separator was previously written 'https//' (missing colon).
    'referer': 'https://translate.google.com.hk/?hl=en&tab=TT',
    'user-agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Ubuntu Chromium/45.0.2454.101 '
        'Chrome/45.0.2454.101 Safari/537.36'
    ),
}
def translate(q):
    """Translate text between English and Simplified Chinese.

    The direction is picked automatically: input that is pure ASCII is
    treated as English and translated to ``zh-CN``; any other input is
    treated as Chinese and translated to English.

    :param q: text to translate, either ``unicode`` or a UTF-8 encoded
        byte ``str``.
    :return: the translated text as a UTF-8 encoded ``str``, or ``None``
        when the server replies with a status other than 200.
    """
    # Byte strings are taken to be UTF-8 so that the ASCII probe below works
    # on text; previously a non-ASCII byte string raised UnicodeDecodeError.
    if isinstance(q, str):
        q = q.decode('utf-8')

    # Short queries travel in the URL; longer ones are sent as a form body.
    method = 'GET' if len(q) <= 100 else 'POST'

    try:
        q.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII text: Chinese -> English.
        sl, tl = 'zh-CN', 'en'
        q = q.encode('utf-8')
    else:
        # Pure ASCII text: English -> Chinese.
        sl, tl = 'en', 'zh-CN'

    params = [
        ('client', 't'),
        ('sl', sl),
        ('tl', tl),
        ('ie', 'UTF-8'),
        # Output-encoding counterpart of 'ie'; it was misspelled 'oi' before.
        ('oe', 'UTF-8'),
        ('q', q),
        # Only the plain translation ('t') is requested. The other 'dt'
        # values (bd, ex, ld, md, qca, rw, rm, ss, at) and 'hl' were left
        # disabled by the original author.
        ('dt', 't'),
        ('srcrom', '0'),
        ('ssel', '0'),
        ('tsel', '4'),
        ('kc', '0'),
        # NOTE(review): hard-coded value; presumably a request token that
        # the service may stop accepting -- confirm.
        ('tk', '462264|76913'),
    ]
    param = urllib.urlencode(params)

    # Work on a copy so the shared module-level HEADERS is never mutated.
    headers = dict(HEADERS)
    # Only gzip can be decoded below, so do not advertise anything else.
    headers['accept-encoding'] = 'gzip'

    conn = httplib.HTTPSConnection(URL)
    try:
        if method == 'POST':
            # httplib does not set a content type for a string body.
            headers['content-type'] = 'application/x-www-form-urlencoded'
            conn.request(method, '/translate_a/single', param, headers)
        else:
            conn.request(method, '/translate_a/single?%s' % param,
                         headers=headers)
        res = conn.getresponse()
        if res.status != 200:
            return None
        body = res.read()
        content_encoding = res.getheader('content-encoding', '')
    finally:
        # Always release the socket, including when the request raises.
        conn.close()

    # Decompress only when the server actually compressed the payload.
    if 'gzip' in content_encoding.lower():
        body = gzip.GzipFile(fileobj=cStringIO.StringIO(body)).read()
    content = body.decode('utf-8')

    # Each match captures the first quoted string of a bracketed group that
    # has five comma-separated fields; the captures are concatenated in order.
    segments = re.findall(r'\["(.*?)",.*?,.*?,.*?,.*?\],', content, re.S)
    # Only escaped double quotes are unescaped; other escapes are left as-is.
    return ''.join(s.replace('\\"', '"') for s in segments).encode('utf-8')
if __name__ == '__main__':
if len(sys.argv) == 2:
print translate(sys.argv[1].decode('utf-8'))
elif len(sys.argv) == 3 and sys.argv[1] == '-f':
with codecs.open(sys.argv[2], 'r', 'utf-8') as f:
print translate(f.read())
else:
print 'Usage:goslate.py <language_source>'
print ' goslate.py -f <path2file>(encoded with utf-8)'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment