Skip to content

Instantly share code, notes, and snippets.

@ishideo
Created April 6, 2018 08:21
Show Gist options
  • Save ishideo/3e77e32515dbe3b4bcaa331b3f39a66d to your computer and use it in GitHub Desktop.
Save ishideo/3e77e32515dbe3b4bcaa331b3f39a66d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import re
import codecs
import time
import msmt
from BeautifulSoup import BeautifulStoneSoup
#sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
def main():
    """Translate the first tab-separated field of every line in ./out.txt.

    Each line of ``./out.txt`` is split on tab characters and its first
    field is passed to ``msmt.translate`` together with the access token
    and the language arguments ``"fr"`` and ``"en"``.  The reply is parsed
    with ``BeautifulStoneSoup`` (``convertEntities`` set to
    ``XML_ENTITIES``), every ``<...>`` tag is stripped from the serialized
    result, and the remaining text is printed to standard output.

    Before every 4000th request the loop sleeps for 80 seconds
    (presumably to respect a service rate limit -- TODO confirm).
    """
    # NOTE(review): the credentials below are hard-coded placeholders; load
    # the real values from the environment or a config file instead of
    # committing them to source control.
    token = msmt.get_access_token('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    input_path = './out.txt'
    requests_per_pause = 4000
    pause_seconds = 80
    # Compiled once: the same pattern is applied to every reply.
    tag_pattern = re.compile(r'<[^>]*>')

    # The context manager closes the file even if a request raises.
    with open(input_path, 'r') as source:
        # Lines yielded by file iteration always contain at least a newline,
        # so no explicit end-of-file check is needed inside the loop.
        for count, line in enumerate(source, 1):
            items = line.split('\t')
            if count % requests_per_pause == 0:
                time.sleep(pause_seconds)
            get_xml = msmt.translate(token, items[0], "fr", "en")
            soup = BeautifulStoneSoup(get_xml, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
            text = tag_pattern.sub('', str(soup))
            # Single-argument call form prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(text)
# Run the translation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment