khuangaf · February 4, 2018 13:14
diff --git a/gistfile1.txt b/gistfile1.txt
 import numpy as np
 import os
 from random import shuffle
 import re
 import urllib.request
 import zipfile
 import lxml.etree
 # Download the ted_en-20160408 zip archive from wit3.fbk.eu and store it in
 # the current working directory under the name the extraction step opens.
 urllib.request.urlretrieve("https://wit3.fbk.eu/get.php?path=XML_releases/xml/ted_en-20160408.zip&filename=ted_en-20160408.zip", filename="ted_en-20160408.zip")
 # Extract the subtitle text: parse the XML member of the archive, then join
 # the text of every <content> element with newlines.
 with zipfile.ZipFile('ted_en-20160408.zip', 'r') as z:
     # Open the member in its own `with` so the stream is closed as soon as
     # parsing finishes instead of lingering until garbage collection.
     with z.open('ted_en-20160408.xml', 'r') as xml_file:
         doc = lxml.etree.parse(xml_file)
 input_text = '\n'.join(doc.xpath('//content/text()'))
	import numpy as np
	import os
	from random import shuffle
	import re
	import urllib.request
	import zipfile
	import lxml.etree
	# Download the ted_en-20160408 zip archive from wit3.fbk.eu and store it in
	# the current working directory under the name the extraction step opens.
	urllib.request.urlretrieve("https://wit3.fbk.eu/get.php?path=XML_releases/xml/ted_en-20160408.zip&filename=ted_en-20160408.zip", filename="ted_en-20160408.zip")
	# Extract the subtitle text: parse the XML member of the archive, then join
	# the text of every <content> element with newlines.
	with zipfile.ZipFile('ted_en-20160408.zip', 'r') as z:
		# The parse must run inside the `with` body: the archive has to be
		# open while its member is read. The member stream gets its own
		# `with` so it is closed as soon as parsing finishes.
		with z.open('ted_en-20160408.xml', 'r') as xml_file:
			doc = lxml.etree.parse(xml_file)
	input_text = '\n'.join(doc.xpath('//content/text()'))