Skip to content

Instantly share code, notes, and snippets.

@antonisa
Created October 30, 2018 23:43
Show Gist options
  • Save antonisa/b03dc7ba12cfbaac9ee17f669baf3836 to your computer and use it in GitHub Desktop.
Save antonisa/b03dc7ba12cfbaac9ee17f669baf3836 to your computer and use it in GitHub Desktop.
import codecs
import itertools
import os
import re
import urllib2
def download(url):
    """Fetch *url* with urllib2 and return the raw response body.

    Progress and error messages are printed to stdout.

    Args:
        url: Address passed unchanged to ``urllib2.urlopen``.

    Returns:
        The bytes read from the response, or ``None`` when ``urllib2``
        raises ``URLError`` (the reason is printed, not re-raised).
    """
    # Single-argument parenthesised print produces the same output as the
    # Python 2 print statement used elsewhere in this file.
    print("Downloading:  %s" % (url,))
    try:
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()
    except urllib2.URLError as e:
        # Wrap in a tuple so a tuple-valued reason cannot break formatting.
        print("Download Error:  %s" % (e.reason,))
        return None
# URL template for one game page; %d is filled with the game code.
baseurl = 'http://www.euroleague.net/main/results/showgame?gamecode=%d&seasoncode=E2018'

# Browser-like request headers sent with every page request.
hdr = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

# Output path template; its directory is created up front so the first
# successful download does not fail on a missing folder.
outpath = "games/game-%d.txt"
outdir = os.path.dirname(outpath)
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# NOTE: only game codes 25..32 are fetched; widen the range to crawl more.
for page in range(25, 33):
    url = baseurl % page
    print("%d %s" % (page, url))

    ucontent = None
    try:
        req = urllib2.Request(url, headers=hdr)
        print("requested")
        resp = urllib2.urlopen(req)
        try:
            print("Opened")
            content = resp.read()
            print("Read")
            # getparam() returns None when Content-Type carries no charset;
            # splitting the header on 'charset=' would instead yield the whole
            # header value and crash the decode step with LookupError.
            encoding = resp.headers.getparam('charset') or 'utf-8'
        finally:
            # Release the connection whether or not the read succeeded.
            resp.close()
    except urllib2.URLError as e:
        print("Download Error:  %s" % (e.reason,))
    else:
        try:
            ucontent = content.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong declared charset: keep the page anyway,
            # substituting U+FFFD for undecodable bytes.
            ucontent = content.decode('utf-8', 'replace')

    if ucontent is None:
        print("Fail!  %d" % page)
        continue

    print("Success!  %d" % page)
    f = outpath % page
    with codecs.open(f, 'w', 'utf-8') as fout:
        fout.write(ucontent)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment