haykuro · July 26, 2018 17:16
diff --git a/alexa_top500.py b/alexa_top500.py
 # Print the leading rows of the Alexa "top-1m" CSV as "id: ..., domain: ..."
 # lines. The zip archive is downloaded to ZIP_PATH only when no file exists
 # there yet, so repeated runs reuse the cached copy.
 from zipfile import ZipFile
 from os.path import isfile

 try:
     from urllib import urlretrieve  # Python 2 location
 except ImportError:
     from urllib.request import urlretrieve  # Python 3 location

 ZIP_URL = 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
 ZIP_PATH = '/tmp/top-1m.zip'
 MAX_ROWS = 500  # number of leading CSV lines that are considered

 if not isfile(ZIP_PATH):
     urlretrieve(ZIP_URL, ZIP_PATH)

 CONTENT = []

 # Context managers close both the archive and the member, even on error.
 with ZipFile(ZIP_PATH, 'r') as ZFILE:
     for name in ZFILE.namelist():
         if ".csv" in name:
             with ZFILE.open(name) as EX_FILE:  # this is a file like object
                 RAW = EX_FILE.read()
             # read() returns bytes; on Python 3 that is not `str`, so decode
             # before treating the data as text. On Python 2 it already is.
             if not isinstance(RAW, str):
                 RAW = RAW.decode('utf-8')
             # splitlines() also copes with "\r\n" line endings.
             CONTENT = RAW.splitlines()
             break  # only the first CSV member is used

 for line in CONTENT[:MAX_ROWS]:
     if not line:
         continue  # skip blank lines (compare by value, never with `is`)
     # Split on the first comma only so an extra comma cannot break unpacking.
     d_id, domain = line.split(',', 1)
     # Parenthesised single-argument form prints the same on Python 2 and 3.
     print("id: %s, domain: %s" % (d_id, domain))
	# Print the leading rows of the Alexa "top-1m" CSV as "id: ..., domain: ..."
	# lines. The zip archive is downloaded to ZIP_PATH only when no file exists
	# there yet, so repeated runs reuse the cached copy.
	from zipfile import ZipFile
	from os.path import isfile

	try:
	    from urllib import urlretrieve  # Python 2 location
	except ImportError:
	    from urllib.request import urlretrieve  # Python 3 location

	ZIP_URL = 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
	ZIP_PATH = '/tmp/top-1m.zip'
	MAX_ROWS = 500  # number of leading CSV lines that are considered

	if not isfile(ZIP_PATH):
	    urlretrieve(ZIP_URL, ZIP_PATH)

	CONTENT = []

	# Context managers close both the archive and the member, even on error.
	with ZipFile(ZIP_PATH, 'r') as ZFILE:
	    for name in ZFILE.namelist():
	        if ".csv" in name:
	            with ZFILE.open(name) as EX_FILE:  # this is a file like object
	                RAW = EX_FILE.read()
	            # read() returns bytes; on Python 3 that is not `str`, so decode
	            # before treating the data as text. On Python 2 it already is.
	            if not isinstance(RAW, str):
	                RAW = RAW.decode('utf-8')
	            # splitlines() also copes with "\r\n" line endings.
	            CONTENT = RAW.splitlines()
	            break  # only the first CSV member is used

	for line in CONTENT[:MAX_ROWS]:
	    if not line:
	        continue  # skip blank lines (compare by value, never with `is`)
	    # Split on the first comma only so an extra comma cannot break unpacking.
	    d_id, domain = line.split(',', 1)
	    # Parenthesised single-argument form prints the same on Python 2 and 3.
	    print("id: %s, domain: %s" % (d_id, domain))
No results found