Skip to content

Instantly share code, notes, and snippets.

@dimitrov
Created February 20, 2011 17:27
Show Gist options
  • Save dimitrov/836125 to your computer and use it in GitHub Desktop.
Save dimitrov/836125 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import urllib2
import zipfile
import MySQLdb
class AlexaUpdater(object):
    """Download the Alexa top-1M site list and load it into a MySQL table.

    The three public steps are meant to run in order: ``download_file``
    fetches the zip archive into the current working directory,
    ``extract_file`` unpacks the CSV from it, and ``insert_data`` writes
    every ``rank,site`` row into ``db_name.table_name`` using
    ``REPLACE INTO``.  Each step prints a short message and terminates the
    process with exit status 1 when it fails.
    """

    # Remote archive and the local file names shared by the three steps
    # (local names are relative to the current working directory).
    ARCHIVE_URL = 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
    ARCHIVE_NAME = 'top-1m.csv.zip'
    CSV_NAME = 'top-1m.csv'
    # Number of bytes read per iteration while streaming the archive to disk.
    CHUNK_SIZE = 64 * 1024

    def __init__(self, user, password, db_name, table_name, host='localhost',
                 charset='utf8'):
        """Store the MySQL connection settings and the target table.

        Args:
            user: MySQL user name.
            password: MySQL password.
            db_name: Database (schema) that holds the target table.
            table_name: Table receiving ``site_rank`` / ``site_name`` rows.
            host: MySQL server host.
            charset: Connection character set.
        """
        self.host = host
        self.user = user
        self.password = password
        self.db_name = db_name
        self.charset = charset
        self.table_name = table_name

    @staticmethod
    def _fail(message, error):
        """Print *message*, report *error* on stderr and exit with status 1."""
        # Local import so this class does not depend on a file-level import.
        import sys

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(message)
        # repr() keeps the exception type visible and never fails to encode.
        sys.stderr.write(repr(error) + '\n')
        sys.exit(1)

    @staticmethod
    def _quote_identifier(name):
        """Return *name* as a backtick-quoted MySQL identifier.

        Embedded backticks are doubled.  ``%`` is doubled as well because
        the statement is later passed to ``cursor.execute`` together with
        parameters, and MySQLdb then applies %-formatting to it.
        """
        escaped = name.replace('`', '``').replace('%', '%%')
        return '`{0}`'.format(escaped)

    @staticmethod
    def _parse_line(line):
        """Split one ``rank,site`` CSV line into ``(site_rank, site_name)``.

        Returns:
            A tuple of two stripped strings, or ``None`` for a blank line.

        Raises:
            ValueError: If a non-blank line has no second field.
        """
        if not line.strip():
            return None
        fields = line.split(',')
        if len(fields) < 2:
            raise ValueError('Malformed line: {0!r}'.format(line))
        return fields[0].strip(), fields[1].strip()

    def download_file(self):
        """Download the archive from ``ARCHIVE_URL`` into ``ARCHIVE_NAME``.

        Prints ``File download failed. Exiting...`` and exits with status 1
        on any error.
        """
        try:
            response = urllib2.urlopen(self.ARCHIVE_URL)
            try:
                # The payload is a zip file: write it in binary mode and copy
                # fixed-size chunks instead of iterating over "lines".
                with open(self.ARCHIVE_NAME, 'wb') as archive:
                    while True:
                        chunk = response.read(self.CHUNK_SIZE)
                        if not chunk:
                            break
                        archive.write(chunk)
            finally:
                response.close()
        except Exception as error:
            self._fail('File download failed. Exiting...', error)

    def extract_file(self):
        """Extract ``CSV_NAME`` from ``ARCHIVE_NAME`` into the working directory.

        Prints ``File extraction failed. Exiting...`` and exits with status 1
        on any error.
        """
        try:
            archive = zipfile.ZipFile(self.ARCHIVE_NAME, 'r')
            try:
                archive.extract(self.CSV_NAME)
            finally:
                # Close the archive even when the member is missing.
                archive.close()
        except Exception as error:
            self._fail('File extraction failed. Exiting...', error)

    def insert_data(self):
        """Load every row of ``CSV_NAME`` into the configured MySQL table.

        Rows are written with ``REPLACE INTO`` and committed once after the
        whole file has been processed; blank lines are skipped.  Prints
        ``Data insertion failed. Exiting...`` and exits with status 1 on any
        error (the connection is closed without committing in that case).
        """
        try:
            # Identifiers cannot be bound as parameters, so they are quoted
            # here; the row values are bound by the driver via %s.
            statement = (
                'REPLACE INTO {0}.{1} (`site_rank`, `site_name`) '
                'VALUES (%s, %s)'
            ).format(
                self._quote_identifier(self.db_name),
                self._quote_identifier(self.table_name),
            )
            connection = MySQLdb.connect(
                host=self.host, user=self.user, passwd=self.password,
                db=self.db_name, charset=self.charset, use_unicode=True)
            try:
                cursor = connection.cursor()
                with open(self.CSV_NAME, 'r') as rows:
                    for line in rows:
                        record = self._parse_line(line)
                        if record is None:
                            continue
                        cursor.execute(statement, record)
                # Without an explicit commit the rows are discarded on close
                # for transactional tables when autocommit is off.
                connection.commit()
            finally:
                connection.close()
        except Exception as error:
            self._fail('Data insertion failed. Exiting...', error)
if __name__ == '__main__':
    # Script entry point: fetch the archive, unpack the CSV, then load it
    # into MySQL, and finish with a success exit status.
    alexa = AlexaUpdater(
        user='user',
        password='password',
        db_name='alexa_db',
        table_name='alexa_ranks',
    )
    pipeline = (alexa.download_file, alexa.extract_file, alexa.insert_data)
    for step in pipeline:
        step()
    exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment