Earthquake Data crawler
@xhinking · Created August 8, 2013
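A Python 2 script that walks the USGS earthquake catalog in six-month windows, downloads each window as CSV from the ComCat v0.1 feed, and stores every event row in a local MongoDB collection (earthquakes.crawler).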
# -*- coding: utf-8 -*-
# Data Source: http://earthquake.usgs.gov/earthquakes/eqarchives/epic/
import urllib2
import csv
import pymongo
import calendar
from datetime import date
import httplib

# Force HTTP/1.0 so the server does not send chunked responses,
# a common workaround for httplib.IncompleteRead errors in urllib2
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'

# Connect to MongoDB (pymongo.Connection was current in 2013; newer
# pymongo versions use MongoClient instead)
conn = pymongo.Connection('localhost', 27017)
mongodb = conn.earthquakes
crawler = mongodb.crawler
# All years to crawl (currently just 2010)
for year in range(2010, 2011):
    # Two six-month windows per year: m = 1 (Jan-Jun) and m = 7 (Jul-Dec)
    for m in range(1, 13, 6):
        # Compute the UTC timestamps bounding the window
        start = date(year, m, 1)
        if m == 7:
            end = date(year + 1, 1, 1)
        else:
            end = date(year, m + 6, 1)
        startTimestamp = calendar.timegm(start.timetuple())
        endTimestamp = calendar.timegm(end.timetuple())
        print 'Begin ' + str(start) + '-' + str(end)
        # Get data; appending "000" converts the second-resolution
        # timestamps to the milliseconds the feed expects
        url = "http://comcat.cr.usgs.gov/earthquakes/feed/v0.1/search.php?maxEventLatitude=90.0&minEventLatitude=-90.0&minEventLongitude=-180.0&maxEventLongitude=180.0&minEventTime=" + str(startTimestamp) + "000&maxEventTime=" + str(endTimestamp) + "000&minEventMagnitude=-1.0&maxEventMagnitude=10&minEventDepth=0.0&maxEventDepth=800.0&format=csv"
        print url
        page = urllib2.urlopen(url)
        reader = csv.reader(page)
        # Save every data row, skipping the CSV header:
        # DateTime,Latitude,Longitude,Depth,Magnitude,MagType,NbStations,Gap,Distance,RMS,Source,EventID,Version
        for row in reader:
            if row[0] != 'DateTime':
                crawler.insert({"dateTime": row[0], "lat": row[1], "lon": row[2], "depth": row[3], "magnitude": row[4], "magType": row[5], "nbStations": row[6], "gap": row[7], "distance": row[8], "rms": row[9], "source": row[10], "event_id": row[11], "version": row[12]})
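The v0.1 ComCat feed queried above has since been retired, and the script is Python 2 throughout. Below is a minimal sketch of the same crawl in Python 3 against the current USGS FDSN event service; the requests library and the endpoint/parameter names are assumptions based on the public FDSN API rather than anything in the original gist, while the time windows and magnitude/depth bounds mirror the original query.

# -*- coding: utf-8 -*-
# Sketch: same six-month crawl via https://earthquake.usgs.gov/fdsnws/event/1/
# (assumes the third-party requests library; database and collection
# names mirror the original script)
import csv
import io

import requests
from pymongo import MongoClient

crawler = MongoClient('localhost', 27017).earthquakes.crawler

for year in range(2010, 2011):
    for month in (1, 7):  # two six-month windows per year
        start = f'{year}-{month:02d}-01'
        end = f'{year + 1}-01-01' if month == 7 else f'{year}-{month + 6:02d}-01'
        resp = requests.get(
            'https://earthquake.usgs.gov/fdsnws/event/1/query',
            params={'format': 'csv', 'starttime': start, 'endtime': end,
                    'minmagnitude': -1.0, 'maxmagnitude': 10,
                    'mindepth': 0.0, 'maxdepth': 800.0},
            timeout=60)
        resp.raise_for_status()
        # DictReader keys each document by the feed's own header names
        # (time, latitude, longitude, depth, mag, ...)
        docs = [dict(row) for row in csv.DictReader(io.StringIO(resp.text))]
        if docs:
            crawler.insert_many(docs)

As in the original, every field arrives as a string, so cast fields such as mag, latitude, longitude, and depth to float before inserting if numeric queries matter. Note also that the FDSN service rejects queries matching more than 20,000 events, so a crawl this broad (minmagnitude=-1.0) would need much narrower windows in practice.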