Last active
July 24, 2020 06:27
-
-
Save jamm1985/4a22f1adb0675403595b8997835d554b to your computer and use it in GitHub Desktop.
Aggregate (count) catalog data by year/month and plot it
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
File: catalog.py | |
Author: Andrey Stepnov | |
Email: [email protected], [email protected] | |
Github: https://github.com/jamm1985 | |
Description: Aggregate (count) catalog data by year/month and plot it | |
Catalog sources: https://earthquake.usgs.gov, https://eqalert.ru | |
""" | |
import pandas as pd | |
import matplotlib as mlp | |
import pylab as plt | |
def parse_eqalert_ru(xlsx): | |
"""read xlsx, clean and save data to dataframe | |
:xlsx: string, input XLSX file | |
:returns: pandas dataframe | |
""" | |
data = pd.read_excel(xlsx) | |
data = data[['event_datetime', | |
'lat', 'lon', 'depth', | |
'mag', 'ep_dis', 'msk64_value', 'sta_num']] | |
data['event_datetime'] = pd.to_datetime(data['event_datetime'], format='%Y-%m-%d %H:%M:%S') | |
return data | |
def parse_usgs_gov(csv): | |
"""read csv file, clean and save the data to dataframe | |
:csv: string, input csv file | |
:returns: pandas dataframe | |
""" | |
data = pd.read_csv(csv) | |
data['time'] = pd.to_datetime(data['time']) | |
data = data[['time', | |
'latitude', 'longitude', 'depth', | |
'mag', 'dmin', 'nst']] | |
data.columns = ['event_datetime', 'lat', 'lon', 'depth', 'mag', 'ep_dis', 'sta_num'] | |
return data | |
## USGS catalog | |
data = parse_usgs_gov('2020-07-11-usgs-query.csv') | |
## OR | |
## EQALERT catalog | |
data = parse_eqalert_ru('reports_list_09-04-10072020.xlsx') | |
## selection by date, MAG etc | |
data = data[data['mag'] > 4.0] | |
data = data[data['event_datetime'] > '1960'] | |
## group by month and by year | |
by_month = pd.DataFrame(data['event_datetime'].dt.strftime('%Y-%m')) | |
by_month['mag'] = data['mag'] | |
by_month['max_mag'] = by_month.groupby(['event_datetime'])['mag'].transform(max) | |
by_month = by_month.groupby(['event_datetime', 'max_mag']).size().reset_index().rename(columns={0: 'counts'}) | |
by_year = pd.DataFrame(data['event_datetime'].dt.strftime('%Y')) | |
by_year['mag'] = data['mag'] | |
by_year['max_mag'] = by_year.groupby(['event_datetime'])['mag'].transform(max) | |
by_year = by_year.groupby(['event_datetime', 'max_mag']).size().reset_index().rename(columns={0: 'counts'}) | |
## plot | |
by_month.plot.line(x='event_datetime', y=['counts','max_mag'], subplots=True) | |
plt.show() | |
by_year.plot.line(x='event_datetime', y=['counts','max_mag'], subplots=True) | |
plt.show() | |
## save groupped data | |
by_month.to_excel("by_month.xlsx") | |
by_year.to_excel("by_year.xlsx") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
catalogs in csv and XLSX here