Skip to content

Instantly share code, notes, and snippets.

@Akramz
Created June 25, 2016 22:08
Show Gist options
  • Save Akramz/dea697d6c9fadf4765c827b86784caf5 to your computer and use it in GitHub Desktop.
Save Akramz/dea697d6c9fadf4765c827b86784caf5 to your computer and use it in GitHub Desktop.
from __future__ import division
import pandas as pd
import numpy as np
import requests
import time
import os.path
from datetime import date, timedelta
from subprocess import call
import csv
import sqlite3
# to read strings
import sys
if sys.version_info[0] < 3:
from StringIO import StringIO
else:
from io import StringIO
from pandas import DataFrame
# First the DB
conn = sqlite3.connect('/var/www/html/nuweth/data/DB.db')
cur = conn.cursor()
cur.execute("""DROP TABLE IF EXISTS history""")
cur.execute("""CREATE TABLE history (D text, H float, W float, T float, C text)""")
# cities
cities = [
["Agadir", "GMAD"],
["Al+Hoceima", "GMTA"],
["Tetuan", "GMTN"],
["Fes-Sais", "GMFF"],
["Rabat-Sale", "GMME"],
["Tanger", "GMTT"],
["Marrakech", "GMMX"],
["Nouasseur", "GMMN"],
["Oujda", "GMFO"]
]
# get dates
today = date.today()
yesterday = today - timedelta(1)
third = today - timedelta(2)
fourth = today - timedelta(3)
fifth = today - timedelta(4)
# dates texts
today_text = str(today.year) + "/" + str(today.month) + "/" + str(today.day)
yesterday_text = str(yesterday.year) + "/" + str(yesterday.month) + "/" + str(yesterday.day)
third_text = str(third.year) + "/" + str(third.month) + "/" + str(third.day)
fourth_text = str(fourth.year) + "/" + str(fourth.month) + "/" + str(fourth.day)
fifth_text = str(fifth.year) + "/" + str(fifth.month) + "/" + str(fifth.day)
for city in cities:
# get weather data
today_data = requests.get("http://www.wunderground.com/history/airport/" + city[1] + "/" + today_text + "/DailyHistory.html?req_city=" + city[0] + "&req_statename=Morocco&format=1").text.replace('<br />', '')
yesterday_data = requests.get("http://www.wunderground.com/history/airport/" + city[1] + "/" + yesterday_text + "/DailyHistory.html?req_city=" + city[0] + "&req_statename=Morocco&format=1").text.replace('<br />', '')
third_data = requests.get("http://www.wunderground.com/history/airport/" + city[1] + "/" + third_text + "/DailyHistory.html?req_city=" + city[0] + "&req_statename=Morocco&format=1").text.replace('<br />', '')
fourth_data = requests.get("http://www.wunderground.com/history/airport/" + city[1] + "/" + fourth_text + "/DailyHistory.html?req_city=" + city[0] + "&req_statename=Morocco&format=1").text.replace('<br />', '')
fifth_data = requests.get("http://www.wunderground.com/history/airport/" + city[1] + "/" + fifth_text + "/DailyHistory.html?req_city=" + city[0] + "&req_statename=Morocco&format=1").text.replace('<br />', '')
# compress
today_df = DataFrame.from_csv(StringIO(today_data),index_col=None, header=0)
yesterday_df = DataFrame.from_csv(StringIO(yesterday_data),index_col=None, header=0)
third_df = DataFrame.from_csv(StringIO(third_data),index_col=None, header=0)
fourth_df = DataFrame.from_csv(StringIO(fourth_data),index_col=None, header=0)
fifth_df = DataFrame.from_csv(StringIO(fifth_data),index_col=None, header=0)
frame = pd.DataFrame()
list_ = [today_df, yesterday_df, third_df, fourth_df, fifth_df]
data = pd.concat(list_)
# CLEANING
d = data[['DateUTC', 'TemperatureF', 'Humidity', 'Wind SpeedMPH']]
# calm = 0.5 (<1)
if d['Wind SpeedMPH'].dtype == 'object':
d.loc[d['Wind SpeedMPH'] == 'Calm', 'Wind SpeedMPH'] = 0.310686
d['Wind SpeedMPH'] = d['Wind SpeedMPH'].fillna(method='ffill')
d['Wind SpeedMPH'] = d['Wind SpeedMPH'].astype(float)
d.ix[d['Wind SpeedMPH'] < 0, 'Wind SpeedMPH'] = -9999
d.ix[d['Wind SpeedMPH'] > 77.6714, 'Wind SpeedMPH'] = -9999
d['Wind SpeedMPH'] = d['Wind SpeedMPH'].replace(to_replace=-9999, method='ffill')
# clean humidity
d['Humidity'] = d['Humidity'].fillna(method='ffill')
d.ix[d['Humidity'] < 0, 'Humidity'] = -9999
d.ix[d['Humidity'] > 100, 'Humidity'] = -9999
d['Humidity'] = d['Humidity'].replace(to_replace=-9999, method='ffill')
# clean temps
d['TemperatureF'] = d['TemperatureF'].fillna(method='ffill')
d.ix[d['TemperatureF'] < 14, 'TemperatureF'] = -9999
d.ix[d['TemperatureF'] > 122, 'TemperatureF'] = -9999
d['TemperatureF'] = d['TemperatureF'].replace(to_replace=-9999, method='ffill')
# get usual columns
d['Wind SpeedKm/h'] = d['Wind SpeedMPH'] * 1.60934
d['TemperatureC'] = (d['TemperatureF'] - 32) * (5/9)
del d['Wind SpeedMPH'], d['TemperatureF']
d['DateUTC'] = pd.to_datetime(d.DateUTC, format='%Y-%m-%d %H:%M:%S')
d = d.sort('DateUTC')
# group by hour
s = d.groupby(d.DateUTC.map(lambda t: (t.year,t.month,t.day,t.hour))).mean().reset_index()
final = s.tail(72)
final['City'] = city[0]
# save the dataset
final.to_csv('/var/www/html/nuweth/data/' + city[0] + '.csv', index=False)
with open('/var/www/html/nuweth/data/' + city[0] + '.csv', 'r') as f:
reader = csv.reader(f.readlines()[1:]) # exclude header line
cur.executemany("""INSERT INTO history VALUES (?,?,?,?,?)""", (row for row in reader))
conn.commit()
# close the connection
conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment