from __future__ import division
import numpy as np
import pandas as pd
from keras.optimizers import rmsprop
from keras.models import model_from_json
from params import *
from datetime import datetime
from datetime import date, timedelta
import csv
import sqlite3
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib
from sys import exit
# StringIO moved between Python 2 and 3, so pick the import that matches the interpreter
import sys
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
from pandas import DataFrame
# set up the SQLite database and recreate the predictions table from scratch
conn = sqlite3.connect('/var/www/html/nuweth/data/DB.db')
cur = conn.cursor()
cur.execute("""DROP TABLE IF EXISTS predictions""")
cur.execute("""CREATE TABLE predictions (C text, H float, S float, T float, DP float, D text, Type text, CON text)""")
# import the KNN model and the encoders
clf = joblib.load('/home/ubuntu/nuweth/models/knn.pkl')
conditions_encoder = joblib.load('/home/ubuntu/nuweth/models/conditions_encoder.pkl')
cities_encoder = joblib.load('/home/ubuntu/nuweth/models/cities_encoder.pkl')
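# Assumption: the encoders behave like scikit-learn LabelEncoders: cities_encoder maps city
# names to the integer codes the KNN was trained on, and conditions_encoder maps the
# classifier's integer output back to condition strings via inverse_transform.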
# cities
cities = [
    ["Agadir", "GMAD"],
    ["Tetuan", "GMTN"],
    ["Fes-Sais", "GMFF"],
    ["Rabat-Sale", "GMME"],
    ["Al+Hoceima", "GMTA"],
    ["Tanger", "GMTT"],
    ["Marrakech", "GMMX"],
    ["Nouasseur", "GMMN"],
    ["Oujda", "GMFO"]
]
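# Each entry is [city name, station code]; the codes look like ICAO airport identifiers,
# but only city[0] is actually used below (in file paths and as the frame's C column).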
# PREDICT THE NEXT 24H
for city in cities:
    data = pd.read_csv('/home/ubuntu/nuweth/data/' + city[0] + '.csv')
    d = data.tail(24).copy()  # copy so the normalization below does not modify a view of `data`
    # get the timestamp of the last observation
    last = d.tail(1)
    last_date = last.iloc[0]['DateUTC'][1:-1].split(",")  # a list of date components
    last_time_text = last_date[0].strip() + "-" + last_date[1].strip() + "-" + last_date[2].strip() + "-" + last_date[3].strip()
    last_time = datetime.strptime(last_time_text, '%Y-%m-%d-%H')
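    # Assumption: DateUTC is stored as a bracketed, comma-separated string such as
    # "(2016, 6, 25, 22)" (year, month, day, hour); [1:-1].split(",") takes it apart and
    # strptime reassembles the pieces using the '%Y-%m-%d-%H' format.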
    # normalize each feature with the per-city statistics imported from params
    d['TemperatureC'] = (d['TemperatureC'] - temp_mean[city[0].upper()]) / (temp_max[city[0].upper()] - temp_min[city[0].upper()])
    d['Humidity'] = (d['Humidity'] - humidity_mean[city[0].upper()]) / (humidity_max[city[0].upper()] - humidity_min[city[0].upper()])
    d['Wind SpeedKm/h'] = (d['Wind SpeedKm/h'] - wind_mean[city[0].upper()]) / (wind_max[city[0].upper()] - wind_min[city[0].upper()])
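    # This is mean/range scaling: x' = (x - mean) / (max - min), so each feature is roughly
    # centred on 0 with a spread of about 1; the predictions are mapped back to real units
    # with the inverse transform further down.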
    # one temperature, humidity and wind-speed reading per hour
    ts = d[['TemperatureC']].values.flatten()
    hs = d[['Humidity']].values.flatten()
    ws = d[['Wind SpeedKm/h']].values.flatten()
    # create (temperature, humidity, wind speed) triples, one per hour
    sequences = zip(ts, hs, ws)
    sequences = np.array([list(i) for i in sequences])
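    # sequences now has shape (24, 3): 24 hourly steps with 3 features each, the input
    # window the sequence model expects (see the reshape to (1, 24, 3) below).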
    # free up memory
    del ts, hs, ws, data
    # load the city's 24-hour model (architecture + weights) and compile it
    model = model_from_json(open('/home/ubuntu/nuweth/models/' + city[0] + '/24/100/architecture.json').read())
    model.load_weights('/home/ubuntu/nuweth/models/' + city[0] + '/24/100/weights.h5')
    optimizer = rmsprop()
    model.compile(optimizer=optimizer, loss='mse')
    # predict
    predictions = model.predict(sequences.reshape((1,24,3)))
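    # The reshape turns the (24, 3) window into a batch of one sample with shape (1, 24, 3);
    # predictions[0] is the output sequence for that sample, one (temperature, humidity,
    # wind speed) triple per forecast hour, still on the normalized scale, so the three
    # lines below invert the mean/range scaling.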
    predicted_temps = [x[0]*(temp_max[city[0].upper()] - temp_min[city[0].upper()]) + temp_mean[city[0].upper()] for x in predictions[0]]
    predicted_humidities = [x[1]*(humidity_max[city[0].upper()] - humidity_min[city[0].upper()]) + humidity_mean[city[0].upper()] for x in predictions[0]]
    predicted_speeds = [x[2]*(wind_max[city[0].upper()] - wind_min[city[0].upper()]) + wind_mean[city[0].upper()] for x in predictions[0]]
    frame = pd.DataFrame({'T': predicted_temps, 'H': predicted_humidities, 'W': predicted_speeds, 'C': city[0]})
    # add dew points
    frame['DP'] = frame['T'] - (100 - frame['H']) / 5
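    # Dew point is estimated with the common rule of thumb DP = T - (100 - RH) / 5, which is
    # a reasonable approximation when relative humidity is above roughly 50%.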
    # add forecast timestamps, one hour after the last observation and then hourly
    i = 1
    for index, row in frame.iterrows():
        frame.set_value(index, 'DateUTC', last_time + timedelta(hours=i))
        frame.set_value(index, 'Type', '24')
        i += 1
    # make sure DateUTC has a datetime dtype so the .dt accessor works
    frame['DateUTC'] = pd.to_datetime(frame['DateUTC'])
    # extract the features the KNN classifier needs
    months = frame.DateUTC.dt.month.values
    days = frame.DateUTC.dt.day.values
    temperatures = frame['T'].values
    humidities = frame.H.values
    windspeeds = frame.W.values
    cities_ = cities_encoder.transform(frame.C.values)
    # predict a weather condition label for every forecast hour
    X = zip(temperatures, humidities, windspeeds, months, days, cities_)
    X = np.array([list(x) for x in X])
    frame['CON'] = conditions_encoder.inverse_transform(clf.predict(X))
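    # Note: the feature order (temperature, humidity, wind speed, month, day, encoded city)
    # is assumed to match the column order the KNN classifier was trained with; clf.predict
    # returns encoded labels, which inverse_transform turns back into condition strings.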
    # save the predictions, ordering the columns to match the predictions table schema
    # (C, H, S = wind speed, T, DP, D = date, Type, CON)
    frame = frame[['C', 'H', 'W', 'T', 'DP', 'DateUTC', 'Type', 'CON']]
    frame.to_csv('/var/www/html/nuweth/predictions/' + city[0] + '.csv', index=False)
    # delete the model to free memory before the next city
    del model
    # insert the rows into the SQLite predictions table
    with open('/var/www/html/nuweth/predictions/' + city[0] + '.csv', 'r') as f:
        reader = csv.reader(f.readlines()[1:])  # skip the header line
        cur.executemany("""INSERT INTO predictions VALUES (?,?,?,?,?,?,?,?)""", (row for row in reader))
    conn.commit()
# PREDICT FOR 3 DAYS
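# The 3-day loop mirrors the 24-hour loop above: same preprocessing, denormalization and
# KNN condition step, but it feeds the last 72 hours into each city's 72-step model and
# tags the resulting rows with Type '72'.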
for city in cities:
    data = pd.read_csv('/home/ubuntu/nuweth/data/' + city[0] + '.csv')
    d = data.tail(72).copy()  # copy so the normalization below does not modify a view of `data`
    # get the timestamp of the last observation
    last = d.tail(1)
    last_date = last.iloc[0]['DateUTC'][1:-1].split(",")  # a list of date components
    last_time_text = last_date[0].strip() + "-" + last_date[1].strip() + "-" + last_date[2].strip() + "-" + last_date[3].strip()
    last_time = datetime.strptime(last_time_text, '%Y-%m-%d-%H')
    # normalize each feature with the per-city statistics imported from params
    d['TemperatureC'] = (d['TemperatureC'] - temp_mean[city[0].upper()]) / (temp_max[city[0].upper()] - temp_min[city[0].upper()])
    d['Humidity'] = (d['Humidity'] - humidity_mean[city[0].upper()]) / (humidity_max[city[0].upper()] - humidity_min[city[0].upper()])
    d['Wind SpeedKm/h'] = (d['Wind SpeedKm/h'] - wind_mean[city[0].upper()]) / (wind_max[city[0].upper()] - wind_min[city[0].upper()])
    # one temperature, humidity and wind-speed reading per hour
    ts = d[['TemperatureC']].values.flatten()
    hs = d[['Humidity']].values.flatten()
    ws = d[['Wind SpeedKm/h']].values.flatten()
    # create (temperature, humidity, wind speed) triples, one per hour
    sequences = zip(ts, hs, ws)
    sequences = np.array([list(i) for i in sequences])
    # free up memory
    del ts, hs, ws, data
    # load the city's 72-hour model (architecture + weights) and compile it
    model = model_from_json(open('/home/ubuntu/nuweth/models/' + city[0] + '/72/100/architecture.json').read())
    model.load_weights('/home/ubuntu/nuweth/models/' + city[0] + '/72/100/weights.h5')
    optimizer = rmsprop()
    model.compile(optimizer=optimizer, loss='mse')
    # predict
    predictions = model.predict(sequences.reshape((1,72,3)))
    predicted_temps = [x[0]*(temp_max[city[0].upper()] - temp_min[city[0].upper()]) + temp_mean[city[0].upper()] for x in predictions[0]]
    predicted_humidities = [x[1]*(humidity_max[city[0].upper()] - humidity_min[city[0].upper()]) + humidity_mean[city[0].upper()] for x in predictions[0]]
    predicted_speeds = [x[2]*(wind_max[city[0].upper()] - wind_min[city[0].upper()]) + wind_mean[city[0].upper()] for x in predictions[0]]
    frame = pd.DataFrame({'T': predicted_temps, 'H': predicted_humidities, 'W': predicted_speeds, 'C': city[0]})
    # add dew points
    frame['DP'] = frame['T'] - (100 - frame['H']) / 5
    # add forecast timestamps, one hour after the last observation and then hourly
    i = 1
    for index, row in frame.iterrows():
        frame.set_value(index, 'DateUTC', last_time + timedelta(hours=i))
        frame.set_value(index, 'Type', '72')
        i += 1
    # make sure DateUTC has a datetime dtype so the .dt accessor works
    frame['DateUTC'] = pd.to_datetime(frame['DateUTC'])
    # extract the features the KNN classifier needs
    months = frame.DateUTC.dt.month.values
    days = frame.DateUTC.dt.day.values
    temperatures = frame['T'].values
    humidities = frame.H.values
    windspeeds = frame.W.values
    # use a separate name so the outer `cities` list is not overwritten
    cities_ = cities_encoder.transform(frame.C.values)
    # predict a weather condition label for every forecast hour
    X = zip(temperatures, humidities, windspeeds, months, days, cities_)
    X = np.array([list(x) for x in X])
    frame['CON'] = conditions_encoder.inverse_transform(clf.predict(X))
    # save the predictions, ordering the columns to match the predictions table schema
    frame = frame[['C', 'H', 'W', 'T', 'DP', 'DateUTC', 'Type', 'CON']]
    frame.to_csv('/var/www/html/nuweth/predictions/' + city[0] + '_.csv', index=False)
    # delete the model to free memory before the next city
    del model
    # insert the rows into the SQLite predictions table
    with open('/var/www/html/nuweth/predictions/' + city[0] + '_.csv', 'r') as f:
        reader = csv.reader(f.readlines()[1:])  # skip the header line
        cur.executemany("""INSERT INTO predictions VALUES (?,?,?,?,?,?,?,?)""", (row for row in reader))
    conn.commit()
# close the connection
conn.close()