Skip to content

Instantly share code, notes, and snippets.

@Akramz
Created June 25, 2016 22:10
Show Gist options
  • Save Akramz/5c8debe07059ef6a0db0a4b2053eaa33 to your computer and use it in GitHub Desktop.
Save Akramz/5c8debe07059ef6a0db0a4b2053eaa33 to your computer and use it in GitHub Desktop.
from __future__ import division
import numpy as np
import pandas as pd
from keras.optimizers import rmsprop
from keras.models import model_from_json
from params import *
from datetime import datetime
from datetime import date, timedelta
import csv
import sqlite3
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib
from sys import exit
# to read strings
import sys
if sys.version_info[0] < 3:
from StringIO import StringIO
else:
from io import StringIO
from pandas import DataFrame
# Prediction store: every run starts from a freshly created, empty
# `predictions` table (any previous table is dropped first).
conn = sqlite3.connect('/var/www/html/nuweth/data/DB.db')
cur = conn.cursor()
for statement in (
    """DROP TABLE IF EXISTS predictions""",
    """CREATE TABLE predictions (C text, H float, S float, T float, DP float, D text, Type text, CON text)""",
):
    cur.execute(statement)

# Load the pickled KNN model and the two encoders, in that order.
# `cities_encoder.transform` / `conditions_encoder.inverse_transform` are
# called further down, so both objects must expose those methods.
clf, conditions_encoder, cities_encoder = (
    joblib.load(pickle_path)
    for pickle_path in (
        '/home/ubuntu/nuweth/models/knn.pkl',
        '/home/ubuntu/nuweth/models/conditions_encoder.pkl',
        '/home/ubuntu/nuweth/models/cities_encoder.pkl',
    )
)
# Cities to forecast, one [name, station_code] pair per entry.
# The name doubles as the stem of the per-city data file and model
# directory, which is why "Al+Hoceima" keeps its '+'.
# The four-letter code looks like an ICAO airport identifier -- TODO confirm.
cities = [
    ["Agadir", "GMAD"],
    ["Tetuan", "GMTN"],
    ["Fes-Sais", "GMFF"],
    ["Rabat-Sale", "GMME"],
    ["Al+Hoceima", "GMTA"],
    ["Tanger", "GMTT"],
    ["Marrakech", "GMMX"],
    ["Nouasseur", "GMMN"],
    ["Oujda", "GMFO"],
]
# Forecast horizons to run, in order. Each entry is
# (hours, suffix inserted before ".csv" in the prediction file name):
# the 24h run writes "<city>.csv", the 72h run writes "<city>_.csv".
HORIZONS = [(24, ''), (72, '_')]

# Prediction-CSV columns listed in the order of the `predictions` table
# columns (C, H, S, T, DP, D, Type, CON).
# NOTE(review): rows used to be inserted positionally, so the mapping depended
# on the DataFrame's column order (with alphabetically sorted columns the
# temperature landed in `S` and the wind speed in `T`). Mapping by name
# assumes `S` is meant to hold the wind speed -- confirm against the readers
# of this table.
DB_SOURCE_COLUMNS = ('C', 'H', 'W', 'T', 'DP', 'DateUTC', 'Type', 'CON')


def _scale(values, key, mean, maximum, minimum):
    """Return `values` normalized as (values - mean) / (max - min).

    `mean`, `maximum` and `minimum` are the per-city statistics mappings from
    `params`; `key` is the upper-cased city name used to index them.
    """
    return (values - mean[key]) / (maximum[key] - minimum[key])


def _unscale(values, key, mean, maximum, minimum):
    """Invert `_scale`: map normalized `values` back to physical units."""
    return values * (maximum[key] - minimum[key]) + mean[key]


def _parse_last_time(raw):
    """Parse a `DateUTC` cell into a datetime truncated to the hour.

    The first and last characters of `raw` are dropped and the rest is split
    on commas; the first four fields are read as year, month, day and hour.

    Raises:
        ValueError: if fewer than four fields are present or they do not
            form a valid date.
    """
    fields = [field.strip() for field in raw[1:-1].split(",")]
    return datetime.strptime("-".join(fields[:4]), '%Y-%m-%d-%H')


def _forecast_city(name, hours, suffix):
    """Forecast one city for one horizon and publish the result.

    Reads the last `hours` rows of the city's history CSV, feeds them to the
    city's Keras model for that horizon, labels every predicted hour with a
    weather condition using the KNN classifier, writes the forecast to
    '/var/www/html/nuweth/predictions/<name><suffix>.csv' and inserts the same
    rows into the `predictions` table (committed before returning).

    Args:
        name: city name; stem of the data file and of the model directory.
        hours: length of the input window; also selects the model directory
            and is stored (as a string) in the `Type` column.
        suffix: text inserted before '.csv' in the output file name.

    Raises:
        ValueError: from the reshape if the history holds fewer than `hours`
            rows, or from `_parse_last_time` on a malformed `DateUTC` cell.
    """
    key = name.upper()  # the statistics in `params` are keyed by upper-case name

    history = pd.read_csv('/home/ubuntu/nuweth/data/' + name + '.csv')
    window = history.tail(hours)
    last_time = _parse_last_time(window.iloc[-1]['DateUTC'])

    # One (temperature, humidity, wind speed) triple per hour, normalized.
    # Computed on the raw arrays so the slice of `history` is never mutated.
    sequences = np.column_stack([
        _scale(window['TemperatureC'].values, key, temp_mean, temp_max, temp_min),
        _scale(window['Humidity'].values, key, humidity_mean, humidity_max, humidity_min),
        _scale(window['Wind SpeedKm/h'].values, key, wind_mean, wind_max, wind_min),
    ])
    del history, window  # free memory before the model is loaded

    model_dir = '/home/ubuntu/nuweth/models/' + name + '/' + str(hours) + '/100/'
    with open(model_dir + 'architecture.json') as architecture_file:
        model = model_from_json(architecture_file.read())
    model.load_weights(model_dir + 'weights.h5')
    model.compile(optimizer=rmsprop(), loss='mse')

    # The first (only) batch item holds one row per predicted hour with
    # columns temperature, humidity, wind speed -- still normalized.
    output = np.asarray(model.predict(sequences.reshape((1, hours, 3)))[0], dtype=float)
    del model  # release the network before the next one is loaded

    frame = pd.DataFrame({
        'T': _unscale(output[:, 0], key, temp_mean, temp_max, temp_min),
        'H': _unscale(output[:, 1], key, humidity_mean, humidity_max, humidity_min),
        'W': _unscale(output[:, 2], key, wind_mean, wind_max, wind_min),
        'C': name,
    })
    # Dew point estimated from temperature and relative humidity.
    frame['DP'] = frame['T'] - (100 - frame['H']) / 5
    # Predicted hours follow the last observed hour, one hour apart.
    frame['DateUTC'] = [last_time + timedelta(hours=step)
                        for step in range(1, len(frame) + 1)]
    frame['Type'] = str(hours)

    # Classify the weather condition of every predicted hour.
    features = np.column_stack([
        frame['T'].values,
        frame['H'].values,
        frame['W'].values,
        frame['DateUTC'].dt.month.values,
        frame['DateUTC'].dt.day.values,
        cities_encoder.transform(frame['C'].values),
    ])
    frame['CON'] = conditions_encoder.inverse_transform(clf.predict(features))

    # Save the predictions where the web front end can read them.
    csv_path = '/var/www/html/nuweth/predictions/' + name + suffix + '.csv'
    frame.to_csv(csv_path, index=False)

    # Load the same rows into the DB, picking fields by header name so the
    # insert does not depend on the column order of the CSV.
    with open(csv_path, 'r') as csv_file:
        rows = [tuple(record[column] for column in DB_SOURCE_COLUMNS)
                for record in csv.DictReader(csv_file)]
    cur.executemany(
        """INSERT INTO predictions (C, H, S, T, DP, D, Type, CON) VALUES (?,?,?,?,?,?,?,?)""",
        rows)
    conn.commit()


# Run every horizon for every city (all 24h forecasts first, then all 72h
# ones) and close the connection even if a city fails.
try:
    for hours, suffix in HORIZONS:
        for city in cities:
            _forecast_city(city[0], hours, suffix)
finally:
    conn.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment