from __future__ import division
import numpy as np
import pandas as pd
from keras.optimizers import rmsprop
from keras.models import model_from_json
from params import *
from datetime import datetime
from datetime import date, timedelta
import csv
import sqlite3
from sklearn.neighbors import KNeighborsClassifier
from sklearn.externals import joblib
from sys import exit
# StringIO moved between Python 2 and 3, so pick the import that matches the interpreter
import sys
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
from pandas import DataFrame
# set up the SQLite database and recreate the predictions table from scratch
conn = sqlite3.connect('/var/www/html/nuweth/data/DB.db')
cur = conn.cursor()
cur.execute("""DROP TABLE IF EXISTS predictions""")
cur.execute("""CREATE TABLE predictions (C text, H float, S float, T float, DP float, D text, Type text, CON text)""")
# import the KNN model and the encoders
clf = joblib.load('/home/ubuntu/nuweth/models/knn.pkl')
conditions_encoder = joblib.load('/home/ubuntu/nuweth/models/conditions_encoder.pkl')
cities_encoder = joblib.load('/home/ubuntu/nuweth/models/cities_encoder.pkl')
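# Assumption: the encoders behave like scikit-learn LabelEncoders: cities_encoder maps city
# names to the integer codes the KNN was trained on, and conditions_encoder maps the
# classifier's integer output back to condition strings via inverse_transform.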
# cities
cities = [
    ["Agadir", "GMAD"],
    ["Tetuan", "GMTN"],
    ["Fes-Sais", "GMFF"],
    ["Rabat-Sale", "GMME"],
    ["Al+Hoceima", "GMTA"],
    ["Tanger", "GMTT"],
    ["Marrakech", "GMMX"],
    ["Nouasseur", "GMMN"],
    ["Oujda", "GMFO"]
]
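# Each entry is [city name, station code]; the codes look like ICAO airport identifiers,
# but only city[0] is actually used below (in file paths and as the frame's C column).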
# PREDICT THE NEXT 24H
for city in cities:
    data = pd.read_csv('/home/ubuntu/nuweth/data/' + city[0] + '.csv')
    d = data.tail(24).copy()  # copy so the normalization below does not modify a view of `data`
    # get the timestamp of the last observation
    last = d.tail(1)
    last_date = last.iloc[0]['DateUTC'][1:-1].split(",")  # a list of date components
    last_time_text = last_date[0].strip() + "-" + last_date[1].strip() + "-" + last_date[2].strip() + "-" + last_date[3].strip()
    last_time = datetime.strptime(last_time_text, '%Y-%m-%d-%H')
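    # Assumption: DateUTC is stored as a bracketed, comma-separated string such as
    # "(2016, 6, 25, 22)" (year, month, day, hour); [1:-1].split(",") takes it apart and
    # strptime reassembles the pieces using the '%Y-%m-%d-%H' format.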
    # normalize each feature with the per-city statistics imported from params
    d['TemperatureC'] = (d['TemperatureC'] - temp_mean[city[0].upper()]) / (temp_max[city[0].upper()] - temp_min[city[0].upper()])
    d['Humidity'] = (d['Humidity'] - humidity_mean[city[0].upper()]) / (humidity_max[city[0].upper()] - humidity_min[city[0].upper()])
    d['Wind SpeedKm/h'] = (d['Wind SpeedKm/h'] - wind_mean[city[0].upper()]) / (wind_max[city[0].upper()] - wind_min[city[0].upper()])
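    # This is mean/range scaling: x' = (x - mean) / (max - min), so each feature is roughly
    # centred on 0 with a spread of about 1; the predictions are mapped back to real units
    # with the inverse transform further down.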
    # one temperature, humidity and wind-speed reading per hour
    ts = d[['TemperatureC']].values.flatten()
    hs = d[['Humidity']].values.flatten()
    ws = d[['Wind SpeedKm/h']].values.flatten()
    # create (temperature, humidity, wind speed) triples, one per hour
    sequences = zip(ts, hs, ws)
    sequences = np.array([list(i) for i in sequences])
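    # sequences now has shape (24, 3): 24 hourly steps with 3 features each, the input
    # window the sequence model expects (see the reshape to (1, 24, 3) below).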
    # free up memory
    del ts, hs, ws, data
    # load the city's 24-hour model (architecture + weights) and compile it
    model = model_from_json(open('/home/ubuntu/nuweth/models/' + city[0] + '/24/100/architecture.json').read())
    model.load_weights('/home/ubuntu/nuweth/models/' + city[0] + '/24/100/weights.h5')
    optimizer = rmsprop()
    model.compile(optimizer=optimizer, loss='mse')
    # predict
    predictions = model.predict(sequences.reshape((1,24,3)))
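    # The reshape turns the (24, 3) window into a batch of one sample with shape (1, 24, 3);
    # predictions[0] is the output sequence for that sample, one (temperature, humidity,
    # wind speed) triple per forecast hour, still on the normalized scale, so the three
    # lines below invert the mean/range scaling.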
    predicted_temps = [x[0]*(temp_max[city[0].upper()] - temp_min[city[0].upper()]) + temp_mean[city[0].upper()] for x in predictions[0]]
    predicted_humidities = [x[1]*(humidity_max[city[0].upper()] - humidity_min[city[0].upper()]) + humidity_mean[city[0].upper()] for x in predictions[0]]
    predicted_speeds = [x[2]*(wind_max[city[0].upper()] - wind_min[city[0].upper()]) + wind_mean[city[0].upper()] for x in predictions[0]]
    frame = pd.DataFrame({'T': predicted_temps, 'H': predicted_humidities, 'W': predicted_speeds, 'C': city[0]})
    # add dew points
    frame['DP'] = frame['T'] - (100 - frame['H']) / 5
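    # Dew point is estimated with the common rule of thumb DP = T - (100 - RH) / 5, which is
    # a reasonable approximation when relative humidity is above roughly 50%.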
    # add forecast timestamps, one hour after the last observation and then hourly
    i = 1
    for index, row in frame.iterrows():
        frame.set_value(index, 'DateUTC', last_time + timedelta(hours=i))
        frame.set_value(index, 'Type', '24')
        i += 1
    # make sure DateUTC has a datetime dtype so the .dt accessor works
    frame['DateUTC'] = pd.to_datetime(frame['DateUTC'])
    # extract the features the KNN classifier needs
    months = frame.DateUTC.dt.month.values
    days = frame.DateUTC.dt.day.values
    temperatures = frame['T'].values
    humidities = frame.H.values
    windspeeds = frame.W.values
    cities_ = cities_encoder.transform(frame.C.values)
    # predict a weather condition label for every forecast hour
    X = zip(temperatures, humidities, windspeeds, months, days, cities_)
    X = np.array([list(x) for x in X])
    frame['CON'] = conditions_encoder.inverse_transform(clf.predict(X))
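    # Note: the feature order (temperature, humidity, wind speed, month, day, encoded city)
    # is assumed to match the column order the KNN classifier was trained with; clf.predict
    # returns encoded labels, which inverse_transform turns back into condition strings.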
    # save the predictions, ordering the columns to match the predictions table schema
    # (C, H, S = wind speed, T, DP, D = date, Type, CON)
    frame = frame[['C', 'H', 'W', 'T', 'DP', 'DateUTC', 'Type', 'CON']]
    frame.to_csv('/var/www/html/nuweth/predictions/' + city[0] + '.csv', index=False)
    # delete the model to free memory before the next city
    del model
    # insert the rows into the SQLite predictions table
    with open('/var/www/html/nuweth/predictions/' + city[0] + '.csv', 'r') as f:
        reader = csv.reader(f.readlines()[1:])  # skip the header line
        cur.executemany("""INSERT INTO predictions VALUES (?,?,?,?,?,?,?,?)""", (row for row in reader))
    conn.commit()
# PREDICT FOR 3 DAYS
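# The 3-day loop mirrors the 24-hour loop above: same preprocessing, denormalization and
# KNN condition step, but it feeds the last 72 hours into each city's 72-step model and
# tags the resulting rows with Type '72'.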
for city in cities:
    data = pd.read_csv('/home/ubuntu/nuweth/data/' + city[0] + '.csv')
    d = data.tail(72).copy()  # copy so the normalization below does not modify a view of `data`
    # get the timestamp of the last observation
    last = d.tail(1)
    last_date = last.iloc[0]['DateUTC'][1:-1].split(",")  # a list of date components
    last_time_text = last_date[0].strip() + "-" + last_date[1].strip() + "-" + last_date[2].strip() + "-" + last_date[3].strip()
    last_time = datetime.strptime(last_time_text, '%Y-%m-%d-%H')
    # normalize each feature with the per-city statistics imported from params
    d['TemperatureC'] = (d['TemperatureC'] - temp_mean[city[0].upper()]) / (temp_max[city[0].upper()] - temp_min[city[0].upper()])
    d['Humidity'] = (d['Humidity'] - humidity_mean[city[0].upper()]) / (humidity_max[city[0].upper()] - humidity_min[city[0].upper()])
    d['Wind SpeedKm/h'] = (d['Wind SpeedKm/h'] - wind_mean[city[0].upper()]) / (wind_max[city[0].upper()] - wind_min[city[0].upper()])
    # one temperature, humidity and wind-speed reading per hour
    ts = d[['TemperatureC']].values.flatten()
    hs = d[['Humidity']].values.flatten()
    ws = d[['Wind SpeedKm/h']].values.flatten()
    # create (temperature, humidity, wind speed) triples, one per hour
    sequences = zip(ts, hs, ws)
    sequences = np.array([list(i) for i in sequences])
    # free up memory
    del ts, hs, ws, data
    # load the city's 72-hour model (architecture + weights) and compile it
    model = model_from_json(open('/home/ubuntu/nuweth/models/' + city[0] + '/72/100/architecture.json').read())
    model.load_weights('/home/ubuntu/nuweth/models/' + city[0] + '/72/100/weights.h5')
    optimizer = rmsprop()
    model.compile(optimizer=optimizer, loss='mse')
    # predict
    predictions = model.predict(sequences.reshape((1,72,3)))
    predicted_temps = [x[0]*(temp_max[city[0].upper()] - temp_min[city[0].upper()]) + temp_mean[city[0].upper()] for x in predictions[0]]
    predicted_humidities = [x[1]*(humidity_max[city[0].upper()] - humidity_min[city[0].upper()]) + humidity_mean[city[0].upper()] for x in predictions[0]]
    predicted_speeds = [x[2]*(wind_max[city[0].upper()] - wind_min[city[0].upper()]) + wind_mean[city[0].upper()] for x in predictions[0]]
    frame = pd.DataFrame({'T': predicted_temps, 'H': predicted_humidities, 'W': predicted_speeds, 'C': city[0]})
    # add dew points
    frame['DP'] = frame['T'] - (100 - frame['H']) / 5
    # add forecast timestamps, one hour after the last observation and then hourly
    i = 1
    for index, row in frame.iterrows():
        frame.set_value(index, 'DateUTC', last_time + timedelta(hours=i))
        frame.set_value(index, 'Type', '72')
        i += 1
    # make sure DateUTC has a datetime dtype so the .dt accessor works
    frame['DateUTC'] = pd.to_datetime(frame['DateUTC'])
    # extract the features the KNN classifier needs
    months = frame.DateUTC.dt.month.values
    days = frame.DateUTC.dt.day.values
    temperatures = frame['T'].values
    humidities = frame.H.values
    windspeeds = frame.W.values
    # use a separate name so the outer `cities` list is not overwritten
    cities_ = cities_encoder.transform(frame.C.values)
    # predict a weather condition label for every forecast hour
    X = zip(temperatures, humidities, windspeeds, months, days, cities_)
    X = np.array([list(x) for x in X])
    frame['CON'] = conditions_encoder.inverse_transform(clf.predict(X))
    # save the predictions, ordering the columns to match the predictions table schema
    frame = frame[['C', 'H', 'W', 'T', 'DP', 'DateUTC', 'Type', 'CON']]
    frame.to_csv('/var/www/html/nuweth/predictions/' + city[0] + '_.csv', index=False)
    # delete the model to free memory before the next city
    del model
    # insert the rows into the SQLite predictions table
    with open('/var/www/html/nuweth/predictions/' + city[0] + '_.csv', 'r') as f:
        reader = csv.reader(f.readlines()[1:])  # skip the header line
        cur.executemany("""INSERT INTO predictions VALUES (?,?,?,?,?,?,?,?)""", (row for row in reader))
    conn.commit()
# close the connection
conn.close()