Skip to content

Instantly share code, notes, and snippets.

@seahrh
Last active August 1, 2020 08:43
Show Gist options
  • Save seahrh/8ab327f9e3db8b7f947928d27d3ead96 to your computer and use it in GitHub Desktop.
Save seahrh/8ab327f9e3db8b7f947928d27d3ead96 to your computer and use it in GitHub Desktop.
ARIMA / SARIMA examples (forecast method works but not predict)
#!/bin/python3
import math
import os
import random
import re
import sys
import pandas as pd
from statsmodels.tsa.arima_model import ARIMA
from typing import List
def predictTemperature(startDate: str, endDate: str, temperature: List[float], n: int) -> List[float]:
    """Forecast hourly temperatures for the ``n`` days after the observed window.

    An ARIMA(1, 1, 1) model is fitted on ``temperature``, which is indexed
    hourly from ``startDate 00:00`` through ``endDate 23:00``.

    Args:
        startDate: First observed day; " 00:00" is appended to build the index.
        endDate: Last observed day; " 23:00" is appended to build the index.
        temperature: Hourly observations; pandas requires one value per hour
            of the generated index.
        n: Number of days to forecast (24 values per day).

    Returns:
        A plain list of ``n * 24`` forecast values, or an empty list when
        ``n`` is not positive.
    """
    # Nothing to forecast: skip the (comparatively expensive) model fit.
    if n <= 0:
        return []
    index = pd.period_range(start=startDate + " 00:00", end=endDate + " 23:00", freq='H')
    series = pd.Series(temperature, index=index)
    fitted = ARIMA(series, order=(1, 1, 1)).fit(disp=False)
    # With the legacy statsmodels.tsa.arima_model API imported by this file,
    # forecast() returns a tuple whose first element holds the point forecasts.
    point_forecasts = fitted.forecast(n * 24)[0]
    # Convert to built-in floats so the result matches the List[float] annotation.
    return [float(value) for value in point_forecasts]
#!/bin/python3
import math
import os
import random
import re
import sys
from datetime import datetime, date
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from typing import List
_DATE_FORMAT = "%m/%d/%Y %H:%M:%S"
def calcMissing(readings):
    """Print a one-step SARIMAX forecast for every missing reading.

    Each entry of ``readings`` is a ``"<date>\\t<value>"`` string. Entries
    whose value starts with ``Missing_`` trigger a model fit on all readings
    collected so far; the result of ``forecast()`` is printed as-is. Known
    readings are simply accumulated for later fits.
    """
    seen_dates = []
    seen_values = []
    for line in readings:
        stamp, raw = line.split('\t')
        if not raw.startswith('Missing_'):
            # Known reading: remember it for subsequent fits.
            seen_values.append(raw)
            seen_dates.append(stamp)
            continue
        # Missing reading: fit on the history gathered up to this point.
        history = pd.Series(
            seen_values,
            index=pd.PeriodIndex(seen_dates, freq='D'),
            dtype=np.float32,
        )
        fitted = sm.tsa.statespace.SARIMAX(
            history,
            order=(1, 1, 1),
            seasonal_order=(1, 1, 1, 12),
            enforce_invertibility=True,
        ).fit(disp=False)
        print(fitted.forecast())
if __name__ == '__main__':
    # First stdin line is the number of readings; one reading per line follows.
    readings_count = int(input().strip())
    readings = [input() for _ in range(readings_count)]
    calcMissing(readings)
from datetime import datetime, timedelta
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from typing import List
def predict(startDate: str, endDate: str, knownTimestamps: List[str], values: List[float], timestamps: List[str]) -> List[float]:
    """Forecast the value at each requested hourly timestamp with SARIMAX.

    For every distinct entry of ``timestamps`` (handled in chronological
    order) a ``SARIMAX(order=(1, 1, 1), trend='c')`` model is fitted on the
    known observations that strictly precede it, and the forecast for that
    hour is collected.

    Args:
        startDate: Only echoed in the debug output.
        endDate: Only echoed in the debug output.
        knownTimestamps: Timestamps (``%Y-%m-%d %H:%M``) of the known
            observations. Assumed to be in chronological order, because the
            training slice is taken by position.
        values: Observations aligned with ``knownTimestamps``.
        timestamps: Timestamps (same format) to forecast.

    Returns:
        One forecast per distinct requested timestamp, in chronological order.
        Empty when ``timestamps`` is empty.

    Raises:
        ValueError: If a requested timestamp has no known observation before it.
    """
    print(f'startDate={startDate}')
    print(f'endDate={endDate}')
    print(f'knownTimestamps={knownTimestamps}')
    # The original interpolated an undefined name here (NameError on every
    # call); the label is kept and the observed values are shown instead.
    print(f'humidity={values}')
    print(f'timestamps={timestamps}')
    ts_format = '%Y-%m-%d %H:%M'
    to_predict = {datetime.strptime(ts, ts_format) for ts in timestamps}
    print(f'to_predict={to_predict}')
    res: List[float] = []
    if not to_predict:
        return res

    known = [datetime.strptime(ts, ts_format) for ts in knownTimestamps]
    targets = sorted(to_predict)
    # Fail fast instead of looping forever (as the hour-stepping loop did)
    # when a target can never be reached from the known data.
    if not known or targets[0] <= min(known):
        raise ValueError('every requested timestamp needs at least one earlier known observation')

    index = pd.PeriodIndex(knownTimestamps, freq='H')
    x = pd.Series(values, index=index)
    one_hour = timedelta(hours=1)
    n_train = 0
    for target in targets:
        # Train only on observations strictly before the target so that no
        # future value leaks into the fit.
        while n_train < len(known) and known[n_train] < target:
            n_train += 1
        model = sm.tsa.statespace.SARIMAX(x[:n_train], trend='c', order=(1, 1, 1))
        fitted = model.fit(disp=False)
        # Consecutive missing hours lie more than one step past the last
        # training observation, so forecast that many steps and keep the last.
        horizon = max(1, int(round((target - known[n_train - 1]) / one_hour)))
        # .iloc is positional; the forecast index is not guaranteed to start at 0.
        res.append(float(fitted.forecast(horizon).iloc[-1]))
    return res
#!/bin/python3
import math
import os
import random
import re
import sys
from datetime import datetime, date
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from typing import List
_DATE_FORMAT = "%m/%d/%Y %H:%M:%S"
def calcMissing(readings):
    """Print a SARIMAX estimate for every missing reading.

    Each entry of ``readings`` is a ``"<date>\\t<value>"`` string, where the
    date of a missing entry is parsed with ``_DATE_FORMAT`` and its value
    starts with ``Missing_``. A single model is fitted on the known readings
    and queried once per missing date; each one-element prediction returned
    by ``predict`` is printed as-is.

    The known readings are placed on a contiguous daily index that also
    spans the missing dates, with NaN for days without a value. This lets
    each missing date be addressed by integer position, which does not depend
    on statsmodels resolving a date against a gappy index.
    Assumes at most one reading per calendar day — TODO confirm.

    Raises:
        ValueError: If there are missing readings but no known value at all.
    """
    dates: List[str] = []
    values: List[float] = []
    missing: List[str] = []
    for r in readings:
        d, v = r.split('\t')
        if v.startswith('Missing_'):
            missing.append(d)
            continue
        # Convert explicitly rather than relying on pandas to cast strings.
        values.append(float(v))
        dates.append(d)

    # Nothing to estimate: skip the model fit entirely.
    if not missing:
        return
    if not values:
        raise ValueError('cannot estimate missing readings without any known value')

    known_index = pd.PeriodIndex(dates, freq='D')
    missing_periods = [
        pd.Period(datetime.strptime(m, _DATE_FORMAT).date(), freq='D')
        for m in missing
    ]
    # Contiguous daily range covering both known and missing dates.
    full_index = pd.period_range(
        start=min(known_index.min(), min(missing_periods)),
        end=max(known_index.max(), max(missing_periods)),
        freq='D',
    )
    x_train = pd.Series(values, index=known_index, dtype=np.float32).reindex(full_index)
    model = sm.tsa.statespace.SARIMAX(
        x_train,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 12),
        enforce_invertibility=True,
    )
    model = model.fit(disp=False)
    for period in missing_periods:
        # Address the prediction by position within the contiguous index.
        loc = full_index.get_loc(period)
        y = model.predict(start=loc, end=loc)
        print(y)
if __name__ == '__main__':
    # First stdin line is the number of readings; one reading per line follows.
    readings_count = int(input().strip())
    readings = [input() for _ in range(readings_count)]
    calcMissing(readings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment