Skip to content

Instantly share code, notes, and snippets.

@kentatogashi
Last active December 15, 2018 06:34
Show Gist options
  • Save kentatogashi/67369c8208a3e2e53a6a3e4f5c42404d to your computer and use it in GitHub Desktop.
Save kentatogashi/67369c8208a3e2e53a6a3e4f5c42404d to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
# Download the visitor table and work on a copy, so df_orig keeps the data
# exactly as it was read.
df_orig = pd.read_csv('https://timeseries.weebly.com/uploads/2/1/0/8/21086414/_visitors.csv')
df = df_orig.copy()

# Rewrite each quarter tag in the 'Date' text into the first month of that
# quarter ('...Q1' -> '...-01', '...Q2' -> '...-04', and so on) before parsing.
# The result is assigned back instead of calling replace(..., inplace=True) on
# df['Date']: an in-place call on a selected column is chained assignment,
# which recent pandas warns about and which does not update df once
# Copy-on-Write is active.
quarter_start_month = {'Q1': '01', 'Q2': '04', 'Q3': '07', 'Q4': '10'}
for quarter, month in quarter_start_month.items():
    df['Date'] = df['Date'].replace(f'(.*){quarter}', rf'\1-{month}', regex=True)

# Parse the rewritten text into timestamps and drop the now-redundant column.
df['DateTime'] = pd.to_datetime(df['Date'])
df.drop('Date', axis=1, inplace=True)

# One line per country; the legend label is taken from the column name so the
# two can never disagree (the label for 'United Kingdom' used to be misspelled).
plotted_countries = (
    'Australia',
    'China, People\'s Republic of',
    'Japan',
    'United Kingdom',
)
for country in plotted_countries:
    plt.plot(df['DateTime'], df[country], label=country)

plt.title('Visitors to NZ')
plt.xlabel('datetime')
plt.ylabel('visitors')
plt.legend(loc='best')
plt.show()
# NOTE(review): this section repeats the download / clean / plot steps that
# already appear earlier in the file; consider keeping only one copy.
df_orig = pd.read_csv('https://timeseries.weebly.com/uploads/2/1/0/8/21086414/_visitors.csv')
df = df_orig.copy()

# Turn each quarter tag in the 'Date' text into the first month of that quarter
# ('...Q1' -> '...-01', '...Q2' -> '...-04', '...Q3' -> '...-07',
# '...Q4' -> '...-10'). The replaced Series is assigned back to the column
# rather than using inplace=True on df['Date'], because an in-place call on a
# selected column is chained assignment: recent pandas warns about it and it
# leaves df unchanged once Copy-on-Write is active.
for quarter, month in (('Q1', '01'), ('Q2', '04'), ('Q3', '07'), ('Q4', '10')):
    df['Date'] = df['Date'].replace(f'(.*){quarter}', rf'\1-{month}', regex=True)

# Build the timestamp column used as the x axis, then drop the text column.
df['DateTime'] = pd.to_datetime(df['Date'])
df.drop('Date', axis=1, inplace=True)

# Plot each country against time. Using the column name as the legend label
# fixes the misspelled 'Unite Kingdom' label.
for country in ('Australia', 'China, People\'s Republic of', 'Japan', 'United Kingdom'):
    plt.plot(df['DateTime'], df[country], label=country)

plt.title('Visitors to NZ')
plt.xlabel('datetime')
plt.ylabel('visitors')
plt.legend(loc='best')
plt.show()
# linear_model
# Fit an ordinary least-squares line to the 'Australia' column, using the
# DataFrame's row index as the only feature, then report and plot the fit on
# the training rows and on the held-out final rows.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import math

# Hold out the last `test_size` rows for evaluation; everything before them is
# used for fitting.
size = len(df)
test_size = 20
train_data = df.Australia[:(size - test_size)]
test_data = df.Australia[-(test_size):]

# scikit-learn expects 2-D arrays, hence the reshape to a single column.
trainX = train_data.index.values.reshape(-1, 1)
trainY = train_data.values.reshape(-1, 1)
testX = test_data.index.values.reshape(-1, 1)
testY = test_data.values.reshape(-1, 1)

lr = LinearRegression()
lr.fit(trainX, trainY)

# In-sample fit: error and plot on the rows the model was trained on.
predicted_trainY = lr.predict(trainX)
train_rmse = math.sqrt(mean_squared_error(trainY, predicted_trainY))
print(f'RMSE: {train_rmse:.2f}')
plt.plot(trainX, trainY, label='actual')
plt.plot(trainX, predicted_trainY, label='predicted')
plt.legend(loc='best')
plt.show()

# Out-of-sample fit: predict the held-out rows. This prediction was never
# computed before, so the lines below raised NameError on predicted_testY.
predicted_testY = lr.predict(testX)
test_rmse = math.sqrt(mean_squared_error(testY, predicted_testY))
print(f'RMSE: {test_rmse:.2f}')
plt.plot(testX, testY, label='actual')
plt.plot(testX, predicted_testY, label='predicted')
plt.legend(loc='best')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment