-
-
Save AllieUbisse/6d77b2715f3c3609ed904e068c686af1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# IBM Capstone - ITUB4 TS Forecasting using News" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# @hidden_cell\n", | |
"# The project token is an authorization token that is used to access project resources like data sources, connections, and used by platform APIs.\n", | |
"# NOTE(review): the project id and access token below are masked with asterisks; presumably real values must be supplied before this cell works - confirm, and keep real tokens out of shared copies of the notebook.\n", | |
"from project_lib import Project\n", | |
"project = Project(project_id='********-****-******-****-**********', project_access_token='p-************************************')\n", | |
"# Keep a reference to the project's `project_context` attribute as `pc`.\n", | |
"pc = project.project_context" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Pinned dependencies. `%pip` (instead of `!pip`) installs into the environment of the running kernel,\n", | |
"# whereas a shell `pip` may belong to a different Python installation.\n", | |
"%pip install yfinance==0.1.54\n", | |
"%pip install pandas-datareader==0.8.1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/anthony/Capstone Advanced DS/github/Deployment/venv/lib/python3.7/site-packages/pandas_datareader/compat/__init__.py:7: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", | |
" from pandas.util.testing import assert_frame_equal\n" | |
] | |
} | |
], | |
"source": [ | |
"# Standard library\n", | |
"import datetime\n", | |
"from datetime import date, timedelta\n", | |
"\n", | |
"# Third-party\n", | |
"import pandas as pd\n", | |
"import yfinance as yf\n", | |
"from pandas_datareader import data as pdr\n", | |
"\n", | |
"# Let yfinance take over pandas_datareader's `get_data_yahoo`.\n", | |
"yf.pdr_override()\n", | |
"\n", | |
"# `today` is deliberately one day AFTER the current date; a later cell passes it as the `end` bound of the downloads.\n", | |
"today = date.today() + timedelta(days=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# First date to request, written in ISO order (year-month-day).\n", | |
"date_time = '2005-04-05'\n", | |
"# Parse as year-month-day, i.e. 5 April 2005 (a '%Y-%d-%m' pattern would swap day and month and yield 4 May 2005).\n", | |
"start = datetime.datetime.strptime(date_time, '%Y-%m-%d')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[*********************100%***********************] 1 of 1 completed\n", | |
"[*********************100%***********************] 1 of 1 completed\n", | |
"[*********************100%***********************] 1 of 1 completed\n" | |
] | |
} | |
], | |
"source": [ | |
"# Download the three series (Itau share price, Ibovespa, USD/BRL) between `start` and `today`.\n", | |
"# `reset_index()` turns the index into a regular column so that 'Date' can be selected,\n", | |
"# and only the columns used by the following cells are kept.\n", | |
"TS_itau = pdr.get_data_yahoo('itub4.sa', start=start, end=today).reset_index()[['Date', 'Open', 'Close']]\n", | |
"BVSP = pdr.get_data_yahoo('^BVSP', start=start, end=today).reset_index()[['Date', 'Open']]\n", | |
"USDBRL = pdr.get_data_yahoo('USDBRL=X', start=start, end=today).reset_index()[['Date', 'Open']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Give each series its own column names so the price columns stay distinguishable after merging.\n", | |
"TS_itau = TS_itau.rename(columns={'Open': 'Itau_open', 'Close': 'Itau_Close'})\n", | |
"BVSP = BVSP.rename(columns={'Open': 'BVSP_open'})\n", | |
"USDBRL = USDBRL.rename(columns={'Open': 'USDBRL_open'})\n", | |
"\n", | |
"# Join on 'Date' with merge's default inner join: only dates present in all three series are kept.\n", | |
"result = TS_itau.merge(BVSP, on='Date').merge(USDBRL, on='Date')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# The Itau close shifted by 1, 2 and 3 rows becomes the lag_1 / lag_2 / lag_3 columns.\n", | |
"itau_close = result['Itau_Close']\n", | |
"lag_col = pd.concat([itau_close.shift(n_rows) for n_rows in (1, 2, 3)], axis=1)\n", | |
"lag_col.columns = ['lag_1', 'lag_2', 'lag_3']\n", | |
"\n", | |
"# Append the lag columns and drop every row that contains a missing value\n", | |
"# (this includes the first rows, which have no complete lag history).\n", | |
"result = pd.concat([result, lag_col], axis=1, sort=False).dropna()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Date</th>\n", | |
" <th>Itau_open</th>\n", | |
" <th>Itau_Close</th>\n", | |
" <th>BVSP_open</th>\n", | |
" <th>USDBRL_open</th>\n", | |
" <th>lag_1</th>\n", | |
" <th>lag_2</th>\n", | |
" <th>lag_3</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2006-03-29</td>\n", | |
" <td>10.9282</td>\n", | |
" <td>10.8934</td>\n", | |
" <td>36684.0</td>\n", | |
" <td>2.2270</td>\n", | |
" <td>10.9282</td>\n", | |
" <td>11.1600</td>\n", | |
" <td>11.4829</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>2006-03-30</td>\n", | |
" <td>11.0093</td>\n", | |
" <td>10.8405</td>\n", | |
" <td>37493.0</td>\n", | |
" <td>2.2117</td>\n", | |
" <td>10.8934</td>\n", | |
" <td>10.9282</td>\n", | |
" <td>11.1600</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>2006-03-31</td>\n", | |
" <td>10.8620</td>\n", | |
" <td>10.5971</td>\n", | |
" <td>37783.0</td>\n", | |
" <td>2.1853</td>\n", | |
" <td>10.8405</td>\n", | |
" <td>10.8934</td>\n", | |
" <td>10.9282</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>2006-04-03</td>\n", | |
" <td>10.7047</td>\n", | |
" <td>11.0358</td>\n", | |
" <td>37952.0</td>\n", | |
" <td>2.1618</td>\n", | |
" <td>10.5971</td>\n", | |
" <td>10.8405</td>\n", | |
" <td>10.8934</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>2006-04-04</td>\n", | |
" <td>11.0789</td>\n", | |
" <td>11.2263</td>\n", | |
" <td>38718.0</td>\n", | |
" <td>2.1370</td>\n", | |
" <td>11.0358</td>\n", | |
" <td>10.5971</td>\n", | |
" <td>10.8405</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Date Itau_open Itau_Close BVSP_open USDBRL_open lag_1 lag_2 \\\n", | |
"3 2006-03-29 10.9282 10.8934 36684.0 2.2270 10.9282 11.1600 \n", | |
"4 2006-03-30 11.0093 10.8405 37493.0 2.2117 10.8934 10.9282 \n", | |
"5 2006-03-31 10.8620 10.5971 37783.0 2.1853 10.8405 10.8934 \n", | |
"6 2006-04-03 10.7047 11.0358 37952.0 2.1618 10.5971 10.8405 \n", | |
"7 2006-04-04 11.0789 11.2263 38718.0 2.1370 11.0358 10.5971 \n", | |
"\n", | |
" lag_3 \n", | |
"3 11.4829 \n", | |
"4 11.1600 \n", | |
"5 10.9282 \n", | |
"6 10.8934 \n", | |
"7 10.8405 " | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Show the first rows of the merged frame together with the three lag columns.\n", | |
"result.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Overwrite the stored time-series data.\n", | |
"# `to_csv()` is called without a path, so it returns the CSV text (row index included by default).\n", | |
"ts_csv = result.to_csv()\n", | |
"project.save_data(file_name='ts_data', data=ts_csv, overwrite=True)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment