Last active
July 21, 2021 01:18
-
-
Save audhiaprilliant/ed67535b1380a252c6d9fd8f500d48bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# IHSG Pre-processing Data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Audhi Aprilliant" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 1 Import Libraries" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 89, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.132953Z", | |
| "start_time": "2020-03-17T20:35:29.126113Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd # Dataframe manipulation\n", | |
| "import numpy as np # Mathematics operation\n", | |
| "import datetime" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Load the Data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 90, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.210984Z", | |
| "start_time": "2020-03-17T20:35:29.138470Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Load the ^JKSE price history; the path is relative to the working directory.\n", | |
| "ihsg_data = pd.read_csv('Datasets/^JKSE.csv')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 91, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.315803Z", | |
| "start_time": "2020-03-17T20:35:29.215699Z" | |
| }, | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension of financial news:\n", | |
| "257 rows and 7 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Date</th>\n", | |
| " <th>Open</th>\n", | |
| " <th>High</th>\n", | |
| " <th>Low</th>\n", | |
| " <th>Close</th>\n", | |
| " <th>Adj Close</th>\n", | |
| " <th>Volume</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2019-03-08</td>\n", | |
| " <td>6439.840820</td>\n", | |
| " <td>6442.204102</td>\n", | |
| " <td>6381.730957</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>92588600.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2019-03-11</td>\n", | |
| " <td>6418.882813</td>\n", | |
| " <td>6420.782227</td>\n", | |
| " <td>6366.039063</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>97316700.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2019-03-12</td>\n", | |
| " <td>6395.880859</td>\n", | |
| " <td>6404.430176</td>\n", | |
| " <td>6339.967773</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>84514000.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2019-03-13</td>\n", | |
| " <td>6343.121094</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>6337.874023</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>100029400.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2019-03-14</td>\n", | |
| " <td>6374.916992</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>6372.964844</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>96692700.0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>2019-03-15</td>\n", | |
| " <td>6420.182129</td>\n", | |
| " <td>6489.194824</td>\n", | |
| " <td>6419.644043</td>\n", | |
| " <td>6461.183105</td>\n", | |
| " <td>6461.183105</td>\n", | |
| " <td>122938800.0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Date Open High Low Close \\\n", | |
| "0 2019-03-08 6439.840820 6442.204102 6381.730957 6383.067871 \n", | |
| "1 2019-03-11 6418.882813 6420.782227 6366.039063 6366.434082 \n", | |
| "2 2019-03-12 6395.880859 6404.430176 6339.967773 6353.773926 \n", | |
| "3 2019-03-13 6343.121094 6377.575195 6337.874023 6377.575195 \n", | |
| "4 2019-03-14 6374.916992 6413.266113 6372.964844 6413.266113 \n", | |
| "5 2019-03-15 6420.182129 6489.194824 6419.644043 6461.183105 \n", | |
| "\n", | |
| " Adj Close Volume \n", | |
| "0 6383.067871 92588600.0 \n", | |
| "1 6366.434082 97316700.0 \n", | |
| "2 6353.773926 84514000.0 \n", | |
| "3 6377.575195 100029400.0 \n", | |
| "4 6413.266113 96692700.0 \n", | |
| "5 6461.183105 122938800.0 " | |
| ] | |
| }, | |
| "execution_count": 91, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Report the frame's shape, then preview the first six rows.\n", | |
| "# NOTE(review): the label says 'financial news' but this frame holds the ^JKSE prices - confirm the wording.\n", | |
| "print('Dimension of financial news:\\n{}'.format(ihsg_data.shape[0]),\n", | |
| " 'rows and {}'.format(ihsg_data.shape[1]),'columns')\n", | |
| "ihsg_data.head(6)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 92, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.406594Z", | |
| "start_time": "2020-03-17T20:35:29.324102Z" | |
| }, | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "RangeIndex: 257 entries, 0 to 256\n", | |
| "Data columns (total 7 columns):\n", | |
| "Date 257 non-null object\n", | |
| "Open 247 non-null float64\n", | |
| "High 247 non-null float64\n", | |
| "Low 247 non-null float64\n", | |
| "Close 247 non-null float64\n", | |
| "Adj Close 247 non-null float64\n", | |
| "Volume 247 non-null float64\n", | |
| "dtypes: float64(6), object(1)\n", | |
| "memory usage: 14.2+ KB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Show each column's dtype and non-null count.\n", | |
| "ihsg_data.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.472012Z", | |
| "start_time": "2020-03-17T20:35:29.412660Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Date 0\n", | |
| "Open 10\n", | |
| "High 10\n", | |
| "Low 10\n", | |
| "Close 10\n", | |
| "Adj Close 10\n", | |
| "Volume 10\n", | |
| "dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 93, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Count the missing values in each column\n", | |
| "ihsg_data.isna().sum()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 2 Function to Impute the Missing Dates" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.527054Z", | |
| "start_time": "2020-03-17T20:35:29.477958Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def imput_date(df):\n", | |
| "    \"\"\"Expand ``df`` to one row per calendar day in its date span.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    df : pandas.DataFrame\n", | |
| "        Must contain a 'Date' column of 'YYYY-MM-DD' strings.\n", | |
| "\n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    pandas.DataFrame\n", | |
| "        One row for every calendar day from the earliest to the latest\n", | |
| "        'Date' (inclusive), in chronological order, with 'Date' as the\n", | |
| "        first column. Days that are absent from ``df`` carry NaN in every\n", | |
| "        other column. The input frame is not modified.\n", | |
| "    \"\"\"\n", | |
| "    if df.empty:\n", | |
| "        # No dates means there is no span to expand.\n", | |
| "        return df.copy()\n", | |
| "    # Use min/max instead of the first/last row so the result does not\n", | |
| "    # depend on the row order or on the index labels being 0..n-1.\n", | |
| "    parsed = pd.to_datetime(df['Date'], format='%Y-%m-%d')\n", | |
| "    all_days = pd.date_range(parsed.min(), parsed.max()).strftime('%Y-%m-%d')\n", | |
| "    calendar = pd.DataFrame({'Date': all_days})\n", | |
| "    # Left join keeps every calendar day; 'Date' stays a string column.\n", | |
| "    return calendar.merge(df, on='Date', how='left')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 2 Function to Calculate the Return of IHSG" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 95, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.595903Z", | |
| "start_time": "2020-03-17T20:35:29.538917Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def return_ihsg(df):\n", | |
| "    \"\"\"Append a 'Return' column with the simple return of 'Adj Close'.\n", | |
| "\n", | |
| "    The return of a row is ``(P_t - P_prev) / P_prev``, where ``P_prev`` is\n", | |
| "    the 'Adj Close' of the previous complete row. Rows that contain a\n", | |
| "    missing value in any column get NaN and are never used as ``P_prev``.\n", | |
| "    The first complete row has no predecessor and gets a return of 0.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    df : pandas.DataFrame\n", | |
| "        Price data in chronological order with an 'Adj Close' column.\n", | |
| "\n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    pandas.DataFrame\n", | |
| "        A copy of ``df`` (index reset to 0..n-1) with the extra 'Return'\n", | |
| "        column. The input frame is not modified.\n", | |
| "    \"\"\"\n", | |
| "    complete = df.notna().all(axis=1).to_numpy()\n", | |
| "    prices = df.loc[complete, 'Adj Close'].to_numpy(dtype=float)\n", | |
| "    returns = np.full(len(df), np.nan)\n", | |
| "    if prices.size:\n", | |
| "        # Divide by the previous price (the base of the change), not by\n", | |
| "        # the current one.\n", | |
| "        changes = np.diff(prices) / prices[:-1]\n", | |
| "        returns[complete] = np.concatenate(([0.0], changes))\n", | |
| "    return df.assign(Return=returns).reset_index(drop=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-04T09:24:24.082912Z", | |
| "start_time": "2020-03-04T09:24:24.066687Z" | |
| } | |
| }, | |
| "source": [ | |
| "## 3 Function to Fill Missing Values (Curve Function)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.664193Z", | |
| "start_time": "2020-03-17T20:35:29.603110Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "def _fill_gap_by_bisection(values, left, right):\n", | |
| "    \"\"\"Fill ``values[left + 1:right]`` in place by repeated midpoint averaging.\"\"\"\n", | |
| "    pending = [(left, right)]\n", | |
| "    while pending:\n", | |
| "        lo, hi = pending.pop()\n", | |
| "        if hi - lo < 2:\n", | |
| "            # Adjacent known values: nothing left to fill between them.\n", | |
| "            continue\n", | |
| "        # Middle missing position; for an even-sized gap this is the later\n", | |
| "        # of the two central positions.\n", | |
| "        mid = lo + 1 + (hi - lo - 1) // 2\n", | |
| "        values[mid] = (values[lo] + values[hi]) / 2\n", | |
| "        pending.append((lo, mid))\n", | |
| "        pending.append((mid, hi))\n", | |
| "\n", | |
| "\n", | |
| "def curve_function(df):\n", | |
| "    \"\"\"Fill interior runs of NaN in every numeric column.\n", | |
| "\n", | |
| "    Each run of missing values that has a known value on both sides is\n", | |
| "    filled by bisection: the middle missing position receives the average\n", | |
| "    of the two bounding values, and the two halves are then filled the same\n", | |
| "    way. Missing values at the very start or end of a column have only one\n", | |
| "    neighbour and are left as NaN. Non-numeric columns are left untouched.\n", | |
| "\n", | |
| "    Parameters\n", | |
| "    ----------\n", | |
| "    df : pandas.DataFrame\n", | |
| "        Frame whose rows are in the order the fill should follow.\n", | |
| "\n", | |
| "    Returns\n", | |
| "    -------\n", | |
| "    pandas.DataFrame\n", | |
| "        A filled copy of ``df``; the input frame is not modified.\n", | |
| "    \"\"\"\n", | |
| "    filled = df.copy()\n", | |
| "    for column in filled.columns:\n", | |
| "        series = filled[column]\n", | |
| "        if not pd.api.types.is_numeric_dtype(series) or not series.isna().any():\n", | |
| "            continue\n", | |
| "        # Work positionally on a float array so the fill does not depend on\n", | |
| "        # the index labels and avoids slow scalar .loc access.\n", | |
| "        values = series.to_numpy(dtype=float, na_value=np.nan, copy=True)\n", | |
| "        known = np.flatnonzero(~np.isnan(values))\n", | |
| "        for left, right in zip(known[:-1], known[1:]):\n", | |
| "            _fill_gap_by_bisection(values, int(left), int(right))\n", | |
| "        filled[column] = values\n", | |
| "    return filled" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 4 Apply Functions Above" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 97, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.875304Z", | |
| "start_time": "2020-03-17T20:35:29.670020Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension of financial news:\n", | |
| "257 rows and 8 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Date</th>\n", | |
| " <th>Open</th>\n", | |
| " <th>High</th>\n", | |
| " <th>Low</th>\n", | |
| " <th>Close</th>\n", | |
| " <th>Adj Close</th>\n", | |
| " <th>Volume</th>\n", | |
| " <th>Return</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2019-03-08</td>\n", | |
| " <td>6439.840820</td>\n", | |
| " <td>6442.204102</td>\n", | |
| " <td>6381.730957</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>92588600.0</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2019-03-11</td>\n", | |
| " <td>6418.882813</td>\n", | |
| " <td>6420.782227</td>\n", | |
| " <td>6366.039063</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>97316700.0</td>\n", | |
| " <td>-0.002613</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2019-03-12</td>\n", | |
| " <td>6395.880859</td>\n", | |
| " <td>6404.430176</td>\n", | |
| " <td>6339.967773</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>84514000.0</td>\n", | |
| " <td>-0.001993</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2019-03-13</td>\n", | |
| " <td>6343.121094</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>6337.874023</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>6377.575195</td>\n", | |
| " <td>100029400.0</td>\n", | |
| " <td>0.003732</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2019-03-14</td>\n", | |
| " <td>6374.916992</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>6372.964844</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>6413.266113</td>\n", | |
| " <td>96692700.0</td>\n", | |
| " <td>0.005565</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Date Open High Low Close \\\n", | |
| "0 2019-03-08 6439.840820 6442.204102 6381.730957 6383.067871 \n", | |
| "1 2019-03-11 6418.882813 6420.782227 6366.039063 6366.434082 \n", | |
| "2 2019-03-12 6395.880859 6404.430176 6339.967773 6353.773926 \n", | |
| "3 2019-03-13 6343.121094 6377.575195 6337.874023 6377.575195 \n", | |
| "4 2019-03-14 6374.916992 6413.266113 6372.964844 6413.266113 \n", | |
| "\n", | |
| " Adj Close Volume Return \n", | |
| "0 6383.067871 92588600.0 0.000000 \n", | |
| "1 6366.434082 97316700.0 -0.002613 \n", | |
| "2 6353.773926 84514000.0 -0.001993 \n", | |
| "3 6377.575195 100029400.0 0.003732 \n", | |
| "4 6413.266113 96692700.0 0.005565 " | |
| ] | |
| }, | |
| "execution_count": 97, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Add the 'Return' column, then report the shape and preview the result.\n", | |
| "ihsg_data_clean = return_ihsg(ihsg_data)\n", | |
| "print('Dimension of financial news:\\n{}'.format(ihsg_data_clean.shape[0]),\n", | |
| " 'rows and {}'.format(ihsg_data_clean.shape[1]),'columns')\n", | |
| "ihsg_data_clean.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 98, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:29.963673Z", | |
| "start_time": "2020-03-17T20:35:29.881112Z" | |
| } | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension of financial news:\n", | |
| "365 rows and 8 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Date</th>\n", | |
| " <th>Open</th>\n", | |
| " <th>High</th>\n", | |
| " <th>Low</th>\n", | |
| " <th>Close</th>\n", | |
| " <th>Adj Close</th>\n", | |
| " <th>Volume</th>\n", | |
| " <th>Return</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2019-03-08</td>\n", | |
| " <td>6439.840820</td>\n", | |
| " <td>6442.204102</td>\n", | |
| " <td>6381.730957</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>92588600.0</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2019-03-09</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2019-03-10</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2019-03-11</td>\n", | |
| " <td>6418.882813</td>\n", | |
| " <td>6420.782227</td>\n", | |
| " <td>6366.039063</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>97316700.0</td>\n", | |
| " <td>-0.002613</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2019-03-12</td>\n", | |
| " <td>6395.880859</td>\n", | |
| " <td>6404.430176</td>\n", | |
| " <td>6339.967773</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>84514000.0</td>\n", | |
| " <td>-0.001993</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Date Open High Low Close \\\n", | |
| "0 2019-03-08 6439.840820 6442.204102 6381.730957 6383.067871 \n", | |
| "1 2019-03-09 NaN NaN NaN NaN \n", | |
| "2 2019-03-10 NaN NaN NaN NaN \n", | |
| "3 2019-03-11 6418.882813 6420.782227 6366.039063 6366.434082 \n", | |
| "4 2019-03-12 6395.880859 6404.430176 6339.967773 6353.773926 \n", | |
| "\n", | |
| " Adj Close Volume Return \n", | |
| "0 6383.067871 92588600.0 0.000000 \n", | |
| "1 NaN NaN NaN \n", | |
| "2 NaN NaN NaN \n", | |
| "3 6366.434082 97316700.0 -0.002613 \n", | |
| "4 6353.773926 84514000.0 -0.001993 " | |
| ] | |
| }, | |
| "execution_count": 98, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Expand to one row per calendar day; days without data come back as NaN rows.\n", | |
| "ihsg_data_clean = imput_date(ihsg_data_clean)\n", | |
| "print('Dimension of financial news:\\n{}'.format(ihsg_data_clean.shape[0]),\n", | |
| " 'rows and {}'.format(ihsg_data_clean.shape[1]),'columns')\n", | |
| "ihsg_data_clean.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 99, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:42.479250Z", | |
| "start_time": "2020-03-17T20:35:29.969846Z" | |
| }, | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension of financial news:\n", | |
| "365 rows and 8 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Date</th>\n", | |
| " <th>Open</th>\n", | |
| " <th>High</th>\n", | |
| " <th>Low</th>\n", | |
| " <th>Close</th>\n", | |
| " <th>Adj Close</th>\n", | |
| " <th>Volume</th>\n", | |
| " <th>Return</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>2019-03-08</td>\n", | |
| " <td>6439.840820</td>\n", | |
| " <td>6442.204102</td>\n", | |
| " <td>6381.730957</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>6383.067871</td>\n", | |
| " <td>92588600.0</td>\n", | |
| " <td>0.000000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>2019-03-09</td>\n", | |
| " <td>6434.601318</td>\n", | |
| " <td>6436.848633</td>\n", | |
| " <td>6377.807983</td>\n", | |
| " <td>6378.909424</td>\n", | |
| " <td>6378.909424</td>\n", | |
| " <td>93770625.0</td>\n", | |
| " <td>-0.000653</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>2019-03-10</td>\n", | |
| " <td>6429.361817</td>\n", | |
| " <td>6431.493165</td>\n", | |
| " <td>6373.885010</td>\n", | |
| " <td>6374.750976</td>\n", | |
| " <td>6374.750976</td>\n", | |
| " <td>94952650.0</td>\n", | |
| " <td>-0.001306</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>2019-03-11</td>\n", | |
| " <td>6418.882813</td>\n", | |
| " <td>6420.782227</td>\n", | |
| " <td>6366.039063</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>6366.434082</td>\n", | |
| " <td>97316700.0</td>\n", | |
| " <td>-0.002613</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>2019-03-12</td>\n", | |
| " <td>6395.880859</td>\n", | |
| " <td>6404.430176</td>\n", | |
| " <td>6339.967773</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>6353.773926</td>\n", | |
| " <td>84514000.0</td>\n", | |
| " <td>-0.001993</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " Date Open High Low Close \\\n", | |
| "0 2019-03-08 6439.840820 6442.204102 6381.730957 6383.067871 \n", | |
| "1 2019-03-09 6434.601318 6436.848633 6377.807983 6378.909424 \n", | |
| "2 2019-03-10 6429.361817 6431.493165 6373.885010 6374.750976 \n", | |
| "3 2019-03-11 6418.882813 6420.782227 6366.039063 6366.434082 \n", | |
| "4 2019-03-12 6395.880859 6404.430176 6339.967773 6353.773926 \n", | |
| "\n", | |
| " Adj Close Volume Return \n", | |
| "0 6383.067871 92588600.0 0.000000 \n", | |
| "1 6378.909424 93770625.0 -0.000653 \n", | |
| "2 6374.750976 94952650.0 -0.001306 \n", | |
| "3 6366.434082 97316700.0 -0.002613 \n", | |
| "4 6353.773926 84514000.0 -0.001993 " | |
| ] | |
| }, | |
| "execution_count": 99, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Fill the missing values with curve_function, then report the shape and preview the result.\n", | |
| "ihsg_data_curve = curve_function(ihsg_data_clean)\n", | |
| "print('Dimension of financial news:\\n{}'.format(ihsg_data_curve.shape[0]),\n", | |
| " 'rows and {}'.format(ihsg_data_curve.shape[1]),'columns')\n", | |
| "ihsg_data_curve.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Save Data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 100, | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-17T20:35:42.592819Z", | |
| "start_time": "2020-03-17T20:35:42.484353Z" | |
| } | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Write the filled frame to CSV without the index column; the target folder must already exist.\n", | |
| "ihsg_data_curve.to_csv(\"Datasets/interim/Clean Data of JCI 2019.csv\",index=False)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment