Created
September 4, 2017 12:28
-
-
Save libinruan/dad14eec3d194512b4aebc458bad0ae5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"The reason we use csv module instead of default Python i/o functions. \n", | |
"Here is an example." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Let's start with a cursory preview without any modules\n", | |
"path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n", | |
"file = open(path)\n", | |
"for line in file:\n", | |
" print(line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'Date,Open,High,Low,Close,Volume,Adj Close\\n'" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Index each line of a file with list comprehensions\n", | |
"file = open(path)\n", | |
"lines = [line for line in file]\n", | |
"# Then, we can read each line by index; \n", | |
"# each line is treated as a single string \n", | |
"# (there's a new line character included in the end)\n", | |
"lines[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'8/19/2014,585.002622,587.342658,584.002627,586.862643,978600,586.862643\\n'" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lines[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'Date,Open,High,Low,Close,Volume,Adj Close'" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# We can use strip to remove any leading and trailing white space\n", | |
"lines[0].strip()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# We can further use split function to divide the string into small pieces\n", | |
"lines[0].strip().split(',')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],\n", | |
" ['8/19/2014',\n", | |
" '585.002622',\n", | |
" '587.342658',\n", | |
" '584.002627',\n", | |
" '586.862643',\n", | |
" '978600',\n", | |
" '586.862643']]" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Let's read the file once more with trick we just saw\n", | |
"file = open(path)\n", | |
"dataset = [line.strip().split(',') for line in file]\n", | |
"dataset[:2]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Problem: Data remain treated as string. \n", | |
"Solution: Use csv module." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Dialect', 'DictReader', 'DictWriter', 'Error', 'QUOTE_ALL', 'QUOTE_MINIMAL', 'QUOTE_NONE', 'QUOTE_NONNUMERIC', 'Sniffer', 'StringIO', '_Dialect', '__all__', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__version__', 'excel', 'excel_tab', 'field_size_limit', 'get_dialect', 'list_dialects', 're', 'reader', 'reduce', 'register_dialect', 'unregister_dialect', 'writer']\n" | |
] | |
} | |
], | |
"source": [ | |
"import csv\n", | |
"'''When use a module for the first time, it is always useful to use 'dir'\n", | |
"to see what classes and function the module contains'''\n", | |
"print(dir(csv))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 53, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"'''use newline='' in Python 3.0 becasue depending on your system strings may \n", | |
"end with a new line carraige return or both. This ensures the csv \n", | |
"module will work correctly across all platfomrs'''\n", | |
"file = open(path)\n", | |
"reader = csv.reader(file)\n", | |
"# use next function to extract the first line which is header containing no data\n", | |
"header = next(reader) # The first line is the header\n", | |
"data = [row for row in reader] # Read the remaing data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 55, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']\n", | |
"['8/19/2014', '585.002622', '587.342658', '584.002627', '586.862643', '978600', '586.862643']\n" | |
] | |
} | |
], | |
"source": [ | |
"print(header)\n", | |
"print(data[0])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"To convert data into appropriate type" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[datetime.datetime(2014, 8, 19, 0, 0), 585.002622, 587.342658, 584.002627, 586.862643, 978600, 586.862643]\n" | |
] | |
} | |
], | |
"source": [ | |
"# import the datetime class from the datetime module. you'll see why.\n", | |
"from datetime import datetime \n", | |
"import csv\n", | |
"file = open(path)\n", | |
"reader = csv.reader(file)\n", | |
"header = next(reader)\n", | |
"data = []\n", | |
"'''row = [Date, Open, High, Low, Close, Volume, Adj. Close ]\n", | |
" 'Date' is a datetime; the rest are of type floats \n", | |
" except 'Volume' which is an integer'''\n", | |
"''' We use strptime (short for string parse time) \n", | |
" to convert the datetime column'''\n", | |
"for row in reader:\n", | |
" date = datetime.strptime(row[0], '%m/%d/%Y')\n", | |
" open_price = float(row[1])\n", | |
" high = float(row[2])\n", | |
" low = float(row[3])\n", | |
" close = float(row[4])\n", | |
" volume = int(row[5])\n", | |
" adj_close = float(row[6])\n", | |
" data.append([date,open_price,high,low,close,volume,adj_close])\n", | |
"\n", | |
"print(data[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# compute and store daily Google stock returns\n", | |
"save_path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014_daily_returns.csv\"\n", | |
"file = open(save_path,'w')\n", | |
"# create a csv.writer object to store our computed results\n", | |
"writer = csv.writer(file)\n", | |
"# to write a row call the write row method with a list of values\n", | |
"# write the header first\n", | |
"writer.writerow([\"Date\",\"Return\"])\n", | |
"# our data is luckily already sorted chronologically so\n", | |
"# we can simply loop through it.\n", | |
"# Since we use stock prices of consecutive days to compute returns, \n", | |
"# we stop at the second-to-last row.\n", | |
"for i in range(len(data)-1): \n", | |
" todays_row = data[i]\n", | |
" todays_date = todays_row[0]\n", | |
" todays_price = todays_row[-1] # the last element in the list (adj_close)\n", | |
" yesterdays_row = data[i+1] # data is arranged starting recently\n", | |
" yesterdays_price = yesterdays_row[-1]\n", | |
" daily_return = (todays_price-yesterdays_price)/yesterdays_price\n", | |
" \n", | |
" #The reuslt would not be listed in a user friedly way without\n", | |
" #using the string format time method on today's date and pass in the desired format\n", | |
" formatted_date = todays_date.strftime('%m/%d/%Y')\n", | |
" writer.writerow([formatted_date,daily_return])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Now, let's turn to pandas module to see how it makes our life better \n", | |
"It only takes four command lines to perfrom the same task." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Date</th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>8/19/2014</td>\n", | |
" <td>585.002622</td>\n", | |
" <td>587.342658</td>\n", | |
" <td>584.002627</td>\n", | |
" <td>586.862643</td>\n", | |
" <td>978600</td>\n", | |
" <td>586.862643</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>8/18/2014</td>\n", | |
" <td>576.112580</td>\n", | |
" <td>584.512631</td>\n", | |
" <td>576.002598</td>\n", | |
" <td>582.162619</td>\n", | |
" <td>1284100</td>\n", | |
" <td>582.162619</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>8/15/2014</td>\n", | |
" <td>577.862619</td>\n", | |
" <td>579.382595</td>\n", | |
" <td>570.522603</td>\n", | |
" <td>573.482626</td>\n", | |
" <td>1519100</td>\n", | |
" <td>573.482626</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>8/14/2014</td>\n", | |
" <td>576.182596</td>\n", | |
" <td>577.902645</td>\n", | |
" <td>570.882599</td>\n", | |
" <td>574.652582</td>\n", | |
" <td>985400</td>\n", | |
" <td>574.652582</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8/13/2014</td>\n", | |
" <td>567.312567</td>\n", | |
" <td>575.002602</td>\n", | |
" <td>565.752564</td>\n", | |
" <td>574.782577</td>\n", | |
" <td>1439200</td>\n", | |
" <td>574.782577</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Date Open High Low Close Volume \\\n", | |
"0 8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 \n", | |
"1 8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 \n", | |
"2 8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 \n", | |
"3 8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 \n", | |
"4 8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 \n", | |
"\n", | |
" Adj Close \n", | |
"0 586.862643 \n", | |
"1 582.162619 \n", | |
"2 573.482626 \n", | |
"3 574.652582 \n", | |
"4 574.782577 " | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"path = \"E:\\\\GoogleDrive\\\\github\\\\tutorial\\\\google_stock_data_20042014.csv\"\n", | |
"df = pd.read_csv(path)\n", | |
"df.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 92, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.frame.DataFrame'>\n", | |
"RangeIndex: 2518 entries, 0 to 2517\n", | |
"Data columns (total 7 columns):\n", | |
"Date 2518 non-null object\n", | |
"Open 2518 non-null float64\n", | |
"High 2518 non-null float64\n", | |
"Low 2518 non-null float64\n", | |
"Close 2518 non-null float64\n", | |
"Volume 2518 non-null int64\n", | |
"Adj Close 2518 non-null float64\n", | |
"dtypes: float64(5), int64(1), object(1)\n", | |
"memory usage: 137.8+ KB\n", | |
"None\n" | |
] | |
} | |
], | |
"source": [ | |
"# See panadas already correctly identifies object type for each data column\n", | |
"print(df.info())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 100, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Date</th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" <th>daily return</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>8/19/2014</td>\n", | |
" <td>585.002622</td>\n", | |
" <td>587.342658</td>\n", | |
" <td>584.002627</td>\n", | |
" <td>586.862643</td>\n", | |
" <td>978600</td>\n", | |
" <td>586.862643</td>\n", | |
" <td>0.008073</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>8/18/2014</td>\n", | |
" <td>576.112580</td>\n", | |
" <td>584.512631</td>\n", | |
" <td>576.002598</td>\n", | |
" <td>582.162619</td>\n", | |
" <td>1284100</td>\n", | |
" <td>582.162619</td>\n", | |
" <td>0.015136</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>8/15/2014</td>\n", | |
" <td>577.862619</td>\n", | |
" <td>579.382595</td>\n", | |
" <td>570.522603</td>\n", | |
" <td>573.482626</td>\n", | |
" <td>1519100</td>\n", | |
" <td>573.482626</td>\n", | |
" <td>-0.002036</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>8/14/2014</td>\n", | |
" <td>576.182596</td>\n", | |
" <td>577.902645</td>\n", | |
" <td>570.882599</td>\n", | |
" <td>574.652582</td>\n", | |
" <td>985400</td>\n", | |
" <td>574.652582</td>\n", | |
" <td>-0.000226</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>8/13/2014</td>\n", | |
" <td>567.312567</td>\n", | |
" <td>575.002602</td>\n", | |
" <td>565.752564</td>\n", | |
" <td>574.782577</td>\n", | |
" <td>1439200</td>\n", | |
" <td>574.782577</td>\n", | |
" <td>0.021413</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>8/12/2014</td>\n", | |
" <td>564.522567</td>\n", | |
" <td>565.902572</td>\n", | |
" <td>560.882518</td>\n", | |
" <td>562.732562</td>\n", | |
" <td>1542000</td>\n", | |
" <td>562.732562</td>\n", | |
" <td>-0.009069</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>8/11/2014</td>\n", | |
" <td>569.992585</td>\n", | |
" <td>570.492553</td>\n", | |
" <td>566.002578</td>\n", | |
" <td>567.882551</td>\n", | |
" <td>1214700</td>\n", | |
" <td>567.882551</td>\n", | |
" <td>-0.001565</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8/8/2014</td>\n", | |
" <td>563.562536</td>\n", | |
" <td>570.252576</td>\n", | |
" <td>560.352561</td>\n", | |
" <td>568.772565</td>\n", | |
" <td>1494700</td>\n", | |
" <td>568.772565</td>\n", | |
" <td>0.009603</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>8/7/2014</td>\n", | |
" <td>568.002570</td>\n", | |
" <td>569.892580</td>\n", | |
" <td>561.102543</td>\n", | |
" <td>563.362525</td>\n", | |
" <td>1110900</td>\n", | |
" <td>563.362525</td>\n", | |
" <td>-0.005322</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>8/6/2014</td>\n", | |
" <td>561.782569</td>\n", | |
" <td>570.702601</td>\n", | |
" <td>560.002541</td>\n", | |
" <td>566.376589</td>\n", | |
" <td>1334300</td>\n", | |
" <td>566.376589</td>\n", | |
" <td>0.002308</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Date Open High Low Close Volume \\\n", | |
"0 8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 \n", | |
"1 8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 \n", | |
"2 8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 \n", | |
"3 8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 \n", | |
"4 8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 \n", | |
"5 8/12/2014 564.522567 565.902572 560.882518 562.732562 1542000 \n", | |
"6 8/11/2014 569.992585 570.492553 566.002578 567.882551 1214700 \n", | |
"7 8/8/2014 563.562536 570.252576 560.352561 568.772565 1494700 \n", | |
"8 8/7/2014 568.002570 569.892580 561.102543 563.362525 1110900 \n", | |
"9 8/6/2014 561.782569 570.702601 560.002541 566.376589 1334300 \n", | |
"\n", | |
" Adj Close daily return \n", | |
"0 586.862643 0.008073 \n", | |
"1 582.162619 0.015136 \n", | |
"2 573.482626 -0.002036 \n", | |
"3 574.652582 -0.000226 \n", | |
"4 574.782577 0.021413 \n", | |
"5 562.732562 -0.009069 \n", | |
"6 567.882551 -0.001565 \n", | |
"7 568.772565 0.009603 \n", | |
"8 563.362525 -0.005322 \n", | |
"9 566.376589 0.002308 " | |
] | |
}, | |
"execution_count": 100, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# use 'shift' method to read the previous trade day's adjusted closing price.\n", | |
"df['daily return'] = (df['Adj Close']-df['Adj Close'].shift(-1))/df['Adj Close'].shift(-1)\n", | |
"df.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(\"google_stock_data_20042014_daily_returns_PANDAS.csv\",columns=['Date','daily return'],index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Rearrange Google stock prices series in reverse date order" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 172, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"<class 'pandas.core.series.Series'>\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1/10/2005</th>\n", | |
" <td>194.500326</td>\n", | |
" <td>198.100339</td>\n", | |
" <td>191.830333</td>\n", | |
" <td>195.060341</td>\n", | |
" <td>15135600</td>\n", | |
" <td>97.432738</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2006</th>\n", | |
" <td>464.420786</td>\n", | |
" <td>470.250794</td>\n", | |
" <td>462.040801</td>\n", | |
" <td>469.760804</td>\n", | |
" <td>18262300</td>\n", | |
" <td>234.645756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2007</th>\n", | |
" <td>484.430845</td>\n", | |
" <td>493.550855</td>\n", | |
" <td>482.040813</td>\n", | |
" <td>489.460837</td>\n", | |
" <td>11981700</td>\n", | |
" <td>244.485933</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2008</th>\n", | |
" <td>645.011113</td>\n", | |
" <td>657.201125</td>\n", | |
" <td>640.111093</td>\n", | |
" <td>646.731101</td>\n", | |
" <td>12715800</td>\n", | |
" <td>323.042508</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2011</th>\n", | |
" <td>614.801082</td>\n", | |
" <td>615.391054</td>\n", | |
" <td>608.561033</td>\n", | |
" <td>614.211048</td>\n", | |
" <td>3170100</td>\n", | |
" <td>306.798725</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume \\\n", | |
"Date \n", | |
"1/10/2005 194.500326 198.100339 191.830333 195.060341 15135600 \n", | |
"1/10/2006 464.420786 470.250794 462.040801 469.760804 18262300 \n", | |
"1/10/2007 484.430845 493.550855 482.040813 489.460837 11981700 \n", | |
"1/10/2008 645.011113 657.201125 640.111093 646.731101 12715800 \n", | |
"1/10/2011 614.801082 615.391054 608.561033 614.211048 3170100 \n", | |
"\n", | |
" Adj Close \n", | |
"Date \n", | |
"1/10/2005 97.432738 \n", | |
"1/10/2006 234.645756 \n", | |
"1/10/2007 244.485933 \n", | |
"1/10/2008 323.042508 \n", | |
"1/10/2011 306.798725 " | |
] | |
}, | |
"execution_count": 172, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wdf = df # just to keep original df intact\n", | |
"print(type(wdf['Date']))\n", | |
"wdf.set_index('Date').head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1/10/2005</th>\n", | |
" <td>194.500326</td>\n", | |
" <td>198.100339</td>\n", | |
" <td>191.830333</td>\n", | |
" <td>195.060341</td>\n", | |
" <td>15135600</td>\n", | |
" <td>97.432738</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2006</th>\n", | |
" <td>464.420786</td>\n", | |
" <td>470.250794</td>\n", | |
" <td>462.040801</td>\n", | |
" <td>469.760804</td>\n", | |
" <td>18262300</td>\n", | |
" <td>234.645756</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2007</th>\n", | |
" <td>484.430845</td>\n", | |
" <td>493.550855</td>\n", | |
" <td>482.040813</td>\n", | |
" <td>489.460837</td>\n", | |
" <td>11981700</td>\n", | |
" <td>244.485933</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2008</th>\n", | |
" <td>645.011113</td>\n", | |
" <td>657.201125</td>\n", | |
" <td>640.111093</td>\n", | |
" <td>646.731101</td>\n", | |
" <td>12715800</td>\n", | |
" <td>323.042508</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1/10/2011</th>\n", | |
" <td>614.801082</td>\n", | |
" <td>615.391054</td>\n", | |
" <td>608.561033</td>\n", | |
" <td>614.211048</td>\n", | |
" <td>3170100</td>\n", | |
" <td>306.798725</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume \\\n", | |
"Date \n", | |
"1/10/2005 194.500326 198.100339 191.830333 195.060341 15135600 \n", | |
"1/10/2006 464.420786 470.250794 462.040801 469.760804 18262300 \n", | |
"1/10/2007 484.430845 493.550855 482.040813 489.460837 11981700 \n", | |
"1/10/2008 645.011113 657.201125 640.111093 646.731101 12715800 \n", | |
"1/10/2011 614.801082 615.391054 608.561033 614.211048 3170100 \n", | |
"\n", | |
" Adj Close \n", | |
"Date \n", | |
"1/10/2005 97.432738 \n", | |
"1/10/2006 234.645756 \n", | |
"1/10/2007 244.485933 \n", | |
"1/10/2008 323.042508 \n", | |
"1/10/2011 306.798725 " | |
] | |
}, | |
"execution_count": 173, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wdf = df\n", | |
"wdf.sort_values(by='Date',ascending=True,inplace=True)\n", | |
"wdf.set_index('Date').head(5)\n", | |
"# The result is grouped in the way not what we expect" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 174, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2004-08-19</th>\n", | |
" <td>100.000168</td>\n", | |
" <td>104.060182</td>\n", | |
" <td>95.960165</td>\n", | |
" <td>100.340176</td>\n", | |
" <td>44871300</td>\n", | |
" <td>50.119968</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2004-08-20</th>\n", | |
" <td>101.010175</td>\n", | |
" <td>109.080187</td>\n", | |
" <td>100.500174</td>\n", | |
" <td>108.310183</td>\n", | |
" <td>22942800</td>\n", | |
" <td>54.100990</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2004-08-23</th>\n", | |
" <td>110.750191</td>\n", | |
" <td>113.480193</td>\n", | |
" <td>109.050183</td>\n", | |
" <td>109.400185</td>\n", | |
" <td>18342800</td>\n", | |
" <td>54.645447</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2004-08-24</th>\n", | |
" <td>111.240189</td>\n", | |
" <td>111.600192</td>\n", | |
" <td>103.570177</td>\n", | |
" <td>104.870176</td>\n", | |
" <td>15319700</td>\n", | |
" <td>52.382705</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2004-08-25</th>\n", | |
" <td>104.960181</td>\n", | |
" <td>108.000187</td>\n", | |
" <td>103.880180</td>\n", | |
" <td>106.000184</td>\n", | |
" <td>9232100</td>\n", | |
" <td>52.947145</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume \\\n", | |
"Date \n", | |
"2004-08-19 100.000168 104.060182 95.960165 100.340176 44871300 \n", | |
"2004-08-20 101.010175 109.080187 100.500174 108.310183 22942800 \n", | |
"2004-08-23 110.750191 113.480193 109.050183 109.400185 18342800 \n", | |
"2004-08-24 111.240189 111.600192 103.570177 104.870176 15319700 \n", | |
"2004-08-25 104.960181 108.000187 103.880180 106.000184 9232100 \n", | |
"\n", | |
" Adj Close \n", | |
"Date \n", | |
"2004-08-19 50.119968 \n", | |
"2004-08-20 54.100990 \n", | |
"2004-08-23 54.645447 \n", | |
"2004-08-24 52.382705 \n", | |
"2004-08-25 52.947145 " | |
] | |
}, | |
"execution_count": 174, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wdf = df\n", | |
"# add this command to correct the problem\n", | |
"wdf['Date'] =pd.to_datetime(wdf.Date)\n", | |
"wdf.sort_values(by='Date',ascending=True,inplace=True)\n", | |
"wdf.set_index('Date').head(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Locate Google stock price of a specific trading date" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 116, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>8/19/2014</th>\n", | |
" <td>585.002622</td>\n", | |
" <td>587.342658</td>\n", | |
" <td>584.002627</td>\n", | |
" <td>586.862643</td>\n", | |
" <td>978600</td>\n", | |
" <td>586.862643</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8/18/2014</th>\n", | |
" <td>576.112580</td>\n", | |
" <td>584.512631</td>\n", | |
" <td>576.002598</td>\n", | |
" <td>582.162619</td>\n", | |
" <td>1284100</td>\n", | |
" <td>582.162619</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8/15/2014</th>\n", | |
" <td>577.862619</td>\n", | |
" <td>579.382595</td>\n", | |
" <td>570.522603</td>\n", | |
" <td>573.482626</td>\n", | |
" <td>1519100</td>\n", | |
" <td>573.482626</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8/14/2014</th>\n", | |
" <td>576.182596</td>\n", | |
" <td>577.902645</td>\n", | |
" <td>570.882599</td>\n", | |
" <td>574.652582</td>\n", | |
" <td>985400</td>\n", | |
" <td>574.652582</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8/13/2014</th>\n", | |
" <td>567.312567</td>\n", | |
" <td>575.002602</td>\n", | |
" <td>565.752564</td>\n", | |
" <td>574.782577</td>\n", | |
" <td>1439200</td>\n", | |
" <td>574.782577</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume Adj Close\n", | |
"Date \n", | |
"8/19/2014 585.002622 587.342658 584.002627 586.862643 978600 586.862643\n", | |
"8/18/2014 576.112580 584.512631 576.002598 582.162619 1284100 582.162619\n", | |
"8/15/2014 577.862619 579.382595 570.522603 573.482626 1519100 573.482626\n", | |
"8/14/2014 576.182596 577.902645 570.882599 574.652582 985400 574.652582\n", | |
"8/13/2014 567.312567 575.002602 565.752564 574.782577 1439200 574.782577" | |
] | |
}, | |
"execution_count": 116, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wdf = df.set_index('Date')\n", | |
"wdf.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Index([u'8/19/2014', u'8/18/2014', u'8/15/2014', u'8/14/2014', u'8/13/2014',\n", | |
" u'8/12/2014', u'8/11/2014', u'8/8/2014', u'8/7/2014', u'8/6/2014',\n", | |
" ...\n", | |
" u'9/1/2004', u'8/31/2004', u'8/30/2004', u'8/27/2004', u'8/26/2004',\n", | |
" u'8/25/2004', u'8/24/2004', u'8/23/2004', u'8/20/2004', u'8/19/2004'],\n", | |
" dtype='object', name=u'Date', length=2518)" | |
] | |
}, | |
"execution_count": 123, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"wdf.index" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 196, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Open 5.054009e+02\n", | |
"High 5.057409e+02\n", | |
"Low 4.960509e+02\n", | |
"Close 5.002209e+02\n", | |
"Volume 6.663800e+06\n", | |
"Adj Close 2.498606e+02\n", | |
"Name: 8/6/2010, dtype: float64" | |
] | |
}, | |
"execution_count": 196, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# locate an entry by label index\n", | |
"wdf.loc['8/6/2010']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 215, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Open</th>\n", | |
" <th>High</th>\n", | |
" <th>Low</th>\n", | |
" <th>Close</th>\n", | |
" <th>Volume</th>\n", | |
" <th>Adj Close</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Date</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>8/6/2010</th>\n", | |
" <td>505.400882</td>\n", | |
" <td>505.740867</td>\n", | |
" <td>496.050856</td>\n", | |
" <td>500.220854</td>\n", | |
" <td>6663800</td>\n", | |
" <td>249.860567</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Open High Low Close Volume Adj Close\n", | |
"Date \n", | |
"8/6/2010 505.400882 505.740867 496.050856 500.220854 6663800 249.860567" | |
] | |
}, | |
"execution_count": 215, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Or using positional indexing (input is in the list by list format)\n", | |
"wdf.iloc[[wdf.index.get_loc('8/6/2010')],list(range(len(wdf.columns)))]" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment