Created
March 27, 2014 01:55
-
-
Save neoneo40/9798362 to your computer and use it in GitHub Desktop.
06_Data Loading, Save, File Format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "heading", | |
| "level": 1, | |
| "metadata": {}, | |
| "source": [ | |
| "6. \ub370\uc774\ud130 \ub85c\ub529, \uc800\uc7a5, \ud30c\uc77c \ud615\uc2dd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "6.1 \ud14d\uc2a4\ud2b8 \ud30c\uc77c \uc774\uc6a9\ud558\ub294 \ubc29\ubc95" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ud30c\uc774\uc36c \uc88b\uc740 \uc774\uc720\n", | |
| "\n", | |
| "- \ub2e8\uc21c\ud55c \ubb38\ubc95\n", | |
| "- \uc9c1\uad00\uc801\uc778 \uc790\ub8cc \uad6c\uc870\n", | |
| "- \ud29c\ud50c\uc5d0 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uace0 \uc77d\uc5b4\ub0b4\ub294 \ud3b8\ub9ac\ud55c \uae30\ub2a5\n", | |
| "\n", | |
| "#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218\n", | |
| "\n", | |
| "- read_csv: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n", | |
| "- read_table: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\uc744 \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n", | |
| "- read_fwf: \uace0\uc815\ud3ed \uce7c\ub7fc \ud615\uc2dd\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4(\uad6c\ubd84\uc790\uac00 \uc5c6\ub294 \ub370\uc774\ud130)\n", | |
| "- read_clipboard: \ud074\ub9bd\ubcf4\ub4dc\uc5d0 \uc788\ub294 \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc624\ub294 read_table \ud568\uc218. \uc6f9\ud398\uc774\uc9c0\uc5d0\uc11c \ud45c\ub97c \uae01\uc5b4\uc62c \ub54c \uc720\uc6a9\ud558\ub2e4.\n", | |
| "\n", | |
| "#### pandas \ud30c\uc77c \ud30c\uc2f1 \ud568\uc218 \uc635\uc158\n", | |
| "\n", | |
| "- **\uc0c9\uc778**: \ubc18\ud658\ud558\ub294 DataFrame\uc5d0\uc11c \ud558\ub098 \uc774\uc0c1\uc758 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \uc9c0\uc815\ud560 \uc218 \uc788\ub2e4. \ud30c\uc77c\uc774\ub098 \uc0ac\uc6a9\uc790\ub85c\ubd80\ud130 \uce7c\ub7fc\uc758 \uc774\ub984\uc744 \ubc1b\uac70\ub098 \uc544\ubb34\uac83\ub3c4 \ubc1b\uc9c0 \uc54a\uc744 \uc218 \uc788\ub2e4.\n", | |
| "- **\uc790\ub8cc\ud615 \ucd94\ub860\uacfc \ub370\uc774\ud130 \ubcc0\ud658**: \uc0ac\uc6a9\uc790 \uc815\uc758 \uac12 \ubcc0\ud658\uacfc \ube44\uc5b4\uc788\ub294 \uac12\uc744 \uc704\ud55c \uc0ac\uc6a9\uc790 \ub9ac\uc2a4\ud2b8\ub97c \ud3ec\ud568\ud55c\ub2e4.\n", | |
| "- **\ub0a0\uc9dc \ubd84\uc11d**: \uc5ec\ub7ec \uce7c\ub7fc\uc5d0 \uac78\uccd0 \uc788\ub294 \ub0a0\uc9dc\uc640 \uc2dc\uac04 \uc815\ubcf4\ub97c \ud558\ub098\uc758 \uce7c\ub7fc\uc5d0 \uc870\ud569\ud574\uc11c \uacb0\uacfc\uc5d0 \ubc18\uc601\ud55c\ub2e4.\n", | |
| "- **\ubc18\ubcf5**: \uc5ec\ub7ec \ud30c\uc77c\uc5d0 \uac78\uccd0 \uc788\ub294 \uc790\ub8cc\ub97c \ubc18\ubcf5\uc801\uc73c\ub85c \uc77d\uc5b4\uc62c \uc218 \uc788\ub2e4.\n", | |
| "- **\uc815\uc81c\ub418\uc9c0 \uc54a\ub294 \ub370\uc774\ud130 \ucc98\ub9ac**: \ub85c\uc6b0\ub098 \uaf2c\ub9ac\ub9d0, \uc8fc\uc11d \uac74\ub108\ub6f0\uae30 \ub610\ub294 \ucc9c \ub2e8\uc704\ub9c8\ub2e4 \uc27c\ud45c\ub85c \uad6c\ubd84\ub41c \uc22b\uc790 \uac19\uc740 \uc0ac\uc18c\ud55c \uc77c\uc744 \ucc98\ub9ac\ud574\uc900\ub2e4.\n", | |
| "\n", | |
| "#### \uc790\ub8cc\ud615 \ucd94\ub860\uc740 \ub9e4\uc6b0 \uc911\uc694\n", | |
| "\n", | |
| "- \uc5b4\ub5a4 \uce7c\ub7fc\uc774 \uc22b\uc790\uc778\uc9c0 \ubd88\ub9ac\uc5b8\uc778\uc9c0 \uc9c0\uc815\ud574\uc904 \ud544\uc694\uac00 \uc5c6\ub2e4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from pandas import DataFrame, Series\n", | |
| "import pandas as pd" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 15 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/ex1.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "a,b,c,d,message\r\n", | |
| "1,2,3,4,hello\r\n", | |
| "5,6,7,8,world\r\n", | |
| "9,10,11,12,foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 16 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "df = pd.read_csv('ch06/ex1.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 17 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "df" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 18, | |
| "text": [ | |
| " a b c d message\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 18 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "type(df)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 19, | |
| "text": [ | |
| "pandas.core.frame.DataFrame" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 19 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_table('ch06/ex1.csv', sep=',')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 20, | |
| "text": [ | |
| " a b c d message\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 20 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/ex2.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "1,2,3,4,hello\r\n", | |
| "5,6,7,8,world\r\n", | |
| "9,10,11,12,foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 21 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex2.csv', header=None)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " <th>2</th>\n", | |
| " <th>3</th>\n", | |
| " <th>4</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 22, | |
| "text": [ | |
| " 0 1 2 3 4\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 22 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'message'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 23, | |
| "text": [ | |
| " a b c message\n", | |
| "1 2 3 4 hello\n", | |
| "5 6 7 8 world\n", | |
| "9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 23 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "names = ['a', 'b', 'c', 'd', 'message']" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 24 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# message -> index\n", | |
| "pd.read_csv('ch06/ex2.csv', names=names, index_col='message')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>message</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>hello</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>world</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>foo</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 25, | |
| "text": [ | |
| " a b c d\n", | |
| "message \n", | |
| "hello 1 2 3 4\n", | |
| "world 5 6 7 8\n", | |
| "foo 9 10 11 12" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 25 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/csv_mindex.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "key1,key2,value1,value2\r\n", | |
| "one,a,1,2\r\n", | |
| "one,b,3,4\r\n", | |
| "one,c,5,6\r\n", | |
| "one,d,7,8\r\n", | |
| "two,a,9,10\r\n", | |
| "two,b,11,12\r\n", | |
| "two,c,13,14\r\n", | |
| "two,d,15,16\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 26 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uacc4\uce35\uc801 \uc0c9\uc778\uc744 \uc9c0\uc815\ud558\uace0 \uc2f6\ub2e4\uba74 \uce7c\ub7fc \ubc88\ud638\ub098 \uc774\ub984\uc758 \ub9ac\uc2a4\ud2b8\ub97c \ub118\uae34\ub2e4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 27 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "parsed" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>value1</th>\n", | |
| " <th>value2</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>key1</th>\n", | |
| " <th>key2</th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"4\" valign=\"top\">one</th>\n", | |
| " <th>a</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>b</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>c</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"4\" valign=\"top\">two</th>\n", | |
| " <th>a</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>b</th>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>c</th>\n", | |
| " <td> 13</td>\n", | |
| " <td> 14</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td> 15</td>\n", | |
| " <td> 16</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 28, | |
| "text": [ | |
| " value1 value2\n", | |
| "key1 key2 \n", | |
| "one a 1 2\n", | |
| " b 3 4\n", | |
| " c 5 6\n", | |
| " d 7 8\n", | |
| "two a 9 10\n", | |
| " b 11 12\n", | |
| " c 13 14\n", | |
| " d 15 16" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 28 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uace0\uc815\ub41c \uad6c\ubd84\uc790\uac00 \uc5c6\ub2e4\uba74 read_table\uc758 \uad6c\ubd84\uc790\ub85c \uc815\uaddc\ud45c\ud604\uc2dd([\ud30c\uc774\uc36c \u2013 \uc815\uaddc\uc2dd\ud45c\ud604\uc2dd(Regular Expression) \ubaa8\ub4c8](http://devanix.tistory.com/296), [\ubc88\uc5ed \ud30c\uc774\uc36c \uc815\uaddc\ud45c\ud604\uc2dd](http://codeflow.co.kr/question/1061/%ED%8C%8C%EC%9D%B4%EC%8D%AC-%EC%A0%95%EA%B7%9C-%ED%91%9C%ED%98%84%EC%8B%9D/))\uc744 \uc0ac\uc6a9\ud558\uba74 \ub41c\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "list(open('ch06/ex3.txt'))" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 29, | |
| "text": [ | |
| "[' A B C\\n',\n", | |
| " 'aaa -0.264438 -1.026059 -0.619500\\n',\n", | |
| " 'bbb 0.927272 0.302904 -0.032399\\n',\n", | |
| " 'ccc -0.264273 -0.386314 -0.217601\\n',\n", | |
| " 'ddd -0.871858 -0.348382 1.100491\\n']" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 29 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc9c1\uc811 \ud30c\uc77c\uc744 \uace0\uccd0\ub3c4 \ub418\uc9c0\ub9cc \uc774 \ud30c\uc77c\uc740 \uc5ec\ub7ec \uac1c\uc758 \uacf5\ubc31\ubb38\uc790\ub85c \ud544\ub4dc\uac00 \uad6c\ubd84\ub418\uc5b4 \uc788\uc73c\ubbc0\ub85c \uc774\ub97c \ud45c\ud604\ud560 \uc218 \uc788\ub294 \uc815\uaddc\ud45c\ud604\uc2dd \\s+\ub97c \uc0ac\uc6a9\ud574\uc11c \ucc98\ub9ac" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result = pd.read_table('ch06/ex3.txt', sep='\\s+')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 30 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>aaa</th>\n", | |
| " <td>-0.264438</td>\n", | |
| " <td>-1.026059</td>\n", | |
| " <td>-0.619500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>bbb</th>\n", | |
| " <td> 0.927272</td>\n", | |
| " <td> 0.302904</td>\n", | |
| " <td>-0.032399</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>ccc</th>\n", | |
| " <td>-0.264273</td>\n", | |
| " <td>-0.386314</td>\n", | |
| " <td>-0.217601</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>ddd</th>\n", | |
| " <td>-0.871858</td>\n", | |
| " <td>-0.348382</td>\n", | |
| " <td> 1.100491</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 31, | |
| "text": [ | |
| " A B C\n", | |
| "aaa -0.264438 -1.026059 -0.619500\n", | |
| "bbb 0.927272 0.302904 -0.032399\n", | |
| "ccc -0.264273 -0.386314 -0.217601\n", | |
| "ddd -0.871858 -0.348382 1.100491" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 31 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc774 \uacbd\uc6b0, \uccab\ubc88\uc9f8 \ub85c\uc6b0\ub294 \ub2e4\ub978 \ub85c\uc6b0\ubcf4\ub2e4 \uce7c\ub7fc\uc774 \ud558\ub098 \uc801\uae30 \ub54c\ubb38\uc5d0 read_table\uc740 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc774 DataFrame\uc758 \uc0c9\uc778\uc774 \ub418\uc5b4\uc57c \ud55c\ub2e4\uace0 \ucd94\ub860" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### read_table\uacfc read_csv\uc758 \ucc28\uc774\uc810\uc740??\n", | |
| "\n", | |
| "- read_csv: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \uc27c\ud45c(,)\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n", | |
| "- read_table: \ud30c\uc77c, URL \ub610\ub294 \ud30c\uc77c\uacfc \uc720\uc0ac\ud55c \uac1d\uccb4\ub85c\ubd80\ud130 \uad6c\ubd84\ub41c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc628\ub2e4. \ub370\uc774\ud130 \uad6c\ubd84\uc790\ub294 \ud0ed('\\t')\ub97c \uae30\ubcf8\uc73c\ub85c \ud55c\ub2e4.\n", | |
| "\n", | |
| "#### \uadf8\ub7ec\ub2c8 \ub458 \ub2e4 \uc0ac\uc6a9\uc744 \ud574\ub3c4 \ub418\uc9c0\ub9cc \uc660\ub9cc\ud558\uba74 read_csv \uac19\uc740 \uacbd\uc6b0\ub294 csv \ud30c\uc77c\ub9cc \uc0ac\uc6a9\uc744 \ud558\uace0 \ub098\uba38\uc9c0 \ud2b9\ubcc4\ud55c \uacbd\uc6b0\ub97c read_table\ub85c \ud65c\uc6a9" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex3.txt', delimiter='\\s+')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>A</th>\n", | |
| " <th>B</th>\n", | |
| " <th>C</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>aaa</th>\n", | |
| " <td>-0.264438</td>\n", | |
| " <td>-1.026059</td>\n", | |
| " <td>-0.619500</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>bbb</th>\n", | |
| " <td> 0.927272</td>\n", | |
| " <td> 0.302904</td>\n", | |
| " <td>-0.032399</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>ccc</th>\n", | |
| " <td>-0.264273</td>\n", | |
| " <td>-0.386314</td>\n", | |
| " <td>-0.217601</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>ddd</th>\n", | |
| " <td>-0.871858</td>\n", | |
| " <td>-0.348382</td>\n", | |
| " <td> 1.100491</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 32, | |
| "text": [ | |
| " A B C\n", | |
| "aaa -0.264438 -1.026059 -0.619500\n", | |
| "bbb 0.927272 0.302904 -0.032399\n", | |
| "ccc -0.264273 -0.386314 -0.217601\n", | |
| "ddd -0.871858 -0.348382 1.100491" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 32 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### [IO Tools(Text, CSV, HDF5, \u22ef) example](http://pandas.pydata.org/pandas-docs/stable/io.html)\n", | |
| "\n", | |
| "- \ud30c\uc11c \ud568\uc218\ub294 \ud30c\uc77c \ud615\uc2dd\uc5d0\uc11c \ubc1c\uc0dd\ud560 \uc218 \uc788\ub294 \ub9e4\uc6b0 \ub2e4\uc591\ud55c \uc608\uc678\ub97c \uc798 \ucc98\ub9ac\ud560 \uc218 \uc788\ub3c4\ub85d \ub9ce\uc740 \ucd94\uac00 \uc778\uc790\ub97c \uac00\uc9c0\uace0 \uc788\ub2e4.\n", | |
| "- skiprows\ub97c \uc774\uc6a9\ud574\uc11c \uccab\ubc88\uc9f8, \uc138\ubc88\uc9f8, \ub124\ubc88\uc9f8 \ub85c\uc6b0\ub97c \uac74\ub108\ub6f8 \uc218 \uc788\uc74c" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Read CSV(comma-separated) file into DataFrame\n", | |
| "pd.read_csv?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 33 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/ex4.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "# hey!\r\n", | |
| "a,b,c,d,message\r\n", | |
| "# just wanted to make things more difficult for you\r\n", | |
| "# who reads CSV files with computers, anyway?\r\n", | |
| "1,2,3,4,hello\r\n", | |
| "5,6,7,8,world\r\n", | |
| "9,10,11,12,foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 34 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex4.csv', skiprows=[0, 2, 3])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 35, | |
| "text": [ | |
| " a b c d message\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 35 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/ex5.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "something,a,b,c,d,message\r\n", | |
| "one,1,2,3,4,NA\r\n", | |
| "two,5,6,,8,world\r\n", | |
| "three,9,10,11,12,foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 36 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result = pd.read_csv('ch06/ex5.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 37 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> one</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> two</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> three</td>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 38, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 one 1 2 3 4 NaN\n", | |
| "1 two 5 6 NaN 8 world\n", | |
| "2 three 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 38 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.isnull(result)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> True</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> True</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " <td> False</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 39, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 False False False False False True\n", | |
| "1 False False False True False False\n", | |
| "2 False False False False False False" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 39 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### na_values \uc635\uc158\uc740 \ub9ac\uc2a4\ud2b8\ub098 \ubb38\uc790\uc5f4 \uc9d1\ud569\uc744 \ubc1b\uc544\uc11c \ub204\ub77d\ub41c \uac12\uc744 \ucc98\ub9ac\n", | |
| "\n", | |
| "### Why? na_values\ub97c \uc0ac\uc6a9\ud558\uc9c0?\n", | |
| "\n", | |
| "- \ud2b9\uc815\ud55c \uac12\ub4e4\uc740 \uacc4\uc0b0\uc744 \ud558\uc9c0 \uc54a\uc73c\ub824\uace0??? \uc74c.." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result = pd.read_csv('ch06/ex5.csv', na_values=['NULL'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 40 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> one</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> two</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> three</td>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 41, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 one 1 2 3 4 NaN\n", | |
| "1 two 5 6 NaN 8 world\n", | |
| "2 three 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 41 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# world\ub97c NA\uac12\uc73c\ub85c \ucc98\ub9ac\ud558\ub2c8 NaN\uc73c\ub85c \ub098\uc628\ub2e4.\n", | |
| "# \ud2b9\uc815\ud55c \uac12\uc744 NA \ucc98\ub9ac\ud560 \uc218 \uc788\uc744\uac83 \uac19\ub2e4.\n", | |
| "pd.read_csv('ch06/ex5.csv', na_values=['world'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> one</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> two</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> three</td>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 42, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 one 1 2 3 4 NaN\n", | |
| "1 two 5 6 NaN 8 NaN\n", | |
| "2 three 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 42 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc5f4\ub9c8\ub2e4 \ub2e4\ub978 NA \ubb38\uc790\ub97c \uc0ac\uc804 \uac12\uc73c\ub85c \ub118\uaca8 \ucc98\ub9ac \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "sentinels = {'message': ['foo', 'NA'], 'something': ['two']}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 43 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex5.csv', na_values=sentinels)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> one</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> NaN</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> three</td>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 44, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 one 1 2 3 4 NaN\n", | |
| "1 NaN 5 6 NaN 8 world\n", | |
| "2 three 9 10 11 12 NaN" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 44 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result = pd.read_csv('ch06/ex6.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 45 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<pre>\n", | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 10000 entries, 0 to 9999\n", | |
| "Data columns (total 5 columns):\n", | |
| "one 10000 non-null values\n", | |
| "two 10000 non-null values\n", | |
| "three 10000 non-null values\n", | |
| "four 10000 non-null values\n", | |
| "key 10000 non-null values\n", | |
| "dtypes: float64(4), object(1)\n", | |
| "</pre>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 46, | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 10000 entries, 0 to 9999\n", | |
| "Data columns (total 5 columns):\n", | |
| "one 10000 non-null values\n", | |
| "two 10000 non-null values\n", | |
| "three 10000 non-null values\n", | |
| "four 10000 non-null values\n", | |
| "key 10000 non-null values\n", | |
| "dtypes: float64(4), object(1)" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 46 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### nrows\ub85c \ucc98\uc74c \uba87 \uc904\ub9cc \uc77d\uc5b4\ubcfc \uc218 \uc788\ub2e4.\n", | |
| "\n", | |
| "- \ub9ac\ub205\uc2a4\uc758 head \ud30c\uc77c\uc774\ub984 \uacfc \uac19\ub2e4\uace0 \uc0dd\uac01\ud558\uba74 \ub41c\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/ex6.csv', nrows=5)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>one</th>\n", | |
| " <th>two</th>\n", | |
| " <th>three</th>\n", | |
| " <th>four</th>\n", | |
| " <th>key</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 0.467976</td>\n", | |
| " <td>-0.038649</td>\n", | |
| " <td>-0.295344</td>\n", | |
| " <td>-1.824726</td>\n", | |
| " <td> L</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>-0.358893</td>\n", | |
| " <td> 1.404453</td>\n", | |
| " <td> 0.704965</td>\n", | |
| " <td>-0.200638</td>\n", | |
| " <td> B</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>-0.501840</td>\n", | |
| " <td> 0.659254</td>\n", | |
| " <td>-0.421691</td>\n", | |
| " <td>-0.057688</td>\n", | |
| " <td> G</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 0.204886</td>\n", | |
| " <td> 1.074134</td>\n", | |
| " <td> 1.388361</td>\n", | |
| " <td>-0.982404</td>\n", | |
| " <td> R</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 0.354628</td>\n", | |
| " <td>-0.133116</td>\n", | |
| " <td> 0.283763</td>\n", | |
| " <td>-0.837063</td>\n", | |
| " <td> Q</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 47, | |
| "text": [ | |
| " one two three four key\n", | |
| "0 0.467976 -0.038649 -0.295344 -1.824726 L\n", | |
| "1 -0.358893 1.404453 0.704965 -0.200638 B\n", | |
| "2 -0.501840 0.659254 -0.421691 -0.057688 G\n", | |
| "3 0.204886 1.074134 1.388361 -0.982404 R\n", | |
| "4 0.354628 -0.133116 0.283763 -0.837063 Q" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 47 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### TextParser \uac1d\uccb4\ub97c \uc774\uc6a9\ud574\uc11c chunksize\uc5d0 \ub530\ub77c \ubd84\ub9ac\ub41c \ud30c\uc77c\uc744 \uc21c\ud68c \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 48 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "chunker" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 49, | |
| "text": [ | |
| "<pandas.io.parsers.TextFileReader at 0x109b87f90>" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 49 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)\n", | |
| "\n", | |
| "tot = Series([])\n", | |
| "for piece in chunker:\n", | |
| " tot = tot.add( piece['key'].value_counts(), fill_value=0)\n", | |
| "\n", | |
| "tot = tot.order(ascending=False)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 50 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "tot[:10]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 51, | |
| "text": [ | |
| "E 368\n", | |
| "X 364\n", | |
| "L 346\n", | |
| "O 343\n", | |
| "Q 340\n", | |
| "M 338\n", | |
| "J 337\n", | |
| "F 335\n", | |
| "K 334\n", | |
| "H 330\n", | |
| "dtype: float64" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 51 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "6.1.2 \ub370\uc774\ud130\ub97c \ud14d\uc2a4\ud2b8 \ud615\uc2dd\uc73c\ub85c \uae30\ub85d\ud558\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data = pd.read_csv('ch06/ex5.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 52 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>something</th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> one</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> two</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> three</td>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 53, | |
| "text": [ | |
| " something a b c d message\n", | |
| "0 one 1 2 3 4 NaN\n", | |
| "1 two 5 6 NaN 8 world\n", | |
| "2 three 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 53 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv('ch06/out.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 54 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/out.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| ",something,a,b,c,d,message\r\n", | |
| "0,one,1,2,3.0,4,\r\n", | |
| "1,two,5,6,,8,world\r\n", | |
| "2,three,9,10,11.0,12,foo\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 55 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv(sys.stdout, sep='|')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "|something|a|b|c|d|message\n", | |
| "0|one|1|2|3.0|4|\n", | |
| "1|two|5|6||8|world\n", | |
| "2|three|9|10|11.0|12|foo\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 56 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# Write DataFrame to a comma-separated value (csv) file\n", | |
| "# na_rep -> Missing data representation. NA REPresentation\n", | |
| "data.to_csv?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 57 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### na_rep\ub85c \ub204\ub77d\ub41c\uac12\uc744 \uc6d0\ud558\ub294 \uac12\uc73c\ub85c \ubcc0\uacbd \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv(sys.stdout, na_rep='NULL')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| ",something,a,b,c,d,message\n", | |
| "0,one,1,2,3.0,4,NULL\n", | |
| "1,two,5,6,NULL,8,world\n", | |
| "2,three,9,10,11.0,12,foo\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 58 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv(sys.stdout, na_rep='NaN')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| ",something,a,b,c,d,message\n", | |
| "0,one,1,2,3.0,4,NaN\n", | |
| "1,two,5,6,NaN,8,world\n", | |
| "2,three,9,10,11.0,12,foo\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 59 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### row, column \uac12\uc744 \uc800\uc7a5\ud560 \uac83\uc778\uc9c0 \uc120\ud0dd \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv(sys.stdout, index=False, header=False)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "one,1,2,3.0,4,\n", | |
| "two,5,6,,8,world\n", | |
| "three,9,10,11.0,12,foo\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 60 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ucee4\ub7fc\uc758 \uc77c\ubd80\ubd84\ub9cc \uae30\ub85d \uac00\ub2a5, \uc21c\uc11c\ub97c \uc9c1\uc811 \uc9c0\uc815 \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data.to_csv(sys.stdout, index=False, cols=['a', 'b', 'c'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "a,b,c\n", | |
| "1,2,3.0\n", | |
| "5,6,\n", | |
| "9,10,11.0\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 61 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Series\uc5d0\ub3c4 to_csv method \uc874\uc7ac" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "dates = pd.date_range('1/1/2000', periods=7)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 62 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "ts = Series(np.arange(7), index=dates)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 63 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "ts.to_csv('ch06/tseries.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 64 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/tseries.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "2000-01-01,0\r\n", | |
| "2000-01-02,1\r\n", | |
| "2000-01-03,2\r\n", | |
| "2000-01-04,3\r\n", | |
| "2000-01-05,4\r\n", | |
| "2000-01-06,5\r\n", | |
| "2000-01-07,6\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 65 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc57d\uac04 \ubcf5\uc7a1\ud558\uac8c \ud5e4\ub354\ub97c \uc5c6\uc560\uace0 \uccab \ubc88\uc9f8 \uce7c\ub7fc\uc744 \uc0c9\uc778\uc73c\ub85c \ud558\uba74 read_csv \uba54\uc11c\ub4dc\ub85c Series \uac1d\uccb4\ub97c \uc5bb\uc744 \uc218 \uc788\uc9c0\ub9cc from_csv \uba54\uc11c\ub4dc\uac00 \uc880 \ub354 \ud3b8\ub9ac\ud558\uace0 \uac04\ub2e8\ud558\uac8c \ubb38\uc81c \ud574\uacb0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.DataFrame.to_csv?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 66 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "Series.from_csv('ch06/tseries.csv', parse_dates=True)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 67, | |
| "text": [ | |
| "2000-01-01 0\n", | |
| "2000-01-02 1\n", | |
| "2000-01-03 2\n", | |
| "2000-01-04 3\n", | |
| "2000-01-05 4\n", | |
| "2000-01-06 5\n", | |
| "2000-01-07 6\n", | |
| "dtype: int64" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 67 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "type( Series.from_csv('ch06/tseries.csv', parse_dates=True) )" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 68, | |
| "text": [ | |
| "pandas.core.series.Series" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 68 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# parse dates: boolean, default True.\n", | |
| "# Parse dates. Different default from read_table\n", | |
| "Series.from_csv?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 69 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### read_csv\ub97c Series\ub85c \uc77d\uc744 \uc218 \uc788\ub2e4\uace0 \uc2e4\ud5d8\ud558\ub294 \uc911\uc778\ub370 \uc798 \uc548\ub418\ub124.. \n", | |
| "\n", | |
| "- DataFrame\uc73c\ub85c \uc77d\uc5b4\uc9d0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv('ch06/tseries.csv', header=None)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 2000-01-01</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 2000-01-02</td>\n", | |
| " <td> 1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 2000-01-03</td>\n", | |
| " <td> 2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 2000-01-04</td>\n", | |
| " <td> 3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 2000-01-05</td>\n", | |
| " <td> 4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td> 2000-01-06</td>\n", | |
| " <td> 5</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td> 2000-01-07</td>\n", | |
| " <td> 6</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 70, | |
| "text": [ | |
| " 0 1\n", | |
| "0 2000-01-01 0\n", | |
| "1 2000-01-02 1\n", | |
| "2 2000-01-03 2\n", | |
| "3 2000-01-04 3\n", | |
| "4 2000-01-05 4\n", | |
| "5 2000-01-06 5\n", | |
| "6 2000-01-07 6" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 70 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "type(pd.read_csv('ch06/tseries.csv', header=None))" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 71, | |
| "text": [ | |
| "pandas.core.frame.DataFrame" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 71 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.read_csv?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 72 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "###6.1.3 \uc218\ub3d9\uc73c\ub85c \uad6c\ubd84 \ud615\uc2dd \ucc98\ub9ac\ud558\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### read_table\uc5d0\uc11c \uc77d\uc744 \uc218 \uc5c6\ub294 \uc798\ubabb\ub41c \ud615\uc2dd\uc758 \uc904\uc774 \ud3ec\ud568\ub41c \ub370\uc774\ud130\uac00 \ub4dc\ubb3c\uac8c \ubc1c\uacac \ub428 -> \uc218\ub3d9 \ucc98\ub9ac" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat ch06/ex7.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "\"a\",\"b\",\"c\"\r\n", | |
| "\"1\",\"2\",\"3\"\r\n", | |
| "\"1\",\"2\",\"3\",\"4\"\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 73 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "import csv\n", | |
| "f = open('ch06/ex7.csv')\n", | |
| "\n", | |
| "reader = csv.reader(f)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 74 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "for line in reader:\n", | |
| " print line" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "['a', 'b', 'c']\n", | |
| "['1', '2', '3']\n", | |
| "['1', '2', '3', '4']\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 75 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "lines = list(csv.reader(open('ch06/ex7.csv')))" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 76 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "header, values = lines[0], lines[1:]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 77 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "header" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 78, | |
| "text": [ | |
| "['a', 'b', 'c']" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 78 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "values" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 79, | |
| "text": [ | |
| "[['1', '2', '3'], ['1', '2', '3', '4']]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 79 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# header = a,b,c\n", | |
| "# values\ub97c 1,1\uc744 \uac19\uc774 \ubb36\ub294\ub2e4. 2,2 \ubb36\uace0. 3,3 \ubb36\uace0. 4\ub294 header\uac00 a,b,c 3\uac1c \ubc16\uc5d0 \uc5c6\uae30 \ub54c\ubb38\uc5d0 \ud3ec\ud568\ub418\uc9c0 \uc54a\ub294\ub2e4.\n", | |
| "data_dict = {h: v for h, v in zip(header, zip(*values))}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 80 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data_dict" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 81, | |
| "text": [ | |
| "{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 81 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### CSV \ud30c\uc77c\uc740 \ub2e4\uc591\ud55c \ud30c\uc77c \uc874\uc7ac\ud558\uae30 \ub54c\ubb38\uc5d0 \ub2e4\uc591\ud55c \uc635\uc158\ub4e4\uc740 csv.Dialect \uc0c1\uc18d\ubc1b\uc544 \ud574\uacb0\n", | |
| "\n", | |
| "- \ub2e4\uc591\ud55c \uad6c\ubd84\uc790\n", | |
| "- \ubb38\uc790\uc5f4\uc744 \ub458\ub7ec\uc2f8\ub294 \ubc29\ubc95\n", | |
| "- \uac1c\ud589\ubb38\uc790" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "class my_dialect(csv.Dialect):\n", | |
| " lineterminator = '\\n'\n", | |
| " delimiter = ';'\n", | |
| " quotechar = '\"'\n", | |
| "\n", | |
| "reader = csv.reader" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 82 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "reader = csv.reader?" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 83 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "reader = csv.reader" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "reader = csv.reader" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 84 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### TypeError: \"quoting\" must be an integer\n", | |
| "\n", | |
| "- [_csv.Error: field larger than field limit (131072) \ucc38\uace0](http://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\n", | |
| "reader = csv.reader(f, dialect=my_dialect)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "TypeError", | |
| "evalue": "\"quoting\" must be an integer", | |
| "output_type": "pyerr", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-85-92557f61d368>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# quoting\uc774 \uaf2d integer\uc5ec\uc57c \ud55c\ub2e4\ub294 \uc624\ub958\uac00 \ubc1c\uc0dd\ud574\uc11c \uc0bd\uc9c8\ud558\ub2e4\uac00 \ub4a4\uc5d0 quoting keyword\ub97c \ubd99\uc5ec\uc90c..\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mreader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcsv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdialect\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmy_dialect\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mTypeError\u001b[0m: \"quoting\" must be an integer" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 85 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "reader = csv.reader(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 86 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "csv.QUOTE_NONE" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 87, | |
| "text": [ | |
| "3" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 87 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "reader = csv.reader(f, delimiter='|')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 88 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### [13.1. csv \u2014 CSV File Reading and Writing](http://docs.python.org/3.1/library/csv.html)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# \uc5b4\ub5a4 \uc635\uc158\ub4e4 \uc788\ub294\uc9c0 \ubcf4\ub824\uace0 \ud588\ub354\ub2c8 \uc548 \ubcf4\uc5ec\uc8fc\ub124...\n", | |
| "csv.reader??" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 89 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### CSV Note\n", | |
| "\n", | |
| "- \uc880 \ub354 \ubcf5\uc7a1\ud558\uac70\ub098 \uad6c\ubd84\uc790\uac00 \ud55c \uae00\uc790\ub97c \ucd08\uacfc\ud558\ub294 \uace0\uc815 \uae38\uc774\ub97c \uac00\uc9c4\ub2e4\uba74 csv \ubaa8\ub4c8\uc744 \uc0ac\uc6a9\ud560 \uc218 \uc5c6\ub2e4.\n", | |
| "- \uc774\ub7f0 \uacbd\uc6b0\uc5d0\ub294 \uc904\uc744 \ub098\ub204\uace0 \ubb38\uc790\uc5f4\uc758 split \uba54\uc11c\ub4dc\ub098 \uc815\uaddc\ud45c\ud604\uc2dd \uba54\uc11c\ub4dc\uc778 re.split \ub4f1\uc744 \uc774\uc6a9\ud574\uc11c \uac00\uacf5\ud558\ub294 \uc791\uc5c5\uc744 \ud574\uc57c \ud55c\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### CSV \ud30c\uc77c \uae30\ub85d" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "with open('mydata.csv', 'w') as f:\n", | |
| " writer = csv.writer(f, dialect=my_dialect, quoting=csv.QUOTE_NONE)\n", | |
| " writer.writerow(('one', 'two', 'three'))\n", | |
| " writer.writerow(('1', '2', '3'))\n", | |
| " writer.writerow(('4', '5', '6'))\n", | |
| " writer.writerow(('7', '8', '9'))" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 90 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "!cat mydata.csv" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "one;two;three\r\n", | |
| "1;2;3\r\n", | |
| "4;5;6\r\n", | |
| "7;8;9\r\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 91 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### JSON \ub370\uc774\ud130\n", | |
| "\n", | |
| "- JSON(JavaScript Object Notation)\uc740 \uc6f9\ube0c\ub77c\uc6b0\uc800\uc640 \ub2e4\ub978 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc774 HTTP \uc694\uccad\uc73c\ub85c \ub370\uc774\ud130\ub97c \ubcf4\ub0bc \ub54c \ub110\ub9ac \uc0ac\uc6a9\ud558\ub294 \ud45c\uc900 \ud30c\uc77c \ud615\uc2dd \uc911 \ud558\ub098\ub2e4.\n", | |
| "- JSON\uc740 CSV \uac19\uc740 \ud45c \ud615\uc2dd\uc758 \ud14d\uc2a4\ud2b8\ubcf4\ub2e4 \uc880 \ub354 \uc720\uc5f0\ud55c \ub370\uc774\ud130 \ud615\uc2dd\uc774\uba70, JSON \ub370\uc774\ud130\uc758 \uc608\ub294 \ub2e4\uc74c\uacfc \uac19\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# json\uc740 python\uc5d0\uc11c\ucc98\ub7fc '\uc73c\ub85c \ud558\uba74 \uc548\ub41c\ub2e4. \ud604\uc7ac \"\"\"\ub85c \uac10\uc2f8 \ubb38\uc790\uc5f4\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uae30 \ub54c\ubb38\uc5d0 javascript\uc5d0\uc11c\ub294 '\ub97c string \uac12\uc73c\ub85c \uc778\uc2dd\ud558\uc9c0 \uc54a\uc544\uc11c \uc5d0\ub7ec \ubc1c\uc0dd\n", | |
| "obj = \"\"\"\n", | |
| "{\n", | |
| " 'name': 'Wes',\n", | |
| " 'places_lived': ['United States', 'Spain', 'Germany'],\n", | |
| " 'pet': null, 'siblings': [{'name': 'Scott', 'age':25, 'pet':'Zuko'},\n", | |
| " {'name': 'Katie', 'age':33, 'pet': 'Cisco'}]\n", | |
| "}\n", | |
| "\"\"\"" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 92 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# ValueError: Expecting property name: line 3 column 5 (char 7)\n", | |
| "result = json.loads(obj)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "NameError", | |
| "evalue": "name 'json' is not defined", | |
| "output_type": "pyerr", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-93-f05e1f9794f2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# ValueError: Expecting property name: line 3 column 5 (char 7)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mNameError\u001b[0m: name 'json' is not defined" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 93 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "obj = \"\"\"\n", | |
| "{\n", | |
| " \"name\": \"Wes\",\n", | |
| " \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\n", | |
| " \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\n", | |
| " {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\n", | |
| "}\n", | |
| "\"\"\"" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 94 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "obj" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 95, | |
| "text": [ | |
| "'\\n{\\n \"name\": \"Wes\",\\n \"places_lived\": [\"United States\", \"Spain\", \"Germany\"],\\n \"pet\": null, \"siblings\": [{\"name\": \"Scott\", \"age\":25, \"pet\":\"Zuko\"},\\n {\"name\": \"Katie\", \"age\":33, \"pet\": \"Cisco\"}]\\n}\\n'" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 95 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### JSON\uc740 \ub110 \uac12\uc778 null\uacfc \ub2e4\ub978 \uba87 \uac00\uc9c0 \uc0ac\uc18c\ud55c \uc8fc\uc758\uc0ac\ud56d(\ub9ac\uc2a4\ud2b8\uc758 \ub9c8\uc9c0\ub9c9\uc5d0 \uc27c\ud45c\uac00 \uc788\uc73c\uba74 \uc548\ub41c\ub2e4\ub358\uac00 \ud558\ub294)\uc744 \uc81c\uc678\ud558\uba74 \ud30c\uc774\uc36c \ucf54\ub4dc\uc640 \uac70\uc758 \uc720\uc0ac\n", | |
| "\n", | |
| "- \uae30\ubcf8 \uc790\ub8cc\ud615\uc740 \uac1d\uccb4(\uc0ac\uc804), \ubc30\uc5f4(\ub9ac\uc2a4\ud2b8), \ubb38\uc790\uc5f4, \uc22b\uc790, \ubd88\ub9ac\uc5b8 \uadf8\ub9ac\uace0 \ub110\n", | |
| "- \uac1d\uccb4\uc758 \ud0a4\ub294 \ubc18\ub4dc\uc2dc \ubb38\uc790\uc5f4\n", | |
| "- JSON \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ub77c\uc774\ube0c\ub7ec\ub9ac\uac00 \uba87 \uac1c \uc788\uc9c0\ub9cc \ud45c\uc900 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc778 json \uc0ac\uc6a9" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "import json" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 96 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# ValueError: Expecting property name: line 3 column 5 (char 7)\n", | |
| "result = json.loads(obj)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 97 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "result" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 98, | |
| "text": [ | |
| "{u'name': u'Wes',\n", | |
| " u'pet': None,\n", | |
| " u'places_lived': [u'United States', u'Spain', u'Germany'],\n", | |
| " u'siblings': [{u'age': 25, u'name': u'Scott', u'pet': u'Zuko'},\n", | |
| " {u'age': 33, u'name': u'Katie', u'pet': u'Cisco'}]}" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 98 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### json.dumps\ub294 \ud30c\uc774\uc36c \uac1d\uccb4\ub97c JSON \ud615\ud0dc\ub85c \ubcc0\ud658" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "asjson = json.dumps(result)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 99 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# '\uac00 \uc544\ub2c8\ub77c \"\uc778 \uac83\uc744 \ud655\uc778\ud558\uc790\n", | |
| "asjson" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 100, | |
| "text": [ | |
| "'{\"pet\": null, \"siblings\": [{\"pet\": \"Zuko\", \"age\": 25, \"name\": \"Scott\"}, {\"pet\": \"Cisco\", \"age\": 33, \"name\": \"Katie\"}], \"name\": \"Wes\", \"places_lived\": [\"United States\", \"Spain\", \"Germany\"]}'" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 100 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### JSON \uac1d\uccb4\ub098 \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame\uc774\ub098 \ub2e4\ub978 \uc790\ub8cc \uad6c\uc870\ub85c \uc5b4\ub5bb\uac8c \ubcc0\ud658\ud574\uc11c \ubd84\uc11d\uc744 \ud560 \uac83\uc778\uc9c0\ub294 \ub3c5\uc790\uc758 \ubaab\n", | |
| "\n", | |
| "- JSON \uac1d\uccb4\uc758 \ub9ac\uc2a4\ud2b8\ub97c DataFrame \uc0dd\uc131\uc790\ub85c \ub118\uae30\uace0 \ub370\uc774\ud130 \ud544\ub4dc \uc120\ud0dd \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "siblings = DataFrame(result['siblings'], columns=['name', 'age'])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 101 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "siblings" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>name</th>\n", | |
| " <th>age</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> Scott</td>\n", | |
| " <td> 25</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> Katie</td>\n", | |
| " <td> 33</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 102, | |
| "text": [ | |
| " name age\n", | |
| "0 Scott 25\n", | |
| "1 Katie 33" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 102 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### pandas\uc5d0\uc11c JSON\uc744 \ube60\ub974\uac8c \uc77d\uace0(from_json) \uc4f0\ub294(to_json) \ub124\uc774\ud2f0\ube0c \uad6c\ud604\uc911" | |
| ] | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "6.1.5 XML\uacfc HTML: \uc6f9 \ub0b4\uc6a9 \uae01\uc5b4\uc624\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### lxml\n", | |
| "\n", | |
| "- \uc544\uc8fc \ud070 \ud30c\uc77c\uc744 \ube60\ub974\uac8c \ucc98\ub9ac \uac00\ub2a5\n", | |
| "- \uc5ec\ub7ec \uc885\ub958\uc758 \uc778\ud130\ud398\uc774\uc2a4 \uc81c\uacf5\n", | |
| "- lxml.html, lxml.objectify\ub97c \uc774\uc6a9\ud574\uc11c XML \ucc98\ub9ac\n", | |
| "\n", | |
| "#### \ub300\ubd80\ubd84\uc758 \uc6f9\uc0ac\uc774\ud2b8\ub294 \ub531 \ud544\uc694\ud55c \ub0b4\uc6a9\ub9cc \ub4e4\uc5b4\uc788\ub294 JSON\uc774\ub098 XML\uc744 \ub9ce\uc774 \uc0ac\uc6a9\ud558\uc9c0 \uc54a\uace0 HTML\uc744 \uc0ac\uc6a9" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from lxml.html import parse\n", | |
| "from urllib2 import urlopen\n", | |
| "\n", | |
| "# \ub370\uc774\ud130\ub97c \uac00\uc838 \uc62c url\uc744 \ub118\uae34 \ud6c4\n", | |
| "# \ub370\uc774\ud130\ub97c \ubc1b\uc544 \uc628 \ud6c4 parse\n", | |
| "parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))\n", | |
| "\n", | |
| "doc = parsed.getroot()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 103 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### doc \uac1d\uccb4\uc5d0\ub294 \ubaa8\ub4e0 HTML \ud0dc\uadf8 \ucd94\ucd9c\n", | |
| "\n", | |
| "- \uc6b0\ub9ac\uac00 \uad00\uc2ec \uac00\uc838\uc57c \ud560 table \ud0dc\uadf8\ub3c4 \ud3ec\ud568\n", | |
| "- \uc5b4\ub5bb\uac8c \ub3d9\uc791\ud558\ub294\uc9c0 \ud655\uc778\ud558\uae30 \uc704\ud574 \uae01\uc5b4\uc628 HTML \ubb38\uc11c\uc5d0\uc11c \uc678\ubd80 \uc5f0\uacb0 URL\uc744 \ubaa8\ub450 \ucc3e\uc544\ubcf4\uc790.\n", | |
| "- \uc678\ubd80 \uc5f0\uacb0\uc740 a \ud0dc\uadf8\ub85c \uc9c0\uc815\n", | |
| "- findall \uba54\uc11c\ub4dc\uc5d0 XPath(\ubb38\uc11c \uc9c8\uc758 \uc5b8\uc5b4)\ub97c \uc0ac\uc6a9\ud574\uc11c \ud574\ub2f9 \uc5d8\ub9ac\uba3c\ud2b8\ub97c \uac00\uc838\uc62c \uc218 \uc788\ub2e4.\n", | |
| "\n", | |
| "#### XPath tutorial site\n", | |
| "\n", | |
| "- [W3schools](http://www.w3schools.com/XPath/)\n", | |
| "- [XPath and XSLT with lxml](http://lxml.de/xpathxslt.html)\n", | |
| "- [Using Chrome Developer Tools](http://stackoverflow.com/questions/3030487/is-there-a-way-to-get-the-xpath-in-google-chrome)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "links = doc.findall('.//a')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 104 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "links[15:20]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 105, | |
| "text": [ | |
| "[<Element a at 0x109d23590>,\n", | |
| " <Element a at 0x109d235f0>,\n", | |
| " <Element a at 0x109d23650>,\n", | |
| " <Element a at 0x109d236b0>,\n", | |
| " <Element a at 0x109d23710>]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 105 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc774 \uac1d\uccb4\ub294 HTML \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\uc77c \ubfd0\n", | |
| "\n", | |
| "- \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ud45c\ud604\ud558\ub294 \uac1d\uccb4\ub77c\uace0 \uc0dd\uac01\ud558\uc790. \uc548 \uadf8\ub7ec\uba74 \uc0bd\uc9c8\ud558\uac8c \ub41c\ub2e4!\n", | |
| "- URL\uacfc \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc624\ub824\uba74 \uac01 \uc5d8\ub9ac\uba3c\ud2b8\uc5d0 \ub300\ud574 get \uba54\uc11c\ub4dc\ub97c \ud638\ucd9c\ud558\uc5ec URL\uc744 \uc5bb\uace0, text_content \uba54\uc11c\ub4dc\ub97c \uc774\uc6a9\ud574\uc11c \ub9c1\ud06c \uc774\ub984\uc744 \uac00\uc838\uc640\uc57c \ud55c\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "lnk = links[28]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 106 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "lnk" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 107, | |
| "text": [ | |
| "<Element a at 0x109d23a70>" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 107 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "lnk.get('href')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 108, | |
| "text": [ | |
| "'https://edit.yahoo.com/mc2.0/eval_profile?.intl=us&.lang=en-US&.done=http://finance.yahoo.com/q/op%3fs=AAPL%2bOptions&.src=quote&.intl=us&.lang=en-US'" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 108 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "lnk.text_content()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 109, | |
| "text": [ | |
| "'Account Info'" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 109 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### [list comprehensions in Python](http://www.pythonforbeginners.com/lists/list-comprehensions-in-python/)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "urls = [lnk.get('href') for lnk in doc.findall('.//a')]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 110 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "urls[-10:]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 111, | |
| "text": [ | |
| "['/q/op?s=AAPL&k=580.000000',\n", | |
| " '/q?s=AAPL140328P00580000',\n", | |
| " '/q/op?s=AAPL&k=600.000000',\n", | |
| " '/q?s=AAPL140328P00600000',\n", | |
| " '/q/os?s=AAPL&m=2014-03-28',\n", | |
| " 'http://help.yahoo.com/l/us/yahoo/finance/quotes/fitadelay.html',\n", | |
| " 'http://billing.finance.yahoo.com/realtime_quotes/signup?.src=quote&.refer=quote',\n", | |
| " 'http://www.capitaliq.com',\n", | |
| " 'http://www.csidata.com',\n", | |
| " 'http://www.morningstar.com/']" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 111 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ucc3e\uace0\uc790 \ud558\ub294 table \uc77c\uc77c\uc774 \ud655\uc778\n", | |
| "\n", | |
| "- \uba87\uba87 \uc6f9\uc0ac\uc774\ud2b8\ub294 table\ub9c8\ub2e4 id \uc18d\uc131\uc744 \uc918\uc11c \uc27d\uac8c \ud560 \uc218 \uc788\uc9c0\ub9cc \uc5b4\ub514 \uc138\uc0c1 \uc77c\uc774 \uc27d\uac8c \ub418\ub294\uac8c \uc788\ub098? \ub178\uac00\ub2e4 \ud574\uc57c\uc9c0.." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "tables = doc.findall('.//table')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 112 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "calls = tables[9]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 113 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "puts = tables[13]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 114 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "rows = calls.findall('.//tr')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 115 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### \uc6f9\ud398\uc774\uc9c0 \uad6c\uc870\uac00 \uc548 \ubc14\ub00c\uc5c8\ub124.\n", | |
| "\n", | |
| "- \ucc45\uc744 \uc4f4\uac8c 2012\ub144 10\uc6d4 29\uc77c\uc778\ub370 \uc544\uc9c1\uae4c\uc9c0 \uc548 \ubc14\ub00c\ub2e4\ub2c8..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "def _unpack(row, kind='td'):\n", | |
| " elts = row.findall('.//%s' % kind)\n", | |
| " return [val.text_content() for val in elts]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 116 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "_unpack(rows[0], kind='th')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 117, | |
| "text": [ | |
| "['Strike', 'Symbol', 'Last', 'Chg', 'Bid', 'Ask', 'Vol', 'Open Int']" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 117 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "_unpack(rows[1], kind='td')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 118, | |
| "text": [ | |
| "['440.00',\n", | |
| " 'AAPL7140328C00440000',\n", | |
| " '94.04',\n", | |
| " ' 0.00',\n", | |
| " '98.30',\n", | |
| " '102.15',\n", | |
| " '1',\n", | |
| " '1']" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 118 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ub2e8\uacc4\ub4e4\uc744 \ud1b5\ud569\ud558\uc5ec \uc6f9\uc5d0\uc11c \uae01\uc5b4\uc628 \ub370\uc774\ud130\ub97c DataFrame\uc73c\ub85c \ubcc0\ud658\n", | |
| "\n", | |
| "- \uc22b\uc790 \ub370\uc774\ud130\uc9c0\ub9cc \uc5ec\uc804\ud788 \ubb38\uc790\uc5f4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ub418\uc5b4 \uc788\uc73c\ubbc0\ub85c \uc801\uc808\ud558\uac8c \ubcc0\ud658\uc744 \ud574\uc918\uc57c \ud558\ub294\ub370 \ubaa8\ub4e0 \ub370\uc774\ud130\uac00 \uc2e4\uc218\ud615\uc740 \uc544\ub2d0 \uac83\uc774\ubbc0\ub85c \uc774 \uc791\uc5c5\uc740 \uc218\ub3d9\uc73c\ub85c \ucc98\ub9ac\n", | |
| "- \ud558\uc9c0\ub9cc \uc6b4 \uc88b\uaca0\ub3c4 pandas\uc5d0\ub294 TextParser \ud074\ub798\uc2a4\uac00 \uc788\uc5b4 \uc790\ub3d9 \ud615 \ubcc0\ud658\uc744 \uc801\uc808\ud558\uac8c \uc218\ud589\ud574\uc900\ub2e4.\n", | |
| "- TextParser \ud074\ub798\uc2a4\ub294 read_csv \ud568\uc218\uc640 \ub2e4\ub978 \ud30c\uc2f1 \ud568\uc218\uc5d0\uc11c\ub3c4 \uc0ac\uc6a9" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from pandas.io.parsers import TextParser\n", | |
| "\n", | |
| "def parse_options_data(table):\n", | |
| " rows = table.findall('.//tr')\n", | |
| " header = _unpack(rows[0], kind='th')\n", | |
| " data = [_unpack(r) for r in rows[1:]]\n", | |
| " return TextParser(data, names=header).get_chunk()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 119 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ub9c8\uc9c0\ub9c9\uc73c\ub85c lxml \ud14c\uc774\ube14 \uac1d\uccb4\ub97c \uc704\uc5d0\uc11c \uc791\uc131\ud55c \ud30c\uc2f1 \ud568\uc218\ub97c \uc774\uc6a9\ud574\uc11c \ucc98\ub9ac\ud558\uba74 DataFrame \uacb0\uacfc\uac12 \uc5bb\uc744 \uc218 \uc788\ub2e4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### [\uc635\uc158(\uae08\uc735) - wiki kr](http://ko.wikipedia.org/wiki/%EC%98%B5%EC%85%98_(%EA%B8%88%EC%9C%B5)\n", | |
| "\n", | |
| "- \uae08\uc735 \ub370\uc774\ud130\ub97c \ubd84\uc11d\ud558\ub294 \uac83\uc774\uae30 \ub54c\ubb38\uc5d0 \uae08\uc735 \ud30c\ud2b8\uc5d0 \ub300\ud55c \ub3c4\uba54\uc778 \uc9c0\uc2dd\uc774 \uc788\uc5b4\uc57c \ud55c\ub2e4. \ub0b4\uac00 \ubd84\uc11d\ud558\ub824\ub294 \ub370\uc774\ud130\uac00 \uc5b4\ub5a0\ud55c \uc5ed\ud560\uc744 \ud558\ub294\uc9c0 \ubaa8\ub974\uba74 \ub9d0\uc9f1 \ud669!\n", | |
| "- \uc635\uc158(option)\uc740 \ud30c\uc0dd \uc0c1\ud488\uc758 \uc77c\uc885\uc774\uba70, \ubbf8\ub9ac \uacb0\uc815\ub41c \uae30\uac04 \uc548\uc5d0 \ud2b9\uc815 \uc0c1\ud488\uc744 \uc815\ud574\uc9c4 \uac00\uaca9\uc73c\ub85c \uc0ac\uace0 \ud314 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \ub9d0\ud55c\ub2e4. \n", | |
| "- call option: \ud2b9\uc815 \uae08\uc735 \uc0c1\ud488\uc744 \uc815\ud574\uc9c4 \uac00\uaca9\uc5d0 \ub9e4\uc785\ud560 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \uac00\uc9c4 \ub9e4\uc785 \uc635\uc158(call option)\n", | |
| "- put option: \ub9e4\ub3c4\ud560 \uc218 \uc788\ub294 \uad8c\ub9ac\ub97c \uac00\uc9c4 \ub9e4\ub3c4 \uc635\uc158(put option)\uc73c\ub85c \ub098\ub25c\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "call_data = parse_options_data(calls)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 120 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "put_data = parse_options_data(puts)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 121 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "call_data[:10]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Strike</th>\n", | |
| " <th>Symbol</th>\n", | |
| " <th>Last</th>\n", | |
| " <th>Chg</th>\n", | |
| " <th>Bid</th>\n", | |
| " <th>Ask</th>\n", | |
| " <th>Vol</th>\n", | |
| " <th>Open Int</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 440.0</td>\n", | |
| " <td> AAPL7140328C00440000</td>\n", | |
| " <td> 94.04</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 98.30</td>\n", | |
| " <td> 102.15</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 450.0</td>\n", | |
| " <td> AAPL140328C00450000</td>\n", | |
| " <td> 86.25</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 89.40</td>\n", | |
| " <td> 91.75</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 11</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 450.0</td>\n", | |
| " <td> AAPL7140328C00450000</td>\n", | |
| " <td> 79.35</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 88.20</td>\n", | |
| " <td> 92.15</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 460.0</td>\n", | |
| " <td> AAPL140328C00460000</td>\n", | |
| " <td> 69.52</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 78.50</td>\n", | |
| " <td> 81.80</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 470.0</td>\n", | |
| " <td> AAPL140328C00470000</td>\n", | |
| " <td> 71.75</td>\n", | |
| " <td> 2.78</td>\n", | |
| " <td> 69.55</td>\n", | |
| " <td> 70.90</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 78</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td> 480.0</td>\n", | |
| " <td> AAPL140328C00480000</td>\n", | |
| " <td> 68.05</td>\n", | |
| " <td> 17.45</td>\n", | |
| " <td> 59.65</td>\n", | |
| " <td> 61.75</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 32</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td> 485.0</td>\n", | |
| " <td> AAPL140328C00485000</td>\n", | |
| " <td> 62.35</td>\n", | |
| " <td> 4.41</td>\n", | |
| " <td> 54.50</td>\n", | |
| " <td> 56.80</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td> 485.0</td>\n", | |
| " <td> AAPL7140328C00485000</td>\n", | |
| " <td> 64.00</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 53.20</td>\n", | |
| " <td> 57.20</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td> 490.0</td>\n", | |
| " <td> AAPL140328C00490000</td>\n", | |
| " <td> 58.50</td>\n", | |
| " <td> 7.09</td>\n", | |
| " <td> 49.30</td>\n", | |
| " <td> 50.90</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 60</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td> 492.5</td>\n", | |
| " <td> AAPL140328C00492500</td>\n", | |
| " <td> 37.15</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> 46.30</td>\n", | |
| " <td> 49.10</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 5</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 122, | |
| "text": [ | |
| " Strike Symbol Last Chg Bid Ask Vol Open Int\n", | |
| "0 440.0 AAPL7140328C00440000 94.04 0.00 98.30 102.15 1 1\n", | |
| "1 450.0 AAPL140328C00450000 86.25 0.00 89.40 91.75 5 11\n", | |
| "2 450.0 AAPL7140328C00450000 79.35 0.00 88.20 92.15 2 2\n", | |
| "3 460.0 AAPL140328C00460000 69.52 0.00 78.50 81.80 3 3\n", | |
| "4 470.0 AAPL140328C00470000 71.75 2.78 69.55 70.90 16 78\n", | |
| "5 480.0 AAPL140328C00480000 68.05 17.45 59.65 61.75 11 32\n", | |
| "6 485.0 AAPL140328C00485000 62.35 4.41 54.50 56.80 5 6\n", | |
| "7 485.0 AAPL7140328C00485000 64.00 0.00 53.20 57.20 0 1\n", | |
| "8 490.0 AAPL140328C00490000 58.50 7.09 49.30 50.90 1 60\n", | |
| "9 492.5 AAPL140328C00492500 37.15 0.00 46.30 49.10 5 5" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 122 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "put_data[:10]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Strike</th>\n", | |
| " <th>Symbol</th>\n", | |
| " <th>Last</th>\n", | |
| " <th>Chg</th>\n", | |
| " <th>Bid</th>\n", | |
| " <th>Ask</th>\n", | |
| " <th>Vol</th>\n", | |
| " <th>Open Int</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 430</td>\n", | |
| " <td> AAPL140328P00430000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 164</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 440</td>\n", | |
| " <td> AAPL140328P00440000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 105</td>\n", | |
| " <td> 236</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 440</td>\n", | |
| " <td> AAPL7140328P00440000</td>\n", | |
| " <td> 0.18</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.58</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 450</td>\n", | |
| " <td> AAPL140328P00450000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 771</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 460</td>\n", | |
| " <td> AAPL140328P00460000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 281</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td> 460</td>\n", | |
| " <td> AAPL7140328P00460000</td>\n", | |
| " <td> 0.15</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.07</td>\n", | |
| " <td> 20</td>\n", | |
| " <td> 20</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td> 470</td>\n", | |
| " <td> AAPL140328P00470000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.02</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 493</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td> 480</td>\n", | |
| " <td> AAPL140328P00480000</td>\n", | |
| " <td> 0.02</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.02</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1,065</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td> 485</td>\n", | |
| " <td> AAPL140328P00485000</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.02</td>\n", | |
| " <td> 0.01</td>\n", | |
| " <td> 0.02</td>\n", | |
| " <td> 63</td>\n", | |
| " <td> 781</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td> 485</td>\n", | |
| " <td> AAPL7140328P00485000</td>\n", | |
| " <td> 0.21</td>\n", | |
| " <td> 0.00</td>\n", | |
| " <td> N/A</td>\n", | |
| " <td> 0.26</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 10</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 123, | |
| "text": [ | |
| " Strike Symbol Last Chg Bid Ask Vol Open Int\n", | |
| "0 430 AAPL140328P00430000 0.01 0.00 N/A 0.01 6 164\n", | |
| "1 440 AAPL140328P00440000 0.01 0.00 N/A 0.01 105 236\n", | |
| "2 440 AAPL7140328P00440000 0.18 0.00 N/A 0.58 1 1\n", | |
| "3 450 AAPL140328P00450000 0.01 0.00 N/A 0.01 27 771\n", | |
| "4 460 AAPL140328P00460000 0.01 0.01 N/A 0.01 10 281\n", | |
| "5 460 AAPL7140328P00460000 0.15 0.00 N/A 0.07 20 20\n", | |
| "6 470 AAPL140328P00470000 0.01 0.01 N/A 0.02 2 493\n", | |
| "7 480 AAPL140328P00480000 0.02 0.00 N/A 0.02 1 1,065\n", | |
| "8 485 AAPL140328P00485000 0.01 0.02 0.01 0.02 63 781\n", | |
| "9 485 AAPL7140328P00485000 0.21 0.00 N/A 0.26 10 10" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 123 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### lxml.objectify \uc774\uc6a9\ud574 XML \ud30c\uc2f1\ud558\uae30\n", | |
| "\n", | |
| "- [XML(eXtensible Markup Language)](http://en.wikipedia.org/wiki/Xml)\uc740 \uacc4\uce35\uc801 \uad6c\uc870\uc640 \uba54\ud0c0\ub370\uc774\ud130\ub97c \ud3ec\ud568\ud558\ub294 \uc911\ucca9\ub41c \ub370\uc774\ud130 \uad6c\uc870\ub97c \uc9c0\uc6d0\ud558\ub294 \ub610 \ub2e4\ub978 \uc720\uba85\ud55c \ub370\uc774\ud130 \ud615\uc2dd\uc774\ub2e4. \uc9c0\uae08 \uc774 \ucc45\ub3c4 \uc2e4\uc81c\ub85c\ub294 XML \ubb38\uc11c\ub85c \uc791\uc131\n", | |
| "- \ub274\uc695 MTA(Metropolitan Transportation Authority)\ub294 \ubc84\uc2a4\uc640 \uc804\ucca0 \uc6b4\uc601\uc5d0 \uad00\ud55c \uc5ec\ub7ec \uac00\uc9c0 [\ub370\uc774\ud130 \uacf5\uac1c](http://www.mta.info/developers/download.html)\n", | |
| "- \uc0b4\ud3b4\ubcfc \uac83\uc740 \uc5ec\ub7ec XML \ud30c\uc77c\ub85c \uc81c\uacf5\ub418\ub294 \uc2e4\uc801 \uc790\ub8cc\n", | |
| "- \uc804\ucca0\uacfc \ubc84\uc2a4 \uc6b4\uc601\uc740 \ub9e4\uc6d4 \uc544\ub798\uc640 \ube44\uc2b7\ud55c \ub0b4\uc6a9\uc758 \uac01\uac01 \ub2e4\ub978 \ud30c\uc77c(Metro-North Railroad\uc758 \uacbd\uc6b0 Preformance_MNR.xml \uac19\uc740)\ub85c \uc81c\uacf5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc9dc\uc99d\ub098\uac8c Performance_MNR.xml\uc744 \uc5b4\ub5bb\uac8c \ubc1b\ub294\uc9c0 \ubaa8\ub974\uaca0\ub2e4.\n", | |
| "\n", | |
| "- \uc18c\uc2a4\ud30c\uc77c\uc744 6\uc7a5\uc744 \ub4a4\uc838\ubd10\ub3c4 \uc5c6\uace0\n", | |
| "- \ud648\ud398\uc774\uc9c0\uc5d0\ub294 \uc544\ub9c8 XML \uad6c\uc870\uac00 \ubc14\ub010\ub4ef \uc2f6\ub2e4.\n", | |
| "- \uadf8\ub798\uc11c \ucd5c\ud6c4\uc758 \uc218\ub2e8\uc73c\ub85c \uc5bc\ub9c8 \uc548\ub418\uc11c \uadf8\ub0e5 \ub0b4\uac00 \uc77c\uc77c\uc774 \ucce4\ub2e4.\n", | |
| "- XML\uc740 \uc5c4\uaca9\ud558\uae30 \ub54c\ubb38\uc5d0 \ud558\ub098\ub77c\ub3c4 \uc624\ud0c0\uac00 \uc788\uc73c\uba74 \uc624\ub958 \ubc1c\uc0dd\ud558\ubbc0\ub85c \uc8fc\uc758!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "%%writefile Performance_MNR.xml\n", | |
| "<INDICATOR>\n", | |
| " <INDICATOR_SEQ>373889</INDICATOR_SEQ>\n", | |
| " <PARENT_SEQ></PARENT_SEQ>\n", | |
| " <AGENCY_NAME>MEtro-North Railroad</AGENCY_NAME>\n", | |
| " <INDICATOR_NAME>Escalator Availability</INDICATOR_NAME>\n", | |
| " <DESCRIPTION>Percent of the time that escalators are operational systemwide. The availability rate is based on physical observations performed the morning of regular business days only. This is a new indicator the agency began reporting in 2009.</DESCRIPTION>\n", | |
| " <PERIOD_YEAR>2011</PERIOD_YEAR>\n", | |
| " <PERIOD_MONTH>12</PERIOD_MONTH>\n", | |
| " <CATEGORY>Service Indicators</CATEGORY>\n", | |
| " <FREQUENCY>M</FREQUENCY>\n", | |
| " <DESIRED_CHANGE>U</DESIRED_CHANGE>\n", | |
| " <INDICATOR_UNIT>%</INDICATOR_UNIT>\n", | |
| " <DECIMAL_PLACES>1</DECIMAL_PLACES>\n", | |
| " <YTD_TARGET>97.00</YTD_TARGET>\n", | |
| " <YTD_ACTUAL></YTD_ACTUAL>\n", | |
| " <MONTHLY_TARGET>97.00</MONTHLY_TARGET>\n", | |
| " <MONTHLY_ACTUAL></MONTHLY_ACTUAL>\n", | |
| "</INDICATOR>" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Overwriting Performance_MNR.xml\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 124 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from lxml import objectify\n", | |
| "import urllib2\n", | |
| "\n", | |
| "path = 'Performance_MNR.xml'\n", | |
| "# online_path = 'http://www.mta.info/developers/data/lirr/lirr_gtfs.xml'\n", | |
| "\n", | |
| "# data = urllib2.urlopen(online_path).read()\n", | |
| "# f = open(path, 'w')\n", | |
| "# f.write(data)\n", | |
| "# f.close()\n", | |
| "\n", | |
| "parsed = objectify.parse(open(path))\n", | |
| "root = parsed.getroot()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 125 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data = []" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 126 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ',\n", | |
| " 'DESIRED_CHANGE', 'DECIMAL_PLACES']" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 127 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### root.INDICATOR\ub97c \ud1b5\ud574 \ubaa8\ub4e0 <INDICATOR> \uc5d8\ub9ac\uba3c\ud2b8\ub97c \ub044\uc9d1\uc5b4 \ub0bc \uc218 \uc788\ub2e4\n", | |
| "\n", | |
| "- \uac01\uac01\uc758 \ud56d\ubaa9\uc5d0 \ub300\ud574 \uba87\uba87 \ud0dc\uadf8\ub294 \uc81c\uc678\ud558\uace0 \ud0dc\uadf8 \uc774\ub984(YTD_ACTUAL \uac19\uc740)\uc744 \ud0a4 \uac12\uc73c\ub85c \ud558\ub294 \uc0ac\uc804\uc744 \ub9cc\ub4e4\uc5b4 \ub0c4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# root.INDICATOR -> root\n", | |
| "\n", | |
| "for elt in root:\n", | |
| " el_data = {}\n", | |
| " for child in elt.getchildren():\n", | |
| " if child.tag in skip_fields:\n", | |
| " continue\n", | |
| " el_data[child.tag] = child.pyval\n", | |
| " data.append(el_data)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 128 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 129, | |
| "text": [ | |
| "[{'AGENCY_NAME': 'MEtro-North Railroad',\n", | |
| " 'CATEGORY': 'Service Indicators',\n", | |
| " 'DESCRIPTION': 'Percent of the time that escalators are operational systemwide. The availability rate is based on physical observations performed the morning of regular business days only. This is a new indicator the agency began reporting in 2009.',\n", | |
| " 'FREQUENCY': 'M',\n", | |
| " 'INDICATOR_NAME': 'Escalator Availability',\n", | |
| " 'INDICATOR_UNIT': '%',\n", | |
| " 'MONTHLY_ACTUAL': u'',\n", | |
| " 'MONTHLY_TARGET': 97.0,\n", | |
| " 'PERIOD_MONTH': 12,\n", | |
| " 'PERIOD_YEAR': 2011,\n", | |
| " 'YTD_ACTUAL': u'',\n", | |
| " 'YTD_TARGET': 97.0}]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 129 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "# \uc704\uc758 \uac12\uacfc \ube44\uad50\ud558\uae30 \uc704\ud574 \ud14c\uc2a4\ud2b8 \ud574\ubcf8 \uac83\n", | |
| "for elt in root:\n", | |
| " for child in elt.getchildren():\n", | |
| " print child.tag, child.pyval" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "INDICATOR_SEQ 373889\n", | |
| "PARENT_SEQ \n", | |
| "AGENCY_NAME MEtro-North Railroad\n", | |
| "INDICATOR_NAME Escalator Availability\n", | |
| "DESCRIPTION Percent of the time that escalators are operational systemwide. The availability rate is based on physical observations performed the morning of regular business days only. This is a new indicator the agency began reporting in 2009.\n", | |
| "PERIOD_YEAR 2011\n", | |
| "PERIOD_MONTH 12\n", | |
| "CATEGORY Service Indicators\n", | |
| "FREQUENCY M\n", | |
| "DESIRED_CHANGE U\n", | |
| "INDICATOR_UNIT %\n", | |
| "DECIMAL_PLACES 1\n", | |
| "YTD_TARGET 97.0\n", | |
| "YTD_ACTUAL \n", | |
| "MONTHLY_TARGET 97.0\n", | |
| "MONTHLY_ACTUAL \n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 130 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### 5\uc7a5\uc5d0\uc11c \uc0ac\uc804 \ud615\uc2dd\uc740 DataFrame\uc73c\ub85c \ubcc0\ud658\ud560 \uc218 \uc788\ub2e4\ub294 \uac83 \ucc38\uace0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "perf = DataFrame(data)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 131 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "perf" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>AGENCY_NAME</th>\n", | |
| " <th>CATEGORY</th>\n", | |
| " <th>DESCRIPTION</th>\n", | |
| " <th>FREQUENCY</th>\n", | |
| " <th>INDICATOR_NAME</th>\n", | |
| " <th>INDICATOR_UNIT</th>\n", | |
| " <th>MONTHLY_ACTUAL</th>\n", | |
| " <th>MONTHLY_TARGET</th>\n", | |
| " <th>PERIOD_MONTH</th>\n", | |
| " <th>PERIOD_YEAR</th>\n", | |
| " <th>YTD_ACTUAL</th>\n", | |
| " <th>YTD_TARGET</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> MEtro-North Railroad</td>\n", | |
| " <td> Service Indicators</td>\n", | |
| " <td> Percent of the time that escalators are operat...</td>\n", | |
| " <td> M</td>\n", | |
| " <td> Escalator Availability</td>\n", | |
| " <td> %</td>\n", | |
| " <td> </td>\n", | |
| " <td> 97</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> 2011</td>\n", | |
| " <td> </td>\n", | |
| " <td> 97</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 132, | |
| "text": [ | |
| " AGENCY_NAME CATEGORY \\\n", | |
| "0 MEtro-North Railroad Service Indicators \n", | |
| "\n", | |
| " DESCRIPTION FREQUENCY \\\n", | |
| "0 Percent of the time that escalators are operat... M \n", | |
| "\n", | |
| " INDICATOR_NAME INDICATOR_UNIT MONTHLY_ACTUAL MONTHLY_TARGET \\\n", | |
| "0 Escalator Availability % 97 \n", | |
| "\n", | |
| " PERIOD_MONTH PERIOD_YEAR YTD_ACTUAL YTD_TARGET \n", | |
| "0 12 2011 97 " | |
| ] | |
| } | |
| ], | |
| "prompt_number": 132 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "6.2 \uc774\uc9c4 \ub370\uc774\ud130 \ud615\uc2dd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### \ub370\uc774\ud130\ub97c \ud6a8\uc728\uc801\uc73c\ub85c \uc800\uc7a5\ud558\ub294 \uac00\uc7a5 \uc190\uc26c\uc6b4 \ubc29\ubc95\n", | |
| "\n", | |
| "- \ud30c\uc774\uc36c\uc5d0 \uae30\ubcf8\uc73c\ub85c \ub0b4\uc7a5\ub418\uc5b4 \uc788\ub294 pickle \uc9c1\ub82c\ud654\ub97c \ud1b5\ud574 \ub370\uc774\ud130\ub97c \uc774\uc9c4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ud558\ub294 \uac83\n", | |
| "- \ud3b8\ub9ac\ud558\uac8c\ub3c4 pandas\uc758 \uac1d\uccb4\ub294 \ubaa8\ub450 pickle\uc744 \uc774\uc6a9\ud574\uc11c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\ub294 save \uba54\uc11c\ub4dc \uc788\uc74c" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "frame = pd.read_csv('ch06/ex1.csv')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 133 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "frame" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 134, | |
| "text": [ | |
| " a b c d message\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 134 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "frame.save('ch06/frame_pickle')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stderr", | |
| "text": [ | |
| "/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/generic.py:720: FutureWarning: save is deprecated, use to_pickle\n", | |
| " warnings.warn(\"save is deprecated, use to_pickle\", FutureWarning)\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 135 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pd.load('ch06/frame_pickle')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stderr", | |
| "text": [ | |
| "/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/core/common.py:2306: FutureWarning: load is deprecated, use read_pickle\n", | |
| " warnings.warn(\"load is deprecated, use read_pickle\", FutureWarning)\n" | |
| ] | |
| }, | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " <th>message</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 1</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> hello</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 6</td>\n", | |
| " <td> 7</td>\n", | |
| " <td> 8</td>\n", | |
| " <td> world</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 9</td>\n", | |
| " <td> 10</td>\n", | |
| " <td> 11</td>\n", | |
| " <td> 12</td>\n", | |
| " <td> foo</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 136, | |
| "text": [ | |
| " a b c d message\n", | |
| "0 1 2 3 4 hello\n", | |
| "1 5 6 7 8 world\n", | |
| "2 9 10 11 12 foo" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 136 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### pickle \uc0ac\uc6a9\uc2dc \uc8fc\uc758\uc0ac\ud56d\n", | |
| "\n", | |
| "- pickle\uc740 \uc624\ub798 \ubcf4\uad00\ud560 \ud544\uc694\uac00 \uc5c6\ub294 \ub370\uc774\ud130\uc5d0\ub9cc \ucd94\ucc9c\n", | |
| "- \uc624\ub79c \uc2dc\uac04\uc774 \uc9c0\ub098\ub3c4 \uc548\uc815\uc801\uc73c\ub85c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud560 \uac70\ub77c\uace0 \ubcf4\uc7a5 \ubabb\ud568" | |
| ] | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "6.2.1 HDF5 \ud615\uc2dd \uc0ac\uc6a9\ud558\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "- \ub514\uc2a4\ud06c\uc5d0 \uc774\uc9c4 \ud615\uc2dd\uc73c\ub85c \uc800\uc7a5\ub41c \ub300\uc6a9\ub7c9\uc758 \uacfc\ud559 \uc790\ub8cc\ub97c \ud6a8\uc728\uc801\uc73c\ub85c \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ub2e4\uc591\ud55c \ub3c4\uad6c \uc874\uc7ac\n", | |
| "- \uc0b0\uc5c5 \uae30\uc900\uc5d0 \ub9de\ub294 \uc778\uae30 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc911 \ud558\ub098\uac00 HDF5(Hierarchical Data Format), \uacc4\uce35\uc801 \ub370\uc774\ud130 \ud615\uc2dd\n", | |
| "- \ub0b4\ubd80\uc801\uc73c\ub85c \ud30c\uc77c \uc2dc\uc2a4\ud15c \uac19\uc740 \ub178\ub4dc \uad6c\uc870\n", | |
| "- \uc5ec\ub7ec \uac1c\uc758 \ub370\uc774\ud130\uc14b\uc744 \uc800\uc7a5\ud558\uace0 \ubd80\uac00 \uc815\ubcf4 \uae30\ub85d \uac00\ub2a5\n", | |
| "- \ub2e4\uc591\ud55c \uc555\ucd95 \uae30\uc220\uc744 \uc0ac\uc6a9\ud574\uc11c on-the-fly(\uc2e4\uc2dc\uac04) \uc555\ucd95 \uc9c0\uc6d0\n", | |
| "- \ubc18\ubcf5\ub418\ub294 \ud328\ud134\uc744 \uac00\uc9c4 \ub370\uc774\ud130 \uc880 \ub354 \ud6a8\uacfc\uc801 \uc800\uc7a5\n", | |
| "- \uba54\ubaa8\ub9ac\uc5d0 \ubaa8\ub450 \uc801\uc7ac\ud560 \uc218 \uc5c6\ub294 \uc5c4\u314a\uc5b4\ub098\uac8c \ud070 \ub370\uc774\ud130\ub97c \uc544\uc8fc \ud070 \ubc30\uc5f4\uc5d0\uc11c \ud544\uc694\ud55c \ub9cc\ud07c\uc758 \uc791\uc740 \ubd80\ubd84\ub4e4\ub9cc \ud6a8\uacfc\uc801\uc73c\ub85c \uc77d\uace0 \uc4f8 \uc218 \uc788\ub294 \ud6cc\ub96d\ud55c \uc120\ud0dd\n", | |
| "\n", | |
| "- PyTables: HDF5\ub97c \ucd94\uc0c1\ud654\ud558\uc5ec \uc5ec\ub7ec\uac00\uc9c0 \uc720\uc5f0\ud55c \ub370\uc774\ud130 \ucee8\ud14c\uc774\ub108\uc640 \ud14c\uc774\ube14 \uc0c9\uc778, \uc9c8\uc758 \uae30\ub2a5 \uadf8\ub9ac\uace0 \uc678\ubd80 \uba54\ubaa8\ub9ac \uc5f0\uc0b0(out-of-core, external memory algorithm) \uc9c0\uc6d0\n", | |
| "- h5py: \uc9c1\uc811\uc801\uc774\uc9c0\ub9cc \uace0\uc218\uc900\uc758 HDF5 API\uc5d0 \ub300\ud55c \uc778\ud130\ud398\uc774\uc2a4 \uc81c\uacf5\n", | |
| "- pandas\ub294 PyTable\ub97c \uc774\uc6a9\ud55c HDFStore\ub77c\ub294 \uac00\ubcbc\uc6b4 \uc0ac\uc804 \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 pandas \uac1d\uccb4\ub97c \uc800\uc7a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "store = pd.HDFStore('mydata.h5')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "Exception", | |
| "evalue": "HDFStore requires PyTables", | |
| "output_type": "pyerr", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mException\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-137-35f4287dfd8f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mstore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mHDFStore\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'mydata.h5'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/pytables.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path, mode, complevel, complib, fletcher32, **kwargs)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtables\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pragma: no cover\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 345\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'HDFStore requires PyTables'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 346\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 347\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mException\u001b[0m: HDFStore requires PyTables" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 137 | |
| }, | |
| { | |
| "cell_type": "raw", | |
| "metadata": {}, | |
| "source": [ | |
| "\n", | |
| "---------------------------------------------------------------------------\n", | |
| "Exception Traceback (most recent call last)\n", | |
| "<ipython-input-135-35f4287dfd8f> in <module>()\n", | |
| "----> 1 store = pd.HDFStore('mydata.h5')\n", | |
| "\n", | |
| "/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/pytables.pyc in __init__(self, path, mode, complevel, complib, fletcher32, **kwargs)\n", | |
| " 343 import tables as _\n", | |
| " 344 except ImportError: # pragma: no cover\n", | |
| "--> 345 raise Exception('HDFStore requires PyTables')\n", | |
| " 346 \n", | |
| " 347 self._path = path\n", | |
| "\n", | |
| "Exception: HDFStore requires PyTables" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### HDFStore\ub97c \uc0ac\uc6a9\ud558\uae30 \uc704\ud574\uc11c\ub294 PyTables \ub77c\uc774\ube0c\ub7ec\ub9ac\ub97c \uc124\uce58\ud574\uc57c \ud55c\ub2e4.\n", | |
| "\n", | |
| "- \uadf8\ub0e5 \ud14c\uc2a4\ud2b8\uae30 \ub54c\ubb38\uc5d0 pass \ud55c\ub2e4.\n", | |
| "- \uc774\ub7f0\uac8c \uc788\ub2e4\ub294 \uac83\ub9cc indexing \ud574\ub450\uace0 \ub098\uc911\uc5d0 \ud544\uc694\ud558\uba74 \ucc3e\uc544\ubcf4\uc790!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "raw", | |
| "metadata": {}, | |
| "source": [ | |
| "# \ub77c\uc774\ube0c\ub7ec\ub9ac \uc124\uce58\ud574\ubcf4\uace0 \ud14c\uc2a4\ud2b8 \ud574\ubcf4\ub77c.\n", | |
| "\n", | |
| "store = pd.HDFStore('mydata.h5')\n", | |
| "store['obj1'] = frame\n", | |
| "store['obj1_col'] = frame['a']\n", | |
| "store\n", | |
| "store[obj1']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \ub370\uc774\ud130 \ubd84\uc11d \ubb38\uc81c\n", | |
| "\n", | |
| "- \ub300\ubd80\ubd84 CPU\ubcf4\ub2e4\ub294 IO \uc131\ub2a5\uc5d0 \uc758\uc874\uc801\n", | |
| "- HDF5\ub294 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uac00 \uc544\ub2c8\ub2e4. HDF5\ub294 \ud55c \ubc88\ub9cc \uae30\ub85d\ud558\uace0 \uc5ec\ub7ec \ubc88 \uc790\uc8fc \uc77d\uc5b4\uc57c \ud558\ub294 \ub370\uc774\ud130\uc5d0 \ucd5c\uc801\ud654\ub418\uc5b4 \uc788\ub2e4. \ub370\uc774\ud130\ub294 \uc544\ubb34\ub54c\ub098 \ud30c\uc77c\uc5d0 \ucd94\uac00\ud560 \uc218 \uc788\uc9c0\ub9cc \ub9cc\uc57d \uc5ec\ub7ec \uacf3\uc5d0\uc11c \ub3d9\uc2dc\uc5d0 \ud30c\uc77c\uc744 \uc4f4\ub2e4\uba74 \ud30c\uc77c\uc774 \uae68\uc9c0\ub294 \ubb38\uc81c\uac00 \ubc1c\uc0dd\ud560 \uc218 \uc788\ub2e4." | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### 6.2.2 \ub9c8\uc774\ud06c\ub85c\uc18c\ud504\ud2b8 \uc5d1\uc140 \ud30c\uc77c\uc5d0\uc11c \ub370\uc774\ud130 \uc77d\uc5b4\uc624\uae30\n", | |
| "\n", | |
| "- pandas\ub294 ExcelFile \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 \ub9c8\uc774\ud06c\ub85c\uc18c\ud504\ud2b8 \uc5d1\uc140 2003 \uc774\ud6c4 \ubc84\uc804\uc758 \ub370\uc774\ud130\ub97c \uc77d\uae30 \uac00\ub2a5\n", | |
| "- \ub0b4\ubd80\uc801\uc73c\ub85c ExcelFile \ud074\ub798\uc2a4\ub294 xlrd, openpyxl \ud328\ud0a4\uc9c0 \ud65c\uc6a9. \uc0ac\uc6a9\ud558\uae30 \uc804\uc5d0 \uba3c\uc800 \uc124\uce58" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "xls_file = pd.ExcelFile('data.xls')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "ImportError", | |
| "evalue": "No module named xlrd", | |
| "output_type": "pyerr", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-138-42764eb0b1ec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mxls_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExcelFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data.xls'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;32m/Library/Python/2.7/site-packages/pandas-0.12.0_307_g3a2fe0b-py2.7-macosx-10.8-intel.egg/pandas/io/excel.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path_or_buf, **kwds)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath_or_buf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mxlrd\u001b[0m \u001b[0;31m# throw an ImportError if we need to\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0mver\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxlrd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__VERSION__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mImportError\u001b[0m: No module named xlrd" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 138 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "table = xls_file.parse('Sheet1')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "NameError", | |
| "evalue": "name 'xls_file' is not defined", | |
| "output_type": "pyerr", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-139-b85a48af3b10>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxls_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Sheet1'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[0;31mNameError\u001b[0m: name 'xls_file' is not defined" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 139 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "6.3 HTML, \uc6f9 API\uc640 \ud568\uaf10 \uc0ac\uc6a9\ud558\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### [Requests: HTTP for Humans](http://docs.python-requests.org/en/latest/#)\n", | |
| "\n", | |
| "- urllib2\ubcf4\ub2e4 \ub354 \uac04\ud3b8\n", | |
| "- [similar code, without Requests.](https://gist.github.com/973705)\n", | |
| "\n", | |
| "### \ud2b8\uc704\ud130 \ubd84\uc11d \ubb38\uc81c \ubc1c\uc0dd\n", | |
| "\n", | |
| "- \ud2b8\uc704\ud130\ub294 \ucc98\uc74c\uc5d0 \uc544\ubb34\ub7f0 \uc778\uc99d\uc5c6\uc774 API\ub97c \uc81c\uacf5\ud558\ub2e4 \ub9dd \uacfc\ubd80\ud558\uac00 \ubc1c\uc0dd\ud558\uc790 OAuth \uc778\uc99d \ubc29\uc2dd\uc73c\ub85c \ubcc0\uacbd\n", | |
| "- [OAuth2 - API \uc778\uc99d\uc744 \uc704\ud55c \ub9cc\ub2a5\ub3c4\uad6c\uc0c1\uc790](http://www.slideshare.net/tebica/oauth2-api)\n", | |
| "- [Twitter API](https://dev.twitter.com/docs/auth/sign-twitter)\n", | |
| "- \uc9c0\uae08\uc740 \uc778\uc99d \ubb38\uc81c \ub54c\ubb38\uc5d0 pass \ud558\uaca0\uc74c" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "import requests" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 140 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "url = 'http://search.twitter.com/search.json?q=python%20pandas'" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 141 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "resp = requests.get(url)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 142 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "resp" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 143, | |
| "text": [ | |
| "<Response [401]>" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 143 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "resp.text" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 144, | |
| "text": [ | |
| "u'{\"errors\":[{\"message\":\"The Twitter REST API v1 is no longer active. Please migrate to API v1.1. https://dev.twitter.com/docs/api/1.1/overview.\",\"code\":64}]}'" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 144 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### \uc870\uae08\ub9cc \uc218\uace0\ub97c \ud558\uba74 \ud3c9\ubc94\ud55c \uc6f9 API\ub97c \uc704\ud55c \uace0\uc218\uc900\uc758 \uc778\ud130\ud398\uc774\uc2a4\ub97c \ub9cc\ub4e4\uc5b4\uc11c DataFrame\uc5d0 \uc800\uc7a5\ud558\uace0 \uc27d\uac8c \ubd84\uc11d \uc791\uc5c5 \uc218\ud589 \uac00\ub2a5" | |
| ] | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "6.4 \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc640 \ud568\uaed8 \uc0ac\uc6a9\ud558\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "- \ub300\ubd80\ubd84\uc758 \uc560\ud50c\ub9ac\ucf00\uc774\uc158\uc740 \ud14d\uc2a4\ud2b8 \ud30c\uc77c\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc624\uc9c0 \uc54a\uc74c\n", | |
| "- \uc65c\ub0d0\ud558\uba74 \ub300\uc6a9\ub7c9\uc758 \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uae30\uc5d0 \ud14d\uc2a4\ud2b8 \ud30c\uc77c\uc740 \uc0c1\ub2f9\ud788 \ube44\ud6a8\uc728\uc801\n", | |
| "- SQL \uae30\ubc18\uc758 \uad00\uacc4\ud615 \ub370\uc774\ud130 \ubca0\uc774\uc2a4\uac00 \ub9ce\uc774 \uc0ac\uc6a9\ub428. MySql \uac19\uc740\n", | |
| "- \ucd5c\uadfc \uc720\uba85\ud574\uc9c4 NoSQL\uc774\ub77c \ubd88\ub9ac\ub294 \ube44 SQL \uae30\ubc18\uc758 \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub3c4 \ub9ce\uc774 \uc0ac\uc6a9\ub428\n", | |
| "- SQL vs NoSQL\uc740 \uc11c\ub85c \uac01\uac01\uc758 \uc7a5\uc810\uc744 \ud30c\uc545\ud558\uace0 \uc790\uc2e0\uc758 \uc5c5\ubb34\uc5d0 \ub9de\ub294 DB\ub97c \uc120\ud0dd\n", | |
| "- SQL\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uc77d\uc5b4\uc640\uc11c DataFrame\uc5d0 \uc800\uc7a5\ud558\ub294 \ubc29\ubc95\uc740 \uaf64 \uc9c1\uad00\uc801" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "import sqlite3\n", | |
| "\n", | |
| "query = \"\"\"\n", | |
| "CREATE TABLE test\n", | |
| "(a VARCHAR(20), b VARCHAR(20),\n", | |
| "c REAL, d INTEGER\n", | |
| ");\"\"\"\n", | |
| "\n", | |
| "con = sqlite3.connect(':memory:')\n", | |
| "con.execute(query)\n", | |
| "con.commit()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 145 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "data = [('Atlanta', 'Georgia', 1.25, 6),\n", | |
| " ('Tallahassee', 'Florida', 2.6, 3),\n", | |
| " ('Sacramento', 'California', 1.7, 5)]\n", | |
| "\n", | |
| "stmt = \"INSERT INTO test VALUES(?, ?, ?, ?)\"\n", | |
| "\n", | |
| "con.executemany(stmt, data)\n", | |
| "con.commit()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 146 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "\ub300\ubd80\ubd84\uc758 \ud30c\uc774\uc36c SQL \ub4dc\ub77c\uc774\ubc84(PyODBC, psycopg2, MySQLdb, pymssql \ub4f1)\ub294 \ud14c\uc774\ube14\uc5d0 \ub300\ud574 select \ucffc\ub9ac\ub97c \uc218\ud589\ud558\uba74 \ud29c\ud50c \ub9ac\uc2a4\ud2b8\ub97c \ubc18\ud658\ud55c\ub2e4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "cursor = con.execute('select * from test')" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 147 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "rows = cursor.fetchall()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 148 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "rows" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 149, | |
| "text": [ | |
| "[(u'Atlanta', u'Georgia', 1.25, 6),\n", | |
| " (u'Tallahassee', u'Florida', 2.6, 3),\n", | |
| " (u'Sacramento', u'California', 1.7, 5)]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 149 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "\ubc18\ud658\ub41c \ud29c\ud50c \ub9ac\uc2a4\ud2b8\ub97c DataFrame \uc0dd\uc131\uc790\uc5d0 \ubc14\ub85c \uc804\ub2ec\ud574\ub3c4 \ub418\uc9c0\ub9cc \uce7c\ub7fc\uc758 \uc774\ub984\uc744 \uc9c0\uc815\ud574\uc8fc\uba74 \ub354 \ud3b8\ud558\ub2e4. cursor\uc758 description \uc18d\uc131\uc744 \ud65c\uc6a9\ud558\uc790." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "cursor.description" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 150, | |
| "text": [ | |
| "(('a', None, None, None, None, None, None),\n", | |
| " ('b', None, None, None, None, None, None),\n", | |
| " ('c', None, None, None, None, None, None),\n", | |
| " ('d', None, None, None, None, None, None))" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 150 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "DataFrame(rows, columns=zip(*cursor.description)[0])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> Atlanta</td>\n", | |
| " <td> Georgia</td>\n", | |
| " <td> 1.25</td>\n", | |
| " <td> 6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> Tallahassee</td>\n", | |
| " <td> Florida</td>\n", | |
| " <td> 2.60</td>\n", | |
| " <td> 3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> Sacramento</td>\n", | |
| " <td> California</td>\n", | |
| " <td> 1.70</td>\n", | |
| " <td> 5</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 151, | |
| "text": [ | |
| " a b c d\n", | |
| "0 Atlanta Georgia 1.25 6\n", | |
| "1 Tallahassee Florida 2.60 3\n", | |
| "2 Sacramento California 1.70 5" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 151 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "- \ub370\uc774\ud130\ubca0\uc774\uc2a4\uc5d0 \ucffc\ub9ac\ub97c \ubcf4\ub0b4\ub824\uace0 \ub9e4\ubc88 \uc774\ub807\uac8c \ud558\ub294\uac74 \ub108\ubb34 \uadc0\ucc2e\uc74c\n", | |
| "- pandas.io.sql \ubaa8\ub4c8\uc758 read_frame \ud568\uc218\ub97c \uc774\uc6a9\ud558\uba74 \uac04\ud3b8\ud558\uac8c \ud574\uacb0\n", | |
| "- \uadf8\ub0e5 select \ucffc\ub9ac\ubb38\uacfc \ub370\uc774\ud130 \ubca0\uc774\uc2a4 \uc5f0\uacb0 \uac1d\uccb4(con)\ub9cc \ub118\uae30\uba74 \ub41c\ub2e4" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "import pandas.io.sql as sql" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 152 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "sql.read_frame('select * from test', con)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>b</th>\n", | |
| " <th>c</th>\n", | |
| " <th>d</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> Atlanta</td>\n", | |
| " <td> Georgia</td>\n", | |
| " <td> 1.25</td>\n", | |
| " <td> 6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> Tallahassee</td>\n", | |
| " <td> Florida</td>\n", | |
| " <td> 2.60</td>\n", | |
| " <td> 3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> Sacramento</td>\n", | |
| " <td> California</td>\n", | |
| " <td> 1.70</td>\n", | |
| " <td> 5</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 153, | |
| "text": [ | |
| " a b c d\n", | |
| "0 Atlanta Georgia 1.25 6\n", | |
| "1 Tallahassee Florida 2.60 3\n", | |
| "2 Sacramento California 1.70 5" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 153 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "6.4.1 MongoDB\uc5d0 \ub370\uc774\ud130 \uc800\uc7a5\ud558\uace0 \ubd88\ub7ec\uc624\uae30" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "- NoSQL \ub370\uc774\ud130\ubca0\uc774\uc2a4\ub294 \ub9e4\uc6b0 \ub2e4\uc591\ud55c \ud615\ud0dc\n", | |
| "- \ubc84\ud074\ub9acDB\ub098 \ub3c4\ucfc4\uce90\ube44\ub2db \uac19\uc740 \uac83\uc740 \uc0ac\uc804\ucc98\ub7fc \ud0a4-\uac12\uc744 \uc800\uc7a5\ud558\uae30\ub3c4 \ud558\uace0\n", | |
| "- \ub610 \ub2e4\ub978 \uac83\uc740 \uae30\ubcf8 \uc800\uc7a5\uc18c\ub294 \uc0ac\uc804 \uac19\uc740 \uac1d\uccb4\ub97c \uc0ac\uc6a9\ud558\uba70 \ubb38\uc11c \uae30\ubc18\uc73c\ub85c \ub370\uc774\ud130\ub97c \uc800\uc7a5\ud558\uae30\ub3c4 \ud55c\ub2e4.\n", | |
| "- \uc774 \ucc45\uc5d0\uc11c\ub294 [MongoDB](http://mongodb.org)\ub97c \uc608\uc81c\ub85c \uc120\ud0dd\n", | |
| "- MongoDB \uc11c\ubc84\ub97c \ub85c\uceec\uc5d0 \uc124\uce58\ud558\uace0 \uacf5\uc2dd \ub4dc\ub77c\uc774\ubc84\uc778 pymongo\ub97c \uc0ac\uc6a9\ud574\uc11c \uae30\ubcf8 \ud3ec\ud2b8\ub85c \ubc88\ud638\ub85c \uc5f0\uacb0\n", | |
| "- \ud604\uc7ac \ud544\uc790\uc758 \ucef4\ud4e8\ud130\uc5d0\ub294 \uc544\uc9c1 \uc124\uce58\ud558\uc9c0 \uc54a\uc74c. \uc774\ub7f0 \ud615\uc2dd\uc73c\ub85c \ud55c\ub2e4\ub294 \ub290\ub08c\ub9cc \uac00\uc9c0\uc790" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment