Created
July 6, 2014 07:46
-
-
Save RobinL/107f6946398655d83d12 to your computer and use it in GitHub Desktop.
First steps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:3672de2a5757dfb0cea1ce29671ac75e8dcfcb360e88722112d3150498eb386d" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"\n", | |
"#Usually you could do the following on a csv file to read it into Python, but yours doesn't meet the spec so the import errors\n", | |
"#pd.read_csv(r\"C:\\Users\\Robin\\Desktop\\greg\\out100_geom.csv\")" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 41 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"text_file = open(r\"C:\\Users\\Robin\\Desktop\\greg\\out100_geom.csv\",\"r\")\n", | |
"lines = text_file.read().split(\"\\n\")" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"newlines = []\n", | |
"for line in lines:\n", | |
" line = line.replace(\" \", \"\")\n", | |
" newlines.append(line.split(\",\"))\n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 36 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"df = pd.DataFrame(newlines)\n", | |
"df[:20] #This just displays the data" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" <th>2</th>\n", | |
" <th>3</th>\n", | |
" <th>4</th>\n", | |
" <th>5</th>\n", | |
" <th>6</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0 </th>\n", | |
" <td> 0.1154000000000000E-02</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1 </th>\n", | |
" <td> 0.3750000000000000E-02</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2 </th>\n", | |
" <td> 0.3000000000000000E-03</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3 </th>\n", | |
" <td> 0.6928203230275509E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4 </th>\n", | |
" <td> 0.1128735660559901E-03</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5 </th>\n", | |
" <td> 0.1551734123505818E-03</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6 </th>\n", | |
" <td> 294</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7 </th>\n", | |
" <td> 654</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8 </th>\n", | |
" <td> 948</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9 </th>\n", | |
" <td> -0.1507631069447828E-04</td>\n", | |
" <td> -0.5000301924910927E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td> -0.1500194316020339E-04</td>\n", | |
" <td> -0.5370053071584762E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td> -0.1749409844757973E-04</td>\n", | |
" <td> -0.5386785305567992E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td> -0.2055076404940263E-04</td>\n", | |
" <td> -0.5254495197646113E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td> -0.2133109769736448E-04</td>\n", | |
" <td> -0.5102141927611776E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td> -0.1739899657348860E-04</td>\n", | |
" <td> -0.4875728873032857E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td> -0.9184659004444475E-05</td>\n", | |
" <td> -0.4975817923201625E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td> -0.9141899013156223E-05</td>\n", | |
" <td> -0.5454013784688450E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td> -0.1186917140177538E-04</td>\n", | |
" <td> -0.5482135443098251E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td> -0.1187897631635176E-04</td>\n", | |
" <td> -0.4828975990224411E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td> -0.3085468329002458E-05</td>\n", | |
" <td> -0.4965511906208863E-04</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" <td> None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>20 rows \u00d7 7 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 48, | |
"text": [ | |
" 0 1 2 3 4 5 \\\n", | |
"0 0.1154000000000000E-02 None None None None None \n", | |
"1 0.3750000000000000E-02 None None None None None \n", | |
"2 0.3000000000000000E-03 None None None None None \n", | |
"3 0.6928203230275509E-04 None None None None None \n", | |
"4 0.1128735660559901E-03 None None None None None \n", | |
"5 0.1551734123505818E-03 None None None None None \n", | |
"6 294 None None None None None \n", | |
"7 654 None None None None None \n", | |
"8 948 None None None None None \n", | |
"9 -0.1507631069447828E-04 -0.5000301924910927E-04 None None None None \n", | |
"10 -0.1500194316020339E-04 -0.5370053071584762E-04 None None None None \n", | |
"11 -0.1749409844757973E-04 -0.5386785305567992E-04 None None None None \n", | |
"12 -0.2055076404940263E-04 -0.5254495197646113E-04 None None None None \n", | |
"13 -0.2133109769736448E-04 -0.5102141927611776E-04 None None None None \n", | |
"14 -0.1739899657348860E-04 -0.4875728873032857E-04 None None None None \n", | |
"15 -0.9184659004444475E-05 -0.4975817923201625E-04 None None None None \n", | |
"16 -0.9141899013156223E-05 -0.5454013784688450E-04 None None None None \n", | |
"17 -0.1186917140177538E-04 -0.5482135443098251E-04 None None None None \n", | |
"18 -0.1187897631635176E-04 -0.4828975990224411E-04 None None None None \n", | |
"19 -0.3085468329002458E-05 -0.4965511906208863E-04 None None None None \n", | |
"\n", | |
" 6 \n", | |
"0 None \n", | |
"1 None \n", | |
"2 None \n", | |
"3 None \n", | |
"4 None \n", | |
"5 None \n", | |
"6 None \n", | |
"7 None \n", | |
"8 None \n", | |
"9 None \n", | |
"10 None \n", | |
"11 None \n", | |
"12 None \n", | |
"13 None \n", | |
"14 None \n", | |
"15 None \n", | |
"16 None \n", | |
"17 None \n", | |
"18 None \n", | |
"19 None \n", | |
"\n", | |
"[20 rows x 7 columns]" | |
] | |
} | |
], | |
"prompt_number": 48 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#Now we can manipulate the dataframe to keep only columns where there is data in cols 0 and 1, and no where else:\n", | |
"\n", | |
"#Keep records only if col 2 is blank\n", | |
"df2 = df[pd.isnull(df[2]) == True]\n", | |
"\n", | |
"#Keep records only if col1 is not blank\n", | |
"df3 = df2[pd.isnull(df2[1]) == False]\n", | |
"\n", | |
"#Drop the extraneous columns (we only needed them to know which rows to filter out)\n", | |
"df4 = df3.drop([2,3,4,5,6], axis=1)\n", | |
"\n", | |
"df4.head() #Display the first 10 records" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>0</th>\n", | |
" <th>1</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>9 </th>\n", | |
" <td> -0.1507631069447828E-04</td>\n", | |
" <td> -0.5000301924910927E-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td> -0.1500194316020339E-04</td>\n", | |
" <td> -0.5370053071584762E-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td> -0.1749409844757973E-04</td>\n", | |
" <td> -0.5386785305567992E-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td> -0.2055076404940263E-04</td>\n", | |
" <td> -0.5254495197646113E-04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td> -0.2133109769736448E-04</td>\n", | |
" <td> -0.5102141927611776E-04</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows \u00d7 2 columns</p>\n", | |
"</div>" | |
], | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 72, | |
"text": [ | |
" 0 1\n", | |
"9 -0.1507631069447828E-04 -0.5000301924910927E-04\n", | |
"10 -0.1500194316020339E-04 -0.5370053071584762E-04\n", | |
"11 -0.1749409844757973E-04 -0.5386785305567992E-04\n", | |
"12 -0.2055076404940263E-04 -0.5254495197646113E-04\n", | |
"13 -0.2133109769736448E-04 -0.5102141927611776E-04\n", | |
"\n", | |
"[5 rows x 2 columns]" | |
] | |
} | |
], | |
"prompt_number": 72 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment