Skip to content

Instantly share code, notes, and snippets.

@RobinL
Created July 6, 2014 07:46
Show Gist options
  • Save RobinL/107f6946398655d83d12 to your computer and use it in GitHub Desktop.
Save RobinL/107f6946398655d83d12 to your computer and use it in GitHub Desktop.
First steps
{
"metadata": {
"name": "",
"signature": "sha256:3672de2a5757dfb0cea1ce29671ac75e8dcfcb360e88722112d3150498eb386d"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"\n",
"#Usually you could do the following on a csv file to read it into Python, but yours doesn't meet the spec so the import errors\n",
"#pd.read_csv(r\"C:\\Users\\Robin\\Desktop\\greg\\out100_geom.csv\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"text_file = open(r\"C:\\Users\\Robin\\Desktop\\greg\\out100_geom.csv\",\"r\")\n",
"lines = text_file.read().split(\"\\n\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"newlines = []\n",
"for line in lines:\n",
" line = line.replace(\" \", \"\")\n",
" newlines.append(line.split(\",\"))\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = pd.DataFrame(newlines)\n",
"df[:20] #This just displays the data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> 0.1154000000000000E-02</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td> 0.3750000000000000E-02</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td> 0.3000000000000000E-03</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> 0.6928203230275509E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> 0.1128735660559901E-03</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> 0.1551734123505818E-03</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td> 294</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td> 654</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> 948</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> -0.1507631069447828E-04</td>\n",
" <td> -0.5000301924910927E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td> -0.1500194316020339E-04</td>\n",
" <td> -0.5370053071584762E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td> -0.1749409844757973E-04</td>\n",
" <td> -0.5386785305567992E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td> -0.2055076404940263E-04</td>\n",
" <td> -0.5254495197646113E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td> -0.2133109769736448E-04</td>\n",
" <td> -0.5102141927611776E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td> -0.1739899657348860E-04</td>\n",
" <td> -0.4875728873032857E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td> -0.9184659004444475E-05</td>\n",
" <td> -0.4975817923201625E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td> -0.9141899013156223E-05</td>\n",
" <td> -0.5454013784688450E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td> -0.1186917140177538E-04</td>\n",
" <td> -0.5482135443098251E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td> -0.1187897631635176E-04</td>\n",
" <td> -0.4828975990224411E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td> -0.3085468329002458E-05</td>\n",
" <td> -0.4965511906208863E-04</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" <td> None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20 rows \u00d7 7 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 48,
"text": [
" 0 1 2 3 4 5 \\\n",
"0 0.1154000000000000E-02 None None None None None \n",
"1 0.3750000000000000E-02 None None None None None \n",
"2 0.3000000000000000E-03 None None None None None \n",
"3 0.6928203230275509E-04 None None None None None \n",
"4 0.1128735660559901E-03 None None None None None \n",
"5 0.1551734123505818E-03 None None None None None \n",
"6 294 None None None None None \n",
"7 654 None None None None None \n",
"8 948 None None None None None \n",
"9 -0.1507631069447828E-04 -0.5000301924910927E-04 None None None None \n",
"10 -0.1500194316020339E-04 -0.5370053071584762E-04 None None None None \n",
"11 -0.1749409844757973E-04 -0.5386785305567992E-04 None None None None \n",
"12 -0.2055076404940263E-04 -0.5254495197646113E-04 None None None None \n",
"13 -0.2133109769736448E-04 -0.5102141927611776E-04 None None None None \n",
"14 -0.1739899657348860E-04 -0.4875728873032857E-04 None None None None \n",
"15 -0.9184659004444475E-05 -0.4975817923201625E-04 None None None None \n",
"16 -0.9141899013156223E-05 -0.5454013784688450E-04 None None None None \n",
"17 -0.1186917140177538E-04 -0.5482135443098251E-04 None None None None \n",
"18 -0.1187897631635176E-04 -0.4828975990224411E-04 None None None None \n",
"19 -0.3085468329002458E-05 -0.4965511906208863E-04 None None None None \n",
"\n",
" 6 \n",
"0 None \n",
"1 None \n",
"2 None \n",
"3 None \n",
"4 None \n",
"5 None \n",
"6 None \n",
"7 None \n",
"8 None \n",
"9 None \n",
"10 None \n",
"11 None \n",
"12 None \n",
"13 None \n",
"14 None \n",
"15 None \n",
"16 None \n",
"17 None \n",
"18 None \n",
"19 None \n",
"\n",
"[20 rows x 7 columns]"
]
}
],
"prompt_number": 48
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Now we can manipulate the dataframe to keep only columns where there is data in cols 0 and 1, and no where else:\n",
"\n",
"#Keep records only if col 2 is blank\n",
"df2 = df[pd.isnull(df[2]) == True]\n",
"\n",
"#Keep records only if col1 is not blank\n",
"df3 = df2[pd.isnull(df2[1]) == False]\n",
"\n",
"#Drop the extraneous columns (we only needed them to know which rows to filter out)\n",
"df4 = df3.drop([2,3,4,5,6], axis=1)\n",
"\n",
"df4.head() #Display the first 10 records"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> -0.1507631069447828E-04</td>\n",
" <td> -0.5000301924910927E-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td> -0.1500194316020339E-04</td>\n",
" <td> -0.5370053071584762E-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td> -0.1749409844757973E-04</td>\n",
" <td> -0.5386785305567992E-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td> -0.2055076404940263E-04</td>\n",
" <td> -0.5254495197646113E-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td> -0.2133109769736448E-04</td>\n",
" <td> -0.5102141927611776E-04</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows \u00d7 2 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 72,
"text": [
" 0 1\n",
"9 -0.1507631069447828E-04 -0.5000301924910927E-04\n",
"10 -0.1500194316020339E-04 -0.5370053071584762E-04\n",
"11 -0.1749409844757973E-04 -0.5386785305567992E-04\n",
"12 -0.2055076404940263E-04 -0.5254495197646113E-04\n",
"13 -0.2133109769736448E-04 -0.5102141927611776E-04\n",
"\n",
"[5 rows x 2 columns]"
]
}
],
"prompt_number": 72
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment