Created
March 9, 2017 21:45
-
-
Save iangow/4d8a2a9f0488d8093a2d08cb74d73eb9 to your computer and use it in GitHub Desktop.
Code to get data from PostgreSQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from sqlalchemy import MetaData, create_engine, Table\n", | |
"from sqlalchemy.sql import func\n", | |
"\n", | |
"metadata = MetaData()\n", | |
"engine = create_engine('postgresql://iangow.me/crsp')\n", | |
" \n", | |
"photo_matches = Table('photo_matches', metadata, autoload = True, autoload_with=engine, schema=\"director_photo\")\n", | |
"executive = Table('executive', metadata, autoload = True, autoload_with=engine, schema=\"executive\")\n", | |
"proxy_board_director = Table('proxy_board_director', metadata, autoload = True, \\\n", | |
" autoload_with=engine, schema=\"executive\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 158, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dirs = executive.join(proxy_board_director, executive.c.executive_id == proxy_board_director.c.executive_id)\n", | |
"pm = dirs.outerjoin(photo_matches, executive.c.executive_id == photo_matches.c.executive_id)\n", | |
" \n", | |
"edgar = select([executive.c.executive_id, \n", | |
" func.coalesce(func.bool_or(photo_matches.c.local_path != ''), False).\\\n", | |
" label('has_edgar_match')]).\\\n", | |
" where(executive.c.executive_id > 0).\\\n", | |
" group_by(executive.c.executive_id).\\\n", | |
" select_from(pm).\\\n", | |
" alias('edgar')\n", | |
"# pd.read_sql(edgar, engine)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 159, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"equilar = select([executive.c.executive_id, executive.c.large_photo_path,\n", | |
" executive.c.lname , executive.c.fname, executive.c.middle_name,\n", | |
" executive.c.name_suffix]).\\\n", | |
" where(executive.c.large_photo_path.isnot(None)).\\\n", | |
" select_from(dirs).\\\n", | |
" alias('equilar')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>executive_id</th>\n", | |
" <th>large_photo_path</th>\n", | |
" <th>lname</th>\n", | |
" <th>fname</th>\n", | |
" <th>middle_name</th>\n", | |
" <th>name_suffix</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>914060</td>\n", | |
" <td>914000/914060.jpg</td>\n", | |
" <td>Lockhart</td>\n", | |
" <td>H. Eugene</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>86153</td>\n", | |
" <td>86000/86153.jpg</td>\n", | |
" <td>Leighton</td>\n", | |
" <td>Allan</td>\n", | |
" <td>L.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>706301</td>\n", | |
" <td>706000/706301.jpg</td>\n", | |
" <td>Adolph</td>\n", | |
" <td>Gerald</td>\n", | |
" <td>S.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>970115</td>\n", | |
" <td>673000/673818.jpg</td>\n", | |
" <td>West</td>\n", | |
" <td>David</td>\n", | |
" <td>J.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>594560</td>\n", | |
" <td>594000/594560.jpg</td>\n", | |
" <td>Townsend</td>\n", | |
" <td>John</td>\n", | |
" <td>L.</td>\n", | |
" <td>III</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>775162</td>\n", | |
" <td>775000/775162.jpg</td>\n", | |
" <td>Biggins</td>\n", | |
" <td>J. Veronica</td>\n", | |
" <td>None</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>200080</td>\n", | |
" <td>200000/200080.jpg</td>\n", | |
" <td>Palmer</td>\n", | |
" <td>Anthony</td>\n", | |
" <td>J.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>706301</td>\n", | |
" <td>706000/706301.jpg</td>\n", | |
" <td>Adolph</td>\n", | |
" <td>Gerald</td>\n", | |
" <td>S.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>13179</td>\n", | |
" <td>13000/13179.jpg</td>\n", | |
" <td>Jackson</td>\n", | |
" <td>Lawrence</td>\n", | |
" <td>V.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>58866</td>\n", | |
" <td>58000/58866.jpg</td>\n", | |
" <td>Verma</td>\n", | |
" <td>Inder</td>\n", | |
" <td>M.</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" executive_id large_photo_path lname fname middle_name \\\n", | |
"0 914060 914000/914060.jpg Lockhart H. Eugene None \n", | |
"1 86153 86000/86153.jpg Leighton Allan L. \n", | |
"2 706301 706000/706301.jpg Adolph Gerald S. \n", | |
"3 970115 673000/673818.jpg West David J. \n", | |
"4 594560 594000/594560.jpg Townsend John L. \n", | |
"5 775162 775000/775162.jpg Biggins J. Veronica None \n", | |
"6 200080 200000/200080.jpg Palmer Anthony J. \n", | |
"7 706301 706000/706301.jpg Adolph Gerald S. \n", | |
"8 13179 13000/13179.jpg Jackson Lawrence V. \n", | |
"9 58866 58000/58866.jpg Verma Inder M. \n", | |
"\n", | |
" name_suffix \n", | |
"0 None \n", | |
"1 None \n", | |
"2 None \n", | |
"3 None \n", | |
"4 III \n", | |
"5 None \n", | |
"6 None \n", | |
"7 None \n", | |
"8 None \n", | |
"9 None " | |
] | |
}, | |
"execution_count": 160, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"merged = equilar.join(edgar, equilar.c.executive_id == edgar.c.executive_id)\n", | |
"sel = select([equilar])\n", | |
"sel = sel.limit(10) # Comment out this line to get everything.\n", | |
"sel = sel.where(edgar.c.has_edgar_match == False).select_from(merged)\n", | |
"df = pd.read_sql(sel, engine)\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment