Skip to content

Instantly share code, notes, and snippets.

@iangow
Created April 25, 2017 14:45
Show Gist options
  • Save iangow/f2efdeab2288a2e51a4a455c427e16d3 to your computer and use it in GitHub Desktop.
Save iangow/f2efdeab2288a2e51a4a455c427e16d3 to your computer and use it in GitHub Desktop.
Code to illustrate getting data from WRDS to Melbourne.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# TODO: Get DB connection information from environment\n",
"# variables with sensible defaults\n",
"from sqlalchemy import create_engine\n",
"engine = create_engine(\"postgresql://10.101.14.248/data\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 loop, best of 1: 4min 57s per loop\n"
]
}
],
"source": [
"schema = \"crsp\"\n",
"table_name = \"msf\"\n",
"wrds_id = \"iangow\"\n",
"from wrds_fetch import wrds_to_pg\n",
"%timeit -n1 -r1 wrds_to_pg(schema, table_name, engine, wrds_id, fix_missing=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>cusip</th>\n",
" <th>permno</th>\n",
" <th>permco</th>\n",
" <th>issuno</th>\n",
" <th>hexcd</th>\n",
" <th>hsiccd</th>\n",
" <th>date</th>\n",
" <th>bidlo</th>\n",
" <th>askhi</th>\n",
" <th>prc</th>\n",
" <th>...</th>\n",
" <th>ret</th>\n",
" <th>bid</th>\n",
" <th>ask</th>\n",
" <th>shrout</th>\n",
" <th>cfacpr</th>\n",
" <th>cfacshr</th>\n",
" <th>altprc</th>\n",
" <th>spread</th>\n",
" <th>altprcdt</th>\n",
" <th>retx</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1985-12-31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-2.56250</td>\n",
" <td>NaN</td>\n",
" <td>1986-01-07</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1986-01-31</td>\n",
" <td>-2.5000</td>\n",
" <td>-4.43750</td>\n",
" <td>-4.37500</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3680.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>-4.37500</td>\n",
" <td>0.25000</td>\n",
" <td>1986-01-31</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1986-02-28</td>\n",
" <td>-3.2500</td>\n",
" <td>-4.37500</td>\n",
" <td>-3.25000</td>\n",
" <td>...</td>\n",
" <td>-0.257143</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3680.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>-3.25000</td>\n",
" <td>0.25000</td>\n",
" <td>1986-02-28</td>\n",
" <td>-0.257143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1986-03-31</td>\n",
" <td>-3.2500</td>\n",
" <td>-4.43750</td>\n",
" <td>-4.43750</td>\n",
" <td>...</td>\n",
" <td>0.365385</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3680.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>-4.43750</td>\n",
" <td>0.12500</td>\n",
" <td>1986-03-31</td>\n",
" <td>0.365385</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1986-04-30</td>\n",
" <td>-4.0000</td>\n",
" <td>-4.31250</td>\n",
" <td>-4.00000</td>\n",
" <td>...</td>\n",
" <td>-0.098592</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3793.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>-4.00000</td>\n",
" <td>0.25000</td>\n",
" <td>1986-04-30</td>\n",
" <td>-0.098592</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>68391610</td>\n",
" <td>10000</td>\n",
" <td>7952</td>\n",
" <td>10396</td>\n",
" <td>3</td>\n",
" <td>3990</td>\n",
" <td>1986-05-30</td>\n",
" <td>-3.0625</td>\n",
" <td>-4.21875</td>\n",
" <td>-3.10938</td>\n",
" <td>...</td>\n",
" <td>-0.222656</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>3793.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>-3.10938</td>\n",
" <td>0.09375</td>\n",
" <td>1986-05-30</td>\n",
" <td>-0.222656</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>6 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" cusip permno permco issuno hexcd hsiccd date bidlo \\\n",
"0 68391610 10000 7952 10396 3 3990 1985-12-31 NaN \n",
"1 68391610 10000 7952 10396 3 3990 1986-01-31 -2.5000 \n",
"2 68391610 10000 7952 10396 3 3990 1986-02-28 -3.2500 \n",
"3 68391610 10000 7952 10396 3 3990 1986-03-31 -3.2500 \n",
"4 68391610 10000 7952 10396 3 3990 1986-04-30 -4.0000 \n",
"5 68391610 10000 7952 10396 3 3990 1986-05-30 -3.0625 \n",
"\n",
" askhi prc ... ret bid ask shrout cfacpr cfacshr \\\n",
"0 NaN NaN ... NaN None None NaN NaN NaN \n",
"1 -4.43750 -4.37500 ... NaN None None 3680.0 1.0 1.0 \n",
"2 -4.37500 -3.25000 ... -0.257143 None None 3680.0 1.0 1.0 \n",
"3 -4.43750 -4.43750 ... 0.365385 None None 3680.0 1.0 1.0 \n",
"4 -4.31250 -4.00000 ... -0.098592 None None 3793.0 1.0 1.0 \n",
"5 -4.21875 -3.10938 ... -0.222656 None None 3793.0 1.0 1.0 \n",
"\n",
" altprc spread altprcdt retx \n",
"0 -2.56250 NaN 1986-01-07 NaN \n",
"1 -4.37500 0.25000 1986-01-31 NaN \n",
"2 -3.25000 0.25000 1986-02-28 -0.257143 \n",
"3 -4.43750 0.12500 1986-03-31 0.365385 \n",
"4 -4.00000 0.25000 1986-04-30 -0.098592 \n",
"5 -3.10938 0.09375 1986-05-30 -0.222656 \n",
"\n",
"[6 rows x 21 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check thatthe data loaded into PostgreSQL correctly\n",
"import pandas as pd\n",
"df = pd.read_sql(\"SELECT * FROM \" + schema + \".\" + table_name + \" LIMIT 6\", engine)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>vwretd</th>\n",
" <th>vwretx</th>\n",
" <th>ewretd</th>\n",
" <th>ewretx</th>\n",
" <th>sprtrn</th>\n",
" <th>spindx</th>\n",
" <th>totval</th>\n",
" <th>totcnt</th>\n",
" <th>usdval</th>\n",
" <th>usdcnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>19251231</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>12.46</td>\n",
" <td>27487487.2</td>\n",
" <td>503</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>19260130</td>\n",
" <td>0.000561</td>\n",
" <td>-0.001395</td>\n",
" <td>0.023174</td>\n",
" <td>0.021395</td>\n",
" <td>0.022472</td>\n",
" <td>12.74</td>\n",
" <td>27624240.8</td>\n",
" <td>506</td>\n",
" <td>27412916.2</td>\n",
" <td>496.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19260227</td>\n",
" <td>-0.033046</td>\n",
" <td>-0.036587</td>\n",
" <td>-0.053510</td>\n",
" <td>-0.055547</td>\n",
" <td>-0.043956</td>\n",
" <td>12.18</td>\n",
" <td>26752064.1</td>\n",
" <td>514</td>\n",
" <td>27600952.1</td>\n",
" <td>500.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>19260331</td>\n",
" <td>-0.064002</td>\n",
" <td>-0.070021</td>\n",
" <td>-0.096824</td>\n",
" <td>-0.101404</td>\n",
" <td>-0.059113</td>\n",
" <td>11.46</td>\n",
" <td>25083173.4</td>\n",
" <td>519</td>\n",
" <td>26683758.1</td>\n",
" <td>507.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>19260430</td>\n",
" <td>0.037019</td>\n",
" <td>0.034031</td>\n",
" <td>0.032946</td>\n",
" <td>0.030121</td>\n",
" <td>0.022688</td>\n",
" <td>11.72</td>\n",
" <td>25886743.8</td>\n",
" <td>521</td>\n",
" <td>24899755.6</td>\n",
" <td>512.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" date vwretd vwretx ewretd ewretx sprtrn spindx \\\n",
"0 19251231 NaN NaN NaN NaN NaN 12.46 \n",
"1 19260130 0.000561 -0.001395 0.023174 0.021395 0.022472 12.74 \n",
"2 19260227 -0.033046 -0.036587 -0.053510 -0.055547 -0.043956 12.18 \n",
"3 19260331 -0.064002 -0.070021 -0.096824 -0.101404 -0.059113 11.46 \n",
"4 19260430 0.037019 0.034031 0.032946 0.030121 0.022688 11.72 \n",
"\n",
" totval totcnt usdval usdcnt \n",
"0 27487487.2 503 NaN NaN \n",
"1 27624240.8 506 27412916.2 496.0 \n",
"2 26752064.1 514 27600952.1 500.0 \n",
"3 25083173.4 519 26683758.1 507.0 \n",
"4 25886743.8 521 24899755.6 512.0 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from wrds_fetch import wrds_to_pandas\n",
"# Here is code to get data directly from WRDS into Pandas\n",
"df = wrds_to_pandas(schema, \"msi\", wrds_id)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment