Created
June 17, 2017 19:14
-
-
Save kforeman/7e87f6fd94b8fb50d67ea6260f4ed9a0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import os\n", | |
"import xarray as xr\n", | |
"import numpy as np\n", | |
"import cStringIO\n", | |
"import gzip" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Read only the four columns used by the rest of the notebook.\n", | |
"df = pd.read_csv(\n", | |
"    './u5m_coords_with_pop.csv',\n", | |
"    usecols=['x', 'y', 'mean_5q0_2015', 'pop'],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Order rows by x first, then by y within each x.\n", | |
"df = df.sort_values(by=['x', 'y'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>mean_5q0_2015</th>\n", | |
" <th>pop</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>39705</th>\n", | |
" <td>-25.354167</td>\n", | |
" <td>17.062500</td>\n", | |
" <td>0.018566</td>\n", | |
" <td>11.890158</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40240</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>16.937500</td>\n", | |
" <td>0.018664</td>\n", | |
" <td>12.634308</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40039</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>16.979167</td>\n", | |
" <td>0.018606</td>\n", | |
" <td>14.405664</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39863</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>17.020833</td>\n", | |
" <td>0.018459</td>\n", | |
" <td>20.374880</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39706</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>17.062500</td>\n", | |
" <td>0.018678</td>\n", | |
" <td>22.877031</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" x y mean_5q0_2015 pop\n", | |
"39705 -25.354167 17.062500 0.018566 11.890158\n", | |
"40240 -25.312500 16.937500 0.018664 12.634308\n", | |
"40039 -25.312500 16.979167 0.018606 14.405664\n", | |
"39863 -25.312500 17.020833 0.018459 20.374880\n", | |
"39706 -25.312500 17.062500 0.018678 22.877031" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the first five rows after sorting.\n", | |
"df.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# factorize() returns (integer codes, unique values). The codes become the\n", | |
"# grid indices stored on df; the unique coordinate values are kept in\n", | |
"# `lng` / `lat` for the export. Codes follow order of first appearance.\n", | |
"df['lng'], lng = pd.factorize(df['x'])\n", | |
"df['lat'], lat = pd.factorize(df['y'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Multiply by 1000 and keep one decimal place.\n", | |
"df['m'] = df['mean_5q0_2015'].mul(1000).round(1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keep one decimal place of the pop column.\n", | |
"df['p'] = df['pop'].round(decimals=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>mean_5q0_2015</th>\n", | |
" <th>pop</th>\n", | |
" <th>lng</th>\n", | |
" <th>lat</th>\n", | |
" <th>m</th>\n", | |
" <th>p</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>39705</th>\n", | |
" <td>-25.354167</td>\n", | |
" <td>17.062500</td>\n", | |
" <td>0.018566</td>\n", | |
" <td>11.890158</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>18.6</td>\n", | |
" <td>11.9</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40240</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>16.937500</td>\n", | |
" <td>0.018664</td>\n", | |
" <td>12.634308</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>18.7</td>\n", | |
" <td>12.6</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>40039</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>16.979167</td>\n", | |
" <td>0.018606</td>\n", | |
" <td>14.405664</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>18.6</td>\n", | |
" <td>14.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39863</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>17.020833</td>\n", | |
" <td>0.018459</td>\n", | |
" <td>20.374880</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>18.5</td>\n", | |
" <td>20.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>39706</th>\n", | |
" <td>-25.312500</td>\n", | |
" <td>17.062500</td>\n", | |
" <td>0.018678</td>\n", | |
" <td>22.877031</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>18.7</td>\n", | |
" <td>22.9</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" x y mean_5q0_2015 pop lng lat m p\n", | |
"39705 -25.354167 17.062500 0.018566 11.890158 0 0 18.6 11.9\n", | |
"40240 -25.312500 16.937500 0.018664 12.634308 1 1 18.7 12.6\n", | |
"40039 -25.312500 16.979167 0.018606 14.405664 1 2 18.6 14.4\n", | |
"39863 -25.312500 17.020833 0.018459 20.374880 1 3 18.5 20.4\n", | |
"39706 -25.312500 17.062500 0.018678 22.877031 1 0 18.7 22.9" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the first five rows with the derived lng/lat/m/p columns.\n", | |
"df.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Keep the grid codes plus the two rounded columns, index by the codes,\n", | |
"# and convert to an xarray Dataset whose coordinates come from that index.\n", | |
"x = df[['lng', 'lat', 'm', 'p']].set_index(['lng', 'lat']).to_xarray()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def _grid_to_json(field):\n", | |
"    \"\"\"Serialise a DataArray indexed by `lng` as nested lists, one per lng value.\n", | |
"\n", | |
"    Each value is written with one decimal place. NaN cells are written as\n", | |
"    empty slots (e.g. ``[1.0,,2.0]``), so the text is not strict JSON\n", | |
"    wherever data is missing.\n", | |
"    \"\"\"\n", | |
"    rows = []\n", | |
"    for g in field.coords['lng'].values:\n", | |
"        row = field.sel(lng=g).values\n", | |
"        cells = ['' if np.isnan(v) else '{:.1f}'.format(v) for v in row]\n", | |
"        rows.append('[' + ','.join(cells) + ']')\n", | |
"    return '[' + ','.join(rows) + ']'\n", | |
"\n", | |
"\n", | |
"# Each grid is built as one string and joined, rather than writing a\n", | |
"# trailing comma per row and seeking back over it. The seek approach\n", | |
"# overwrites the opening '[' when there are no rows, and text-mode files\n", | |
"# on Python 3 reject end-relative seeks.\n", | |
"with open('./tiny-data.json', 'w') as f:\n", | |
"    f.write('{')\n", | |
"    f.write('\"lng\":[{lng}],'.format(lng=','.join(lng.values.round(3).astype(str))))\n", | |
"    f.write('\"lat\":[{lat}],'.format(lat=','.join(lat.values.round(3).astype(str))))\n", | |
"    f.write('\"5q0\":' + _grid_to_json(x.m) + ',')\n", | |
"    f.write('\"pop\":' + _grid_to_json(x.p))\n", | |
"    f.write('}')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def compressFileToString(inputFile):\n", | |
"    \"\"\"\n", | |
"    Gzip-compress everything readable from an open file object.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    inputFile : file-like\n", | |
"        Open file object; it is read in 8192-unit chunks until `read`\n", | |
"        returns an empty result. It is not closed here.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    bytes\n", | |
"        The complete gzip stream (a `str` on Python 2).\n", | |
"    \"\"\"\n", | |
"    # Local import: io.BytesIO exists on Python 2.6+ and 3, unlike cStringIO.\n", | |
"    import io\n", | |
"\n", | |
"    stream = io.BytesIO()\n", | |
"    # The with-block closes the compressor (which writes the gzip trailer)\n", | |
"    # even if a read or write raises part-way through.\n", | |
"    with gzip.GzipFile(fileobj=stream, mode='wb') as compressor:\n", | |
"        while True:  # until EOF\n", | |
"            chunk = inputFile.read(8192)\n", | |
"            if not chunk:  # EOF\n", | |
"                break\n", | |
"            if not isinstance(chunk, bytes):\n", | |
"                # Text-mode handles yield unicode; gzip needs bytes.\n", | |
"                chunk = chunk.encode('utf-8')\n", | |
"            compressor.write(chunk)\n", | |
"    return stream.getvalue()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Both handles are binary: gzip output is raw bytes, and a text-mode handle\n", | |
"# may translate newline bytes and corrupt the stream. The with-block also\n", | |
"# closes the input file, which was previously left open.\n", | |
"with open('./tiny-data.json', 'rb') as source, open('./compressed.json', 'wb') as compressed:\n", | |
"    compressed.write(compressFileToString(source))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment