Created
September 10, 2016 15:19
-
-
Save tilarids/7d01bdc205423998796ae81ef0b7e394 to your computer and use it in GitHub Desktop.
JSON + Pandas query demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import json\n", | |
| "import glob\n", | |
| "\n", | |
| "data = [json.load(open(x)) for x in glob.glob('/tmp/tenders/*/*.json')]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import itertools\n", | |
| "import pandas as pd\n", | |
| "\n", | |
| "def get_field(node, path):\n", | |
| " if not path:\n", | |
| " yield node\n", | |
| " return\n", | |
| " value = node.get(path[0], [])\n", | |
| " if isinstance(value, list):\n", | |
| " for v in value:\n", | |
| " for x in get_field(v, path[1:]):\n", | |
| " yield x\n", | |
| " else:\n", | |
| " for x in get_field(value, path[1:]):\n", | |
| " yield x\n", | |
| " \n", | |
| "def query(data, *query_elems):\n", | |
| " paths = [x.split('.') for x in query_elems]\n", | |
| " def generator():\n", | |
| " for elem in data:\n", | |
| " row = []\n", | |
| " for path in paths:\n", | |
| " row.append(get_field(elem, path))\n", | |
| " for x in itertools.product(*row):\n", | |
| " yield x\n", | |
| " return pd.DataFrame(generator(), columns=query_elems)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "q2 = query(data, 'tenderID', 'value.amount', 'status', 'bids.tenderers.name', 'bids.value.amount', \n", | |
| " 'bids.tenderers.identifier.id', 'awards.value.amount')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>tenderID</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>UA-2015-03-06-000009</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>UA-2015-03-17-000014</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>UA-2015-03-17-000019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>UA-2015-03-30-000001</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>UA-2015-04-06-000007</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>UA-2015-04-16-000015</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>UA-2015-04-17-000008</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>UA-2015-04-28-000003</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>UA-2015-04-28-000013</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>UA-2015-04-28-000033</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>UA-2015-04-29-000002</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>UA-2015-05-05-000024</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>UA-2015-05-05-000026</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>UA-2015-05-06-000023</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>UA-2015-05-07-000011</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>UA-2015-05-08-000006</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>UA-2015-05-08-000011</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>UA-2015-05-12-000001</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>UA-2015-05-12-000009</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>UA-2015-05-13-000001</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>UA-2015-05-13-000002</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>UA-2015-05-14-000002</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>UA-2015-05-14-000025</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>23</th>\n", | |
| " <td>UA-2015-05-14-000026</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>24</th>\n", | |
| " <td>UA-2015-05-15-000001</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>25</th>\n", | |
| " <td>UA-2015-05-15-000002</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>26</th>\n", | |
| " <td>UA-2015-05-18-000001</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>27</th>\n", | |
| " <td>UA-2015-05-18-000024</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>28</th>\n", | |
| " <td>UA-2015-05-18-000026</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>29</th>\n", | |
| " <td>UA-2015-05-18-000027</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7690</th>\n", | |
| " <td>UA-2016-02-25-000246-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7691</th>\n", | |
| " <td>UA-2016-02-25-000307-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7692</th>\n", | |
| " <td>UA-2016-02-25-000338-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7693</th>\n", | |
| " <td>UA-2016-02-25-000344-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7694</th>\n", | |
| " <td>UA-2016-02-25-000377-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7695</th>\n", | |
| " <td>UA-2016-02-26-000089-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7696</th>\n", | |
| " <td>UA-2016-02-26-000098-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7697</th>\n", | |
| " <td>UA-2016-02-26-000135-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7698</th>\n", | |
| " <td>UA-2016-02-26-000166-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7699</th>\n", | |
| " <td>UA-2016-02-26-000315-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7700</th>\n", | |
| " <td>UA-2016-02-26-000329-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7701</th>\n", | |
| " <td>UA-2016-02-26-000331-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7702</th>\n", | |
| " <td>UA-2016-02-26-000332-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7703</th>\n", | |
| " <td>UA-2016-02-26-000334-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7704</th>\n", | |
| " <td>UA-2016-02-26-000343-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7705</th>\n", | |
| " <td>UA-2016-02-28-000001-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7706</th>\n", | |
| " <td>UA-2016-02-29-000086-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7707</th>\n", | |
| " <td>UA-2016-02-29-000087-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7708</th>\n", | |
| " <td>UA-2016-02-29-000131-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7709</th>\n", | |
| " <td>UA-2016-02-29-000150-a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7710</th>\n", | |
| " <td>UA-2016-03-01-000110-a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7711</th>\n", | |
| " <td>UA-2016-03-01-000135-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7712</th>\n", | |
| " <td>UA-2016-03-01-000195-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7713</th>\n", | |
| " <td>UA-2016-03-02-000039-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7714</th>\n", | |
| " <td>UA-2016-03-02-000051-a</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7715</th>\n", | |
| " <td>UA-2016-03-02-000070-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7716</th>\n", | |
| " <td>UA-2016-03-02-000119-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7717</th>\n", | |
| " <td>UA-2016-03-02-000120-b</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7718</th>\n", | |
| " <td>UA-2016-03-02-000266-c</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7719</th>\n", | |
| " <td>UA-2016-03-03-000088-b</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>7720 rows × 1 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " tenderID\n", | |
| "0 UA-2015-03-06-000009\n", | |
| "1 UA-2015-03-17-000014\n", | |
| "2 UA-2015-03-17-000019\n", | |
| "3 UA-2015-03-30-000001\n", | |
| "4 UA-2015-04-06-000007\n", | |
| "5 UA-2015-04-16-000015\n", | |
| "6 UA-2015-04-17-000008\n", | |
| "7 UA-2015-04-28-000003\n", | |
| "8 UA-2015-04-28-000013\n", | |
| "9 UA-2015-04-28-000033\n", | |
| "10 UA-2015-04-29-000002\n", | |
| "11 UA-2015-05-05-000024\n", | |
| "12 UA-2015-05-05-000026\n", | |
| "13 UA-2015-05-06-000023\n", | |
| "14 UA-2015-05-07-000011\n", | |
| "15 UA-2015-05-08-000006\n", | |
| "16 UA-2015-05-08-000011\n", | |
| "17 UA-2015-05-12-000001\n", | |
| "18 UA-2015-05-12-000009\n", | |
| "19 UA-2015-05-13-000001\n", | |
| "20 UA-2015-05-13-000002\n", | |
| "21 UA-2015-05-14-000002\n", | |
| "22 UA-2015-05-14-000025\n", | |
| "23 UA-2015-05-14-000026\n", | |
| "24 UA-2015-05-15-000001\n", | |
| "25 UA-2015-05-15-000002\n", | |
| "26 UA-2015-05-18-000001\n", | |
| "27 UA-2015-05-18-000024\n", | |
| "28 UA-2015-05-18-000026\n", | |
| "29 UA-2015-05-18-000027\n", | |
| "... ...\n", | |
| "7690 UA-2016-02-25-000246-b\n", | |
| "7691 UA-2016-02-25-000307-b\n", | |
| "7692 UA-2016-02-25-000338-b\n", | |
| "7693 UA-2016-02-25-000344-b\n", | |
| "7694 UA-2016-02-25-000377-b\n", | |
| "7695 UA-2016-02-26-000089-b\n", | |
| "7696 UA-2016-02-26-000098-c\n", | |
| "7697 UA-2016-02-26-000135-b\n", | |
| "7698 UA-2016-02-26-000166-b\n", | |
| "7699 UA-2016-02-26-000315-b\n", | |
| "7700 UA-2016-02-26-000329-b\n", | |
| "7701 UA-2016-02-26-000331-b\n", | |
| "7702 UA-2016-02-26-000332-b\n", | |
| "7703 UA-2016-02-26-000334-b\n", | |
| "7704 UA-2016-02-26-000343-b\n", | |
| "7705 UA-2016-02-28-000001-b\n", | |
| "7706 UA-2016-02-29-000086-b\n", | |
| "7707 UA-2016-02-29-000087-c\n", | |
| "7708 UA-2016-02-29-000131-c\n", | |
| "7709 UA-2016-02-29-000150-a\n", | |
| "7710 UA-2016-03-01-000110-a\n", | |
| "7711 UA-2016-03-01-000135-c\n", | |
| "7712 UA-2016-03-01-000195-c\n", | |
| "7713 UA-2016-03-02-000039-c\n", | |
| "7714 UA-2016-03-02-000051-a\n", | |
| "7715 UA-2016-03-02-000070-c\n", | |
| "7716 UA-2016-03-02-000119-c\n", | |
| "7717 UA-2016-03-02-000120-b\n", | |
| "7718 UA-2016-03-02-000266-c\n", | |
| "7719 UA-2016-03-03-000088-b\n", | |
| "\n", | |
| "[7720 rows x 1 columns]" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "bid_counts2 = q2[q2['status']=='complete'].groupby(['tenderID'])['bids.tenderers.name'].count()\n", | |
| "bid_counts2[bid_counts2 == 1].reset_index()[['tenderID']]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment