Created
October 4, 2024 18:59
-
-
Save atticus-sullivan/e6a88927b204a3bf3c16e1ab95d4f43c to your computer and use it in GitHub Desktop.
trying to get MdB information directly from DIP API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "2e29816d-ead7-4de1-8e05-8e50cd6d4d18", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import pandas as pd\n", | |
"import urllib.parse\n", | |
"import json\n", | |
"\n", | |
"# Public DIP API key: committing it in plaintext is fine because it is public anyhow\n", | |
"# (see https://dip.bundestag.de/%C3%BCber-dip/hilfe/api).\n", | |
"api_key = 'I9FKdCn.hbfefNWCY336dL6x62vfwNKpoN2RZ1gp21'\n", | |
"# Root of the DIP search API (v1); resource paths such as 'person/' are appended to it.\n", | |
"base_url = 'https://search.dip.bundestag.de/api/v1/'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"id": "0d646fe4-d2ab-423c-9dd0-0ce9887b185d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'https://search.dip.bundestag.de/api/v1/person/?apikey=I9FKdCn.hbfefNWCY336dL6x62vfwNKpoN2RZ1gp21&f.wahlperiode=20'" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = {'apikey': api_key, 'f.wahlperiode': 20}\n", | |
"def query_url(query):\n", | |
" return base_url + \"person/?\" + urllib.parse.urlencode(query)\n", | |
"query_url(query)\n", | |
"# https://search.dip.bundestag.de/api/v1/person?apikey=I9FKdCn.hbfefNWCY336dL6x62vfwNKpoN2RZ1gp21&f.wahlperiode=20" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"id": "a76682f1-97fb-44c5-9a7b-a7ecb28b75b0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"full_data = pd.DataFrame()\n", | |
"\n", | |
"old_cursor = ''\n", | |
"while query.get('cursor') != old_cursor:\n", | |
" old_cursor = query.get('cursor')\n", | |
" url = query_url(query)\n", | |
" resp = requests.get(url)\n", | |
" if not resp.ok:\n", | |
" print(resp.status, url)\n", | |
" break\n", | |
" current_page_data = resp.json()\n", | |
" full_data = pd.concat([full_data, pd.json_normalize(current_page_data[\"documents\"])], ignore_index=True)\n", | |
" query['cursor'] = current_page_data['cursor']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "ee36af4b-f1dc-47b0-9d52-3ce1e3cf514a", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>nachname</th>\n", | |
" <th>vorname</th>\n", | |
" <th>typ</th>\n", | |
" <th>wahlperiode</th>\n", | |
" <th>aktualisiert</th>\n", | |
" <th>person_roles</th>\n", | |
" <th>titel</th>\n", | |
" <th>datum</th>\n", | |
" <th>basisdatum</th>\n", | |
" <th>namenszusatz</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>847</td>\n", | |
" <td>Pau</td>\n", | |
" <td>Petra</td>\n", | |
" <td>Person</td>\n", | |
" <td>14</td>\n", | |
" <td>2024-04-05T11:21:36+02:00</td>\n", | |
" <td>[{'funktion': 'MdB', 'fraktion': 'DIE LINKE', ...</td>\n", | |
" <td>Petra Pau, Bundestagsvizepräs.</td>\n", | |
" <td>2024-10-04</td>\n", | |
" <td>1998-10-26</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>7559</td>\n", | |
" <td>Bünger</td>\n", | |
" <td>Clara</td>\n", | |
" <td>Person</td>\n", | |
" <td>20</td>\n", | |
" <td>2024-04-05T10:58:01+02:00</td>\n", | |
" <td>[{'funktion': 'MdB', 'fraktion': 'fraktionslos...</td>\n", | |
" <td>Clara Bünger, MdB, DIE LINKE</td>\n", | |
" <td>2024-10-04</td>\n", | |
" <td>2022-01-14</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>7490</td>\n", | |
" <td>Reichinnek</td>\n", | |
" <td>Heidi</td>\n", | |
" <td>Person</td>\n", | |
" <td>20</td>\n", | |
" <td>2024-04-05T11:24:49+02:00</td>\n", | |
" <td>[{'funktion': 'MdB', 'fraktion': 'fraktionslos...</td>\n", | |
" <td>Heidi Reichinnek, MdB, DIE LINKE</td>\n", | |
" <td>2024-10-04</td>\n", | |
" <td>2021-11-05</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>7462</td>\n", | |
" <td>Wissler</td>\n", | |
" <td>Janine</td>\n", | |
" <td>Person</td>\n", | |
" <td>20</td>\n", | |
" <td>2024-04-05T11:30:35+02:00</td>\n", | |
" <td>[{'funktion': 'MdB', 'fraktion': 'fraktionslos...</td>\n", | |
" <td>Janine Wissler, MdB, DIE LINKE</td>\n", | |
" <td>2024-10-04</td>\n", | |
" <td>2021-12-07</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>7457</td>\n", | |
" <td>Naujok</td>\n", | |
" <td>Edgar</td>\n", | |
" <td>Person</td>\n", | |
" <td>20</td>\n", | |
" <td>2022-07-26T19:57:10+02:00</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Edgar Naujok, MdB, AfD</td>\n", | |
" <td>2024-10-04</td>\n", | |
" <td>2021-11-16</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1046</th>\n", | |
" <td>1933</td>\n", | |
" <td>Behrendt</td>\n", | |
" <td>Dirk</td>\n", | |
" <td>Person</td>\n", | |
" <td>18</td>\n", | |
" <td>2022-07-26T19:57:10+02:00</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Dr. Dirk Behrendt, Stellv. MdBR (Senator für J...</td>\n", | |
" <td>2021-11-19</td>\n", | |
" <td>2016-12-16</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1047</th>\n", | |
" <td>1984</td>\n", | |
" <td>Maas</td>\n", | |
" <td>Heiko</td>\n", | |
" <td>Person</td>\n", | |
" <td>17</td>\n", | |
" <td>2023-10-30T16:00:43+01:00</td>\n", | |
" <td>[{'funktion': 'Bundesmin.', 'ressort_titel': '...</td>\n", | |
" <td>Heiko Maas, MdB, SPD</td>\n", | |
" <td>2021-11-11</td>\n", | |
" <td>2012-06-15</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1048</th>\n", | |
" <td>1741</td>\n", | |
" <td>Maas</td>\n", | |
" <td>Heiko</td>\n", | |
" <td>Person</td>\n", | |
" <td>17</td>\n", | |
" <td>2023-10-30T15:48:35+01:00</td>\n", | |
" <td>[{'funktion': 'Bundesmin.', 'ressort_titel': '...</td>\n", | |
" <td>Heiko Maas, Bundesmin., Bundesministerium der ...</td>\n", | |
" <td>2021-11-11</td>\n", | |
" <td>2012-06-15</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1049</th>\n", | |
" <td>7503</td>\n", | |
" <td>Luft</td>\n", | |
" <td>Christian</td>\n", | |
" <td>Person</td>\n", | |
" <td>20</td>\n", | |
" <td>2022-07-26T19:57:10+02:00</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Christian Luft, Staatssekr., Bundesministerium...</td>\n", | |
" <td>2021-11-05</td>\n", | |
" <td>2021-11-05</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1050</th>\n", | |
" <td>7032</td>\n", | |
" <td>Kerber</td>\n", | |
" <td>Markus</td>\n", | |
" <td>Person</td>\n", | |
" <td>19</td>\n", | |
" <td>2022-07-26T19:57:10+02:00</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Dr. Markus Kerber, Staatssekr., Bundesminister...</td>\n", | |
" <td>2021-11-05</td>\n", | |
" <td>2018-04-13</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>1051 rows × 11 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id nachname vorname typ wahlperiode \\\n", | |
"0 847 Pau Petra Person 14 \n", | |
"1 7559 Bünger Clara Person 20 \n", | |
"2 7490 Reichinnek Heidi Person 20 \n", | |
"3 7462 Wissler Janine Person 20 \n", | |
"4 7457 Naujok Edgar Person 20 \n", | |
"... ... ... ... ... ... \n", | |
"1046 1933 Behrendt Dirk Person 18 \n", | |
"1047 1984 Maas Heiko Person 17 \n", | |
"1048 1741 Maas Heiko Person 17 \n", | |
"1049 7503 Luft Christian Person 20 \n", | |
"1050 7032 Kerber Markus Person 19 \n", | |
"\n", | |
" aktualisiert \\\n", | |
"0 2024-04-05T11:21:36+02:00 \n", | |
"1 2024-04-05T10:58:01+02:00 \n", | |
"2 2024-04-05T11:24:49+02:00 \n", | |
"3 2024-04-05T11:30:35+02:00 \n", | |
"4 2022-07-26T19:57:10+02:00 \n", | |
"... ... \n", | |
"1046 2022-07-26T19:57:10+02:00 \n", | |
"1047 2023-10-30T16:00:43+01:00 \n", | |
"1048 2023-10-30T15:48:35+01:00 \n", | |
"1049 2022-07-26T19:57:10+02:00 \n", | |
"1050 2022-07-26T19:57:10+02:00 \n", | |
"\n", | |
" person_roles \\\n", | |
"0 [{'funktion': 'MdB', 'fraktion': 'DIE LINKE', ... \n", | |
"1 [{'funktion': 'MdB', 'fraktion': 'fraktionslos... \n", | |
"2 [{'funktion': 'MdB', 'fraktion': 'fraktionslos... \n", | |
"3 [{'funktion': 'MdB', 'fraktion': 'fraktionslos... \n", | |
"4 NaN \n", | |
"... ... \n", | |
"1046 NaN \n", | |
"1047 [{'funktion': 'Bundesmin.', 'ressort_titel': '... \n", | |
"1048 [{'funktion': 'Bundesmin.', 'ressort_titel': '... \n", | |
"1049 NaN \n", | |
"1050 NaN \n", | |
"\n", | |
" titel datum \\\n", | |
"0 Petra Pau, Bundestagsvizepräs. 2024-10-04 \n", | |
"1 Clara Bünger, MdB, DIE LINKE 2024-10-04 \n", | |
"2 Heidi Reichinnek, MdB, DIE LINKE 2024-10-04 \n", | |
"3 Janine Wissler, MdB, DIE LINKE 2024-10-04 \n", | |
"4 Edgar Naujok, MdB, AfD 2024-10-04 \n", | |
"... ... ... \n", | |
"1046 Dr. Dirk Behrendt, Stellv. MdBR (Senator für J... 2021-11-19 \n", | |
"1047 Heiko Maas, MdB, SPD 2021-11-11 \n", | |
"1048 Heiko Maas, Bundesmin., Bundesministerium der ... 2021-11-11 \n", | |
"1049 Christian Luft, Staatssekr., Bundesministerium... 2021-11-05 \n", | |
"1050 Dr. Markus Kerber, Staatssekr., Bundesminister... 2021-11-05 \n", | |
"\n", | |
" basisdatum namenszusatz \n", | |
"0 1998-10-26 NaN \n", | |
"1 2022-01-14 NaN \n", | |
"2 2021-11-05 NaN \n", | |
"3 2021-12-07 NaN \n", | |
"4 2021-11-16 NaN \n", | |
"... ... ... \n", | |
"1046 2016-12-16 NaN \n", | |
"1047 2012-06-15 NaN \n", | |
"1048 2012-06-15 NaN \n", | |
"1049 2021-11-05 NaN \n", | |
"1050 2018-04-13 NaN \n", | |
"\n", | |
"[1051 rows x 11 columns]" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the collected records; the bare expression uses the rich DataFrame display.\n", | |
"full_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 46, | |
"id": "6da4627c-6a06-47e7-9e00-e0bf67905eac", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" id nachname vorname typ wahlperiode aktualisiert person_roles titel datum basisdatum namenszusatz\n", | |
"556 7549 Theurer Michael Person 19 2024-09-30T15:27:29+02:00 [{'funktion': 'MdB', 'fraktion': 'FDP', 'nachname': 'Theurer', 'vorname': 'Michael', 'wahlperiode_nummer': [19, 20]}] Michael Theurer, Parl. Staatssekr., Bundesministerium für Digitales und Verkehr 2024-09-06 2017-12-08 NaN\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>nachname</th>\n", | |
" <th>vorname</th>\n", | |
" <th>typ</th>\n", | |
" <th>wahlperiode</th>\n", | |
" <th>aktualisiert</th>\n", | |
" <th>person_roles</th>\n", | |
" <th>titel</th>\n", | |
" <th>datum</th>\n", | |
" <th>basisdatum</th>\n", | |
" <th>namenszusatz</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>556</th>\n", | |
" <td>7549</td>\n", | |
" <td>Theurer</td>\n", | |
" <td>Michael</td>\n", | |
" <td>Person</td>\n", | |
" <td>19</td>\n", | |
" <td>2024-09-30T15:27:29+02:00</td>\n", | |
" <td>[{'funktion': 'MdB', 'fraktion': 'FDP', 'nachn...</td>\n", | |
" <td>Michael Theurer, Parl. Staatssekr., Bundesmini...</td>\n", | |
" <td>2024-09-06</td>\n", | |
" <td>2017-12-08</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id nachname vorname typ wahlperiode aktualisiert \\\n", | |
"556 7549 Theurer Michael Person 19 2024-09-30T15:27:29+02:00 \n", | |
"\n", | |
" person_roles \\\n", | |
"556 [{'funktion': 'MdB', 'fraktion': 'FDP', 'nachn... \n", | |
"\n", | |
" titel datum \\\n", | |
"556 Michael Theurer, Parl. Staatssekr., Bundesmini... 2024-09-06 \n", | |
"\n", | |
" basisdatum namenszusatz \n", | |
"556 2017-12-08 NaN " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"print(full_data[full_data['nachname'] == 'Theurer'].to_string())\n", | |
"display(full_data[full_data['nachname'] == 'Theurer'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "cc0395ad-f35d-414a-9a54-26fb5d5d4392", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment