Created
February 17, 2021 13:18
-
-
Save MaxMatti/f06c2eefc8faa56f331055a2f5384047 to your computer and use it in GitHub Desktop.
Source code used to generate the age distribution of German citizens and federal parliament members, 1950-2018
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# Author: Max Staff <[email protected]> | |
import csv
import re
import xml.etree.ElementTree as ElementTree

import matplotlib.animation as animation
import matplotlib.pyplot as pyplot
import numpy
# Semicolon-separated population table read by main().
population = "data/14_bevoelkerungsvorausberechnung_daten.csv"
# XML master data of the members of parliament read by main().
bundestag = "data/MDB_STAMMDATEN.XML"
_AGE_BINS = 101  # one horizontal bar per age 0..100
_OPEN_END_YEAR = 2021  # end year used for mandates without a usable end date


def _year_of(date_text):
    """Return the year of a 10-character date whose last 4 characters are digits.

    Returns None when *date_text* is None or does not have that shape
    (the data is expected to look like ``DD.MM.YYYY``).
    """
    if date_text is not None and len(date_text) == 10 and date_text[6:].isnumeric():
        return int(date_text[6:])
    return None


def _load_population(path, years):
    """Store the population rows of the CSV at *path* in *years*.

    Each accepted row is written to ``years[row[1]]["pop_" + row[2]]`` as a
    list of ints built from the cells starting at column 4, followed by one
    trailing 0.
    """
    # newline="" is what the csv module asks for when it is given a file object.
    with open(path, newline="") as file:
        for row in csv.reader(file, delimiter=";"):
            # only use actually measured data, not predictions and not the header
            if len(row) < 3 or row[0] != "0":
                continue
            # Prefixing "0" lets an empty cell parse as 0 instead of raising.
            counts = [int("0" + cell, 10) for cell in row[4:]] + [0]
            years.setdefault(row[1], {})["pop_" + row[2]] = counts


def _load_bundestag(path, years):
    """Count the members of parliament per year, gender and age into *years*.

    For every year a member was in office, the entry
    ``years[str(year)]["bundestag_" + gender][age]`` is incremented, where
    ``gender`` is the first character of the GESCHLECHT element and ``age`` is
    the year minus the birth year.

    Returns True on success. Returns False, after printing a message, when a
    gender, birth date or start date is missing or cannot be parsed.
    """
    root = ElementTree.parse(path).getroot()
    for mdb in root:
        # Reset per member so nothing leaks over from the previous record.
        gender = None
        birth_year = None
        years_in_office = []
        for section in mdb:
            if section.tag == "BIOGRAFISCHE_ANGABEN":
                for attr in section:
                    if attr.tag == "GESCHLECHT":
                        if not attr.text:
                            print("Unknown gender: \"" + str(attr.text) + "\"")
                            return False
                        gender = attr.text[0]
                    elif attr.tag == "GEBURTSDATUM":
                        birth_year = _year_of(attr.text)
                        if birth_year is None:
                            print("Unknown birthdate: \"" + str(attr.text) + "\"")
                            return False
            elif section.tag == "WAHLPERIODEN":
                for period in section:
                    # Reset per period; a missing end date means "open ended".
                    start_year = None
                    end_year = _OPEN_END_YEAR
                    for attr in period:
                        if attr.tag == "MDBWP_VON":
                            start_year = _year_of(attr.text)
                            if start_year is None:
                                print("Unknown startdate: \"" + str(attr.text) + "\"")
                                return False
                        elif attr.tag == "MDBWP_BIS":
                            parsed = _year_of(attr.text)
                            if parsed is not None:
                                end_year = parsed
                            else:
                                # An empty element is accepted silently; any
                                # other unparsable text is reported.
                                if attr.text is not None:
                                    print("Unknown enddate: \"" + str(attr.text)
                                          + "\" (using " + str(_OPEN_END_YEAR) + ")")
                                end_year = _OPEN_END_YEAR
                    if start_year is None:
                        print("Unknown startdate: \"" + str(start_year) + "\"")
                        return False
                    # The end year itself is excluded from the range.
                    years_in_office.extend(range(start_year, end_year))
        if not years_in_office:
            continue
        if gender is None:
            print("Unknown gender: \"" + str(gender) + "\"")
            return False
        if birth_year is None:
            print("Unknown birthdate: \"" + str(birth_year) + "\"")
            return False
        for year in years_in_office:
            age = year - birth_year
            # A negative index would silently wrap around, a large one would
            # raise; neither fits into the 0..100 histogram.
            if not 0 <= age < _AGE_BINS:
                print("Age out of range: " + str(age) + " in year " + str(year))
                continue
            frame = years.setdefault(str(year), {})
            # setdefault never resets counts that were already collected.
            for key in ("bundestag_m", "bundestag_w", "bundestag_" + gender):
                frame.setdefault(key, [0] * _AGE_BINS)
            frame["bundestag_" + gender][age] += 1
    return True


def _style_axis(ax, title, xlim, xticks, yticks):
    """Apply the look shared by both plots: title, limits, ticks, no frame."""
    ax.set_title(title)
    ax.set_xlim(xlim)
    ax.set_ylim([0, 100])
    ax.set_yticks(yticks)
    ax.set_yticklabels([str(y) for y in yticks])
    ax.tick_params(axis="y", which="both", length=0)
    ax.set_xticks(xticks)
    # The "w" series is drawn with negative widths, so label by magnitude.
    ax.set_xticklabels([str(abs(x)) for x in xticks])
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)


def main():
    """Animate population and Bundestag age histograms side by side.

    Reads the CSV named by ``population`` and the XML named by ``bundestag``,
    builds one frame per year for which all four series (``pop_m``, ``pop_w``,
    ``bundestag_m``, ``bundestag_w``) exist, shows the animation and then
    writes it to ``output.mp4``.

    Returns early, after printing a message, when the XML contains a record
    that cannot be interpreted or when no year has all four series.
    """
    # Pre-seeding keeps the dict's insertion order chronological; that order
    # is the order in which the frames are played.
    years = {str(yr): {} for yr in range(1800, 2020)}
    _load_population(population, years)
    if not _load_bundestag(bundestag, years):
        return

    required = ("pop_m", "pop_w", "bundestag_m", "bundestag_w")
    keys = [key for key, frame in years.items() if all(name in frame for name in required)]
    if not keys:
        print("No year has both population and Bundestag data; nothing to plot.")
        return

    fig, (ax1, ax2) = pyplot.subplots(1, 2, figsize=(12, 10), gridspec_kw={"width_ratios": [8, 7]})
    pyplot.subplots_adjust(wspace=0, hspace=0)
    pos = numpy.arange(_AGE_BINS)
    tickpos = range(5, 96, 5)
    first = years[keys[0]]

    pop_m_bar = ax1.barh(y=pos, width=first["pop_m"], height=1, left=0, align="edge", label="m")
    pop_w_bar = ax1.barh(y=pos, width=[-x for x in first["pop_w"]], height=1, left=0, align="edge", label="w")
    fig.suptitle("Altersstruktur Deutschlands im Jahr " + keys[0])
    _style_axis(ax1, "Bevölkerung (in Tsd.)", [-800, 800], [-600, -400, -200, 0, 200, 400, 600], tickpos)

    bundestag_m_bar = ax2.barh(y=pos, width=first["bundestag_m"], height=1, left=0, align="edge", label="m")
    bundestag_w_bar = ax2.barh(y=pos, width=[-x for x in first["bundestag_w"]], height=1, left=0, align="edge", label="w")
    _style_axis(ax2, "Bundestag", [-30, 40], [-20, -10, 0, 10, 20, 30], tickpos)
    ax2.yaxis.tick_right()
    ax2.legend(loc="lower right")

    # Alternating grey bands, one per lower half of every decade of age.
    for i in range(10):
        ax1.axhspan(i * 10, 5 + i * 10, color='#eee', alpha=1, zorder=0)
        ax2.axhspan(i * 10, 5 + i * 10, color='#eee', alpha=1, zorder=0)
    ax1.annotate("Datenquelle Bevölkerung: Statistisches Bundesamt (Destatis), 2019\nDatenquelle Bundestag: Open Data Portal des Bundestages, 2021", (0, 0), (0, -30), xycoords="axes fraction", textcoords="offset points", fontsize="8", va="top")
    ax2.annotate("© Max Staff, 2021", (1, 0), (0, -30), xycoords="axes fraction", textcoords="offset points", va="top", fontsize="8", horizontalalignment="right")

    series = (
        (pop_m_bar, "pop_m", 1),
        (pop_w_bar, "pop_w", -1),
        (bundestag_m_bar, "bundestag_m", 1),
        (bundestag_w_bar, "bundestag_w", -1),
    )

    def animate(i):
        """Update the title and all four bar series to frame *i* of ``keys``."""
        frame = years[keys[i]]
        fig.suptitle("Altersstruktur Deutschlands im Jahr " + keys[i])
        for bars, name, sign in series:
            values = frame[name]
            # A separate index name keeps the frame index *i* intact.
            for idx, bar in enumerate(bars):
                bar.set_width(sign * values[idx])

    ani = animation.FuncAnimation(fig, animate, repeat=False, blit=False, frames=len(keys), interval=100)
    # NOTE(review): with a blocking backend the video is only written once
    # the window has been closed - confirm this order is intended.
    pyplot.show()
    ani.save("output.mp4", writer=animation.FFMpegWriter(fps=4))
# Run the visualisation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I originally wanted to check whether this comment by u/FicusRobtusa also applies to Germany. Turns out it doesn't.
Data sources:
Left plot: https://service.destatis.de/bevoelkerungspyramide/
Right plot: https://www.bundestag.de/services/opendata
Discussion: https://www.reddit.com/r/dataisbeautiful/comments/lltzna/oc_age_distribution_of_german_citizens_and/