-
-
Save hughdbrown/ac86c0e0035930ca434fd594c1673888 to your computer and use it in GitHub Desktop.
Count how many people are alive per year, based on their birthyear and deathyear, aims for performances.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import collections | |
import itertools | |
import random | |
import sys | |
import timeit | |
pop = [ | |
(b := random.randint(1900, 2000), random.randint(b + 1, b + 100)) | |
for _ in range(10000) | |
] | |
# 0.162 s (with 10000 people [1,100] years old) | |
def count_living_per_year1(population: list[tuple[int, int]]) -> dict[int, int]: | |
living_per_year = {} | |
for birth, death in population: | |
if birth in living_per_year: | |
living_per_year[birth] += 1 | |
else: | |
living_per_year[birth] = 1 | |
if death in living_per_year: | |
living_per_year[death] -= 1 | |
else: | |
living_per_year[death] = -1 | |
for year in range(min(living_per_year), max(living_per_year)): | |
living_per_year[year] = living_per_year.get(year, 0) - living_per_year.get(year - 1, 0) | |
living_per_year.pop(max(living_per_year)) | |
return living_per_year | |
# 0.118 s (with 10000 people [1,100] years old) | |
def count_living_per_year2(population: list[tuple[int, int]]) -> dict[int, int]: | |
living_per_year = collections.defaultdict(int) | |
for birth, death in population: | |
living_per_year[birth] += 1 | |
living_per_year[death] -= 1 | |
min_, max_ = min(living_per_year), max(living_per_year) | |
for year in range(min_, max_): | |
living_per_year[year] += living_per_year[year - 1] | |
del living_per_year[min_ - 1], living_per_year[max_] | |
return living_per_year | |
# 0.105 s (with 10000 people [1,100] years old) | |
def count_living_per_year3(population: list[tuple[int, int]]) -> dict[int, int]: | |
births = collections.Counter(birth for birth, _ in population) | |
deaths = collections.Counter(death for _, death in population) | |
living_per_year = births | |
for year in range(min(births), max(deaths)): | |
living_per_year[year] += living_per_year[year - 1] - deaths[year] | |
return living_per_year | |
# 0.108 s (with 10000 people [1,100] years old) | |
def count_living_per_year4(population: list[tuple[int, int]]) -> dict[int, int]: | |
births = collections.Counter(birth for birth, _ in population) | |
deaths = collections.Counter(death for _, death in population) | |
(delta_per_year := births).subtract(deaths) | |
living_per_year = collections.Counter() | |
for year in range(min(delta_per_year), max(delta_per_year)): | |
living_per_year[year] = living_per_year[year - 1] + delta_per_year[year] | |
return living_per_year | |
# 0.093 s (with 10000 people [1,100] years old) | |
def count_living_per_year5(population: list[tuple[int, int]]) -> dict[int, int]: | |
births, deaths = zip(*population) | |
births_counter = collections.Counter(births) | |
deaths_counter = collections.Counter(deaths) | |
living_per_year = collections.Counter() | |
for year in range(min(births_counter), max(deaths_counter)): | |
delta_living = births_counter[year] - deaths_counter[year] | |
living_per_year[year] = living_per_year[year - 1] + delta_living | |
return living_per_year | |
def count_living_per_year6(population: list[tuple[int, int]]) -> dict[int, int]: | |
""" | |
Version by hughdbrown | |
Write as few python loops as possible, drive execution into compiled code/builtins | |
1. Generate yearly deltas | |
2. Flatten into array | |
3. Calculate with cumulative sum | |
4. Return result in dict | |
""" | |
b, d = zip(*population) | |
delta_per_year = collections.Counter(b) | |
delta_per_year.subtract(collections.Counter(d)) | |
first_birth, last_death = min(delta_per_year), max(delta_per_year) | |
delta_get = delta_per_year.get | |
deltas = [delta_get(year, 0) for year in range(first_birth, last_death)] | |
return dict( | |
enumerate(itertools.accumulate(deltas), start=first_birth) | |
) | |
def count_living_per_year7(population): | |
""" | |
Version by Stefan Schuerger | |
""" | |
born_by = collections.defaultdict(int) | |
dead_by = collections.defaultdict(int) | |
living_per_year = collections.defaultdict(int) | |
for birth, death in population: | |
born_by[birth] += 1 | |
dead_by[death] += 1 | |
alive = 0 | |
for year in range(1900,2100): | |
living_per_year[year] = alive = alive + born_by[year] - dead_by[year] | |
return living_per_year | |
def run_benchmarks(): | |
print(f"python version: {sys.version}") | |
for i in range(1, 8): | |
fn = f"count_living_per_year{i}(pop)" | |
timing = timeit.timeit(stmt=fn, globals=globals(), number=1000) | |
print(f"{fn}: {timing}") | |
if __name__ == '__main__': | |
run_benchmarks() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python version: 3.11.3 (main, Apr 12 2023, 18:27:18) [Clang 14.0.3 (clang-1403.0.22.14.1)]
count_living_per_year1(pop): 2.138002374966163
count_living_per_year2(pop): 1.818175038031768
count_living_per_year3(pop): 1.5936920850072056
count_living_per_year4(pop): 1.6447406660299748
count_living_per_year5(pop): 1.3068225800525397
count_living_per_year6(pop): 1.2849942080210894
count_living_per_year7(pop): 1.710214082035236
python version: 3.9.4 (default, Apr 20 2021, 22:14:53)
[Clang 12.0.0 (clang-1200.0.32.2)]
count_living_per_year1(pop): 2.671531382
count_living_per_year2(pop): 1.9882104379999999
count_living_per_year3(pop): 1.7378031969999999
count_living_per_year4(pop): 1.7831689280000003
count_living_per_year5(pop): 1.548357961999999
count_living_per_year6(pop): 1.5165221899999999
count_living_per_year7(pop): 1.823261629000001