Created
March 12, 2024 20:58
-
-
Save dsprenkels/20f345977fb4ab834d8db1aff4b51252 to your computer and use it in GitHub Desktop.
climbing_stats.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# climbing_stats.py - Compute climbing stats from Google location history | |
# | |
# Author: Amber Sprenkels <[email protected]> | |
# Date: 2024-03-12 | |
# | |
# This script computes the number of times I went to a local climbing or
# bouldering gym close to where I live, based on my Google location history. | |
import argparse | |
import json | |
import datetime | |
import logging | |
import os | |
import polars as pl | |
# (latitude, longitude) of the two gyms, in decimal degrees.
STADSMUUR_LOCATION = (51.821852202326895, 5.788025389774361)
GRIP_LOCATION = (51.8130421187338, 5.838377108340926)

# Equatorial circumference of the earth; used to convert a distance in meters
# into degrees (360 degrees span one full circumference).
EARTH_CIRCUMFERENCE = 40075000  # in meters

# A location fix counts as "at the gym" when it lies within this distance.
CLOSEBY_DISTANCE = 50  # in meters

# Location fixes whose reported accuracy is not below this value are ignored.
ACCURATE_DISTANCE = 100  # in meters
logger = logging.getLogger(__name__)

# A day only counts as a gym visit when the first and the last close-by
# location fix of that day are more than this far apart in time.
MIN_VISIT_DURATION = datetime.timedelta(minutes=15)


def parse_locations(locations):
    """Convert raw location-history records into column-oriented lists.

    Args:
        locations: Iterable of dicts, as stored under the "locations" key of
            the 'Records.json' location history file.

    Returns:
        A dict with the keys "latitude", "longitude", "accuracy",
        "timestamp" and "source", each mapping to a list with one entry per
        usable record. Coordinates are converted from the E7 integer
        representation to degrees. Missing "accuracy", "timestamp" and
        "source" fields become ``None``. Records lacking a coordinate are
        skipped, because they cannot be matched against a gym location.
    """
    columns = {
        "latitude": [],
        "longitude": [],
        "accuracy": [],
        "timestamp": [],
        "source": [],
    }
    skipped = 0
    for loc in locations:
        latitude_e7 = loc.get("latitudeE7")
        longitude_e7 = loc.get("longitudeE7")
        if latitude_e7 is None or longitude_e7 is None:
            skipped += 1
            continue

        timestamp = loc.get("timestamp")
        if timestamp is not None:
            # datetime.fromisoformat() only accepts a trailing "Z" since
            # Python 3.11; an explicit UTC offset parses on older versions too.
            if timestamp.endswith("Z"):
                timestamp = timestamp[:-1] + "+00:00"
            timestamp = datetime.datetime.fromisoformat(timestamp)

        columns["latitude"].append(latitude_e7 / 10**7)
        columns["longitude"].append(longitude_e7 / 10**7)
        columns["accuracy"].append(loc.get("accuracy"))
        columns["timestamp"].append(timestamp)
        columns["source"].append(loc.get("source"))

    if skipped:
        logger.warning("skipped %d location records without coordinates", skipped)
    return columns


def _is_close_to(location, radius) -> "pl.Expr":
    """Build an expression that is true for rows within ``radius`` of ``location``.

    Args:
        location: ``(latitude, longitude)`` tuple in degrees.
        radius: Maximum distance in meters.

    The distance is a plain Euclidean distance measured in degrees.
    NOTE(review): longitude differences are not scaled by cos(latitude), so
    the matched area is narrower east-west than north-south. This keeps the
    original approximation; confirm whether that is intended.
    """
    latitude, longitude = location
    max_degrees = radius / EARTH_CIRCUMFERENCE * 360
    squared_distance = (pl.col("latitude") - latitude) ** 2 + (
        pl.col("longitude") - longitude
    ) ** 2
    return squared_distance < max_degrees**2


def main():
    """Read a location history file and print the gym visit counts per year."""
    # Set up the logging
    logging.basicConfig(level=os.environ.get("PYTHON_LOG", "INFO").upper())
    pl.Config.set_tbl_rows(1000)

    # Parse the arguments and open a file with the climbing stats
    parser = argparse.ArgumentParser(
        description="Compute climbing stats from Google location history"
    )
    parser.add_argument(
        "takeout",
        type=argparse.FileType("r", encoding="utf-8"),
        help="The 'Records.json' location history json file",
    )
    args = parser.parse_args()

    # Read the json file into a DataFrame
    logger.debug("reading json file '%s'", args.takeout.name)
    with args.takeout as takeout:
        json_data = json.load(takeout)
    data = parse_locations(json_data["locations"])

    logger.debug("constructing data frame")
    df = pl.DataFrame(
        data,
        schema={
            "latitude": pl.Float32,
            "longitude": pl.Float32,
            "accuracy": pl.Int32,
            "timestamp": pl.Datetime,
            "source": pl.Utf8,
        },
    )

    # Filter when I was close to stadsmuur or grip
    is_close_stadsmuur = _is_close_to(STADSMUUR_LOCATION, CLOSEBY_DISTANCE)
    is_close_grip = _is_close_to(GRIP_LOCATION, CLOSEBY_DISTANCE)
    is_accurate = pl.col("accuracy") < ACCURATE_DISTANCE
    is_close = (is_close_stadsmuur | is_close_grip) & is_accurate
    # NOTE(review): the hour is taken from the stored timestamp, which is
    # presumably UTC rather than local time - confirm the intended cut-off.
    is_during_day = pl.col("timestamp").dt.hour() >= 8

    # Per day: keep the days on which the first and the last close-by fix are
    # more than MIN_VISIT_DURATION apart, and record which gym(s) were visited.
    visits = (
        df.filter(is_close & is_during_day)
        .with_columns(
            pl.col("timestamp").dt.date().alias("date"),
            is_close_stadsmuur.alias("stadsmuur"),
            is_close_grip.alias("grip"),
        )
        .group_by("date")
        .agg(
            pl.col("timestamp").min().alias("start"),
            pl.col("timestamp").max().alias("end"),
            (pl.col("timestamp").max() - pl.col("timestamp").min()).alias("duration"),
            pl.col("stadsmuur").sum().ne(0).alias("stadsmuur_count"),
            pl.col("grip").sum().ne(0).alias("grip_count"),
        )
        # Compare against a timedelta: a bare integer would not be
        # interpreted as a number of seconds for a Duration column.
        .filter(pl.col("duration") > MIN_VISIT_DURATION)
        .sort("date")
    )

    # Count the visit days per year; only the two count columns are summed.
    per_year = (
        visits.group_by(pl.col("date").dt.year().alias("year"))
        .agg(
            pl.col("stadsmuur_count").sum(),
            pl.col("grip_count").sum(),
        )
        .sort("year")
    )
    print(per_year)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Today's output: