Skip to content

Instantly share code, notes, and snippets.

@dsprenkels
Created March 12, 2024 20:58
Show Gist options
  • Save dsprenkels/20f345977fb4ab834d8db1aff4b51252 to your computer and use it in GitHub Desktop.
Save dsprenkels/20f345977fb4ab834d8db1aff4b51252 to your computer and use it in GitHub Desktop.
climbing_stats.py
#!/usr/bin/env python3
# climbing_stats.py - Compute climbing stats from Google location history
#
# Author: Amber Sprenkels <[email protected]>
# Date: 2024-03-12
#
# This script computes the number of times I went to a local climbing or
# bouldering gym close to where I live, based on my Google location history.
import argparse
import datetime
import json
import logging
import math
import os

import polars as pl
# Reference points as (latitude, longitude) tuples in decimal degrees; the
# script counts the days on which the location history puts me near either one.
STADSMUUR_LOCATION = (51.821852202326895, 5.788025389774361)
GRIP_LOCATION = (51.8130421187338, 5.838377108340926)
# Used to convert a distance in meters into an angle in degrees.
EARTH_CIRCUMFERENCE = 40075000 # in meters
# Radius around a reference point within which a location fix counts as "close".
CLOSEBY_DISTANCE = 50 # in meters
# Location fixes whose reported "accuracy" is not below this value are ignored.
ACCURATE_DISTANCE = 100 # in meters
def parse_locations(records):
    """Convert raw Takeout location records into column lists for a DataFrame.

    Args:
        records: Iterable of dicts as found under the "locations" key of the
            'Records.json' file. The keys read are "latitudeE7",
            "longitudeE7", "accuracy", "timestamp" and "source".

    Returns:
        A dict mapping each column name ("latitude", "longitude", "accuracy",
        "timestamp", "source") to a list with one entry per kept record.
        Coordinates are converted from E7 integers to degrees; timestamps are
        parsed into ``datetime.datetime`` objects, or left as ``None`` when
        the record has none.

    Records that lack a latitude or a longitude are skipped: they can never be
    matched against a reference point, and dividing the missing value would
    raise a ``TypeError``.
    """
    columns = {
        "latitude": [],
        "longitude": [],
        "accuracy": [],
        "timestamp": [],
        "source": [],
    }
    for loc in records:
        latitude_e7 = loc.get("latitudeE7")
        longitude_e7 = loc.get("longitudeE7")
        if latitude_e7 is None or longitude_e7 is None:
            continue
        timestamp = loc.get("timestamp")
        if timestamp is not None:
            # Python versions before 3.11 reject a trailing "Z" in
            # fromisoformat(); "+00:00" is the equivalent spelling.
            if timestamp.endswith("Z"):
                timestamp = timestamp[:-1] + "+00:00"
            timestamp = datetime.datetime.fromisoformat(timestamp)
        columns["latitude"].append(latitude_e7 / 10**7)
        columns["longitude"].append(longitude_e7 / 10**7)
        columns["accuracy"].append(loc.get("accuracy"))
        columns["timestamp"].append(timestamp)
        columns["source"].append(loc.get("source"))
    return columns


def is_close_to(location):
    """Build an expression that is true for rows near *location*.

    Args:
        location: A (latitude, longitude) tuple in decimal degrees.

    Returns:
        A polars boolean expression over the "latitude" and "longitude"
        columns that is true when the row lies within CLOSEBY_DISTANCE meters
        of *location*.

    The distance is a flat-plane approximation, which is fine for a radius of
    tens of meters. The longitude difference is scaled by cos(latitude)
    because a degree of longitude is shorter than a degree of latitude away
    from the equator; without the scaling the east-west radius shrinks.
    """
    latitude, longitude = location
    radius_deg = CLOSEBY_DISTANCE / EARTH_CIRCUMFERENCE * 360
    longitude_scale = math.cos(math.radians(latitude))
    d_lat = pl.col("latitude") - latitude
    d_lon = (pl.col("longitude") - longitude) * longitude_scale
    return (d_lat**2 + d_lon**2) < radius_deg**2


def main():
    """Read a Takeout 'Records.json' file and print per-year gym visit counts."""
    # Set up the logging
    logging.basicConfig(level=os.environ.get("PYTHON_LOG", "INFO").upper())
    logger = logging.getLogger(__name__)
    pl.Config.set_tbl_rows(1000)

    # Parse the arguments and open a file with the climbing stats
    parser = argparse.ArgumentParser(
        description="Compute climbing stats from Google location history"
    )
    parser.add_argument(
        "takeout",
        type=argparse.FileType("r"),
        help="The 'Records.json' location history json file",
    )
    args = parser.parse_args()

    # Read the json file into a DataFrame
    logger.debug("reading json file '%s'", args.takeout.name)
    with args.takeout as takeout:  # make sure the input file gets closed
        json_data = json.load(takeout)
    records = json_data["locations"]
    data = parse_locations(records)
    skipped = len(records) - len(data["latitude"])
    if skipped:
        logger.debug("skipped %d records without coordinates", skipped)

    logger.debug("constructing data frame")
    df = pl.DataFrame(
        data,
        schema={
            "latitude": pl.Float32,
            "longitude": pl.Float32,
            "accuracy": pl.Int32,
            "timestamp": pl.Datetime,
            "source": pl.Utf8,
        },
    )

    # Filter when I was close to stadsmuur or grip
    is_close_stadsmuur = is_close_to(STADSMUUR_LOCATION)
    is_close_grip = is_close_to(GRIP_LOCATION)
    is_accurate = pl.col("accuracy") < ACCURATE_DISTANCE
    is_close = (is_close_stadsmuur | is_close_grip) & is_accurate
    # NOTE(review): the hour is taken from the timestamp as stored; Takeout
    # timestamps look like UTC, so this is presumably 08:00 UTC rather than
    # local time -- confirm.
    is_during_day = pl.col("timestamp").dt.hour() >= 8

    # Per day: keep the days where the first and the last nearby fix are more
    # than 15 minutes apart, and record which location(s) were seen that day.
    visits = (
        df.filter(is_close & is_during_day)
        .with_columns(
            pl.col("timestamp").dt.date().alias("date"),
            is_close_stadsmuur.alias("stadsmuur"),
            is_close_grip.alias("grip"),
        )
        .group_by("date")
        .agg(
            pl.col("timestamp").min().alias("start"),
            pl.col("timestamp").max().alias("end"),
            (pl.col("timestamp").max() - pl.col("timestamp").min()).alias("duration"),
            pl.col("stadsmuur").sum().ne(0).alias("stadsmuur_count"),
            pl.col("grip").sum().ne(0).alias("grip_count"),
        )
        # Compare against a real timedelta: a bare integer such as 15 * 60 is
        # not interpreted as seconds when compared with a Duration column.
        .filter(pl.col("duration") > datetime.timedelta(minutes=15))
        .sort("date")
    )

    # Count visit days per year; only the two count columns are aggregated.
    print(
        visits.group_by(pl.col("date").dt.year().alias("year"))
        .agg(
            pl.col("stadsmuur_count").sum(),
            pl.col("grip_count").sum(),
        )
        .sort("year")
    )


if __name__ == "__main__":
    main()
@dsprenkels
Copy link
Author

Today's output:

shape: (9, 3)
┌──────┬─────────────────┬────────────┐
│ year ┆ stadsmuur_count ┆ grip_count │
│ ---  ┆ ---             ┆ ---        │
│ i32  ┆ u32             ┆ u32        │
╞══════╪═════════════════╪════════════╡
│ 2016 ┆ 0               ┆ 3          │
│ 2017 ┆ 0               ┆ 12         │
│ 2018 ┆ 0               ┆ 43         │
│ 2019 ┆ 5               ┆ 19         │
│ 2020 ┆ 7               ┆ 18         │
│ 2021 ┆ 13              ┆ 17         │
│ 2022 ┆ 17              ┆ 15         │
│ 2023 ┆ 22              ┆ 12         │
│ 2024 ┆ 6               ┆ 2          │
└──────┴─────────────────┴────────────┘

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment