Created
February 12, 2024 16:30
-
-
Save kongmunist/a4945c339b11d4e953e5e806344e42c8 to your computer and use it in GitHub Desktop.
Code to correct Fitbit sleep data using Google Timeline data, as described in andykong.org/blog/glocfitbittzcorrection
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
# use long/lat at the end of the day to get the timezone, then correct each sleep date's time to UTC. | |
from timezonefinder import TimezoneFinder | |
import datetime | |
import pytz | |
# Example: resolve a timezone by name and read its current offset from UTC,
# expressed in hours (the bare expression only displays a value when run
# interactively).
pacific_now = datetime.datetime.now(pytz.timezone('US/Pacific'))
pacific_now.utcoffset() / datetime.timedelta(hours=1)
# Load the location history. The rest of the script reads the `timestamp`,
# `Longitude` and `Latitude` columns from this frame.
df = pd.read_csv("cooked/loc.csv")
print(f"Total rows: {df.shape[0]}")
# Assign a timezone name and a UTC offset (in hours) to each location row.
obj = TimezoneFinder()
df['timestamp'] = pd.to_datetime(df['timestamp'], format="mixed")
df['timezone'] = [
    obj.timezone_at(lng=lng, lat=lat)
    for lng, lat in zip(df['Longitude'], df['Latitude'])
]

# The offset is the zone's offset *at the moment the script runs*, not at the
# row's own timestamp; the DST adjustment is applied separately further down.
# Compute it once per distinct zone from a single "now" and map it back onto
# the rows, rather than building a pytz zone for every row.
# Rows for which no zone was found (timezone_at returned None) get NaN.
now_utc = datetime.datetime.now(pytz.utc)
offset_by_zone = {
    zone: now_utc.astimezone(pytz.timezone(zone)).utcoffset().total_seconds() / 60 / 60
    for zone in df['timezone'].dropna().unique()
}
df['timezone_offset'] = df['timezone'].map(offset_by_zone).astype(float)
# Flag rows recorded while daylight saving time (DST) was in effect.
# `timezone_offset` doubles as a coarse region selector: offsets strictly
# between -8 and -4 are checked against the American DST windows, offsets
# strictly between -2 and 3 against the European ones.
# American DST:
# Sun, Mar 14, 2021 – Sun, Nov 7, 2021
# Sun, Mar 13, 2022 – Sun, Nov 6, 2022
# Sun, Mar 12, 2023 – Sun, Nov 5, 2023
# Sun, Mar 10, 2024 – Sun, Nov 3, 2024
# Euro DST:
# Sun, Mar 27, 2022 – Sun, Oct 30, 2022
# Sun, Mar 26, 2023 – Sun, Oct 29, 2023
# Timestamps outside the listed years are never flagged.
american_dst_pairs = [
    (datetime.datetime(2021, 3, 14), datetime.datetime(2021, 11, 7)),
    (datetime.datetime(2022, 3, 13), datetime.datetime(2022, 11, 6)),
    (datetime.datetime(2023, 3, 12), datetime.datetime(2023, 11, 5)),
    (datetime.datetime(2024, 3, 10), datetime.datetime(2024, 11, 3)),
]
euro_dst_pairs = [
    (datetime.datetime(2022, 3, 27), datetime.datetime(2022, 10, 30)),
    (datetime.datetime(2023, 3, 26), datetime.datetime(2023, 10, 29)),
]


def _within_any_window(timestamps, windows):
    """Return a boolean Series that is True where a timestamp lies strictly
    inside at least one (start, end) window.

    The result shares the index of `timestamps`.
    """
    inside = pd.Series(False, index=timestamps.index)
    for start, end in windows:
        inside |= (timestamps > start) & (timestamps < end)
    return inside


american_zone = (df['timezone_offset'] > -8) & (df['timezone_offset'] < -4)
euro_zone = (df['timezone_offset'] > -2) & (df['timezone_offset'] < 3)

in_american_dst = american_zone & _within_any_window(df['timestamp'], american_dst_pairs)
in_euro_dst = euro_zone & _within_any_window(df['timestamp'], euro_dst_pairs)

# Both masks cover every row of df, so the OR never meets a NaN. Assigning a
# subset-indexed result into df['dst'] and OR-ing afterwards leaves NaN on the
# left-hand side, and pandas evaluates `NaN | True` to False for object
# columns, which silently dropped every European DST flag.
# Stored as 0/1 integers so the flag can be used directly as a number of hours.
df['dst'] = (in_american_dst | in_euro_dst).astype(int)
# Build the local wall-clock timestamp for every location row: shift the
# stored timestamp by the zone offset, minus one hour where the DST flag is set.
# NOTE(review): DST is subtracted here, although DST normally moves local time
# forward. The UTC conversion of the sleep data later in this script applies
# the exact inverse, so the two are self-consistent. Confirm the intended sign.
# (Without the DST term this would simply be timestamp + timezone_offset hours.)
net_offset_hours = df['timezone_offset'] - df['dst']
df['timestamp_local'] = df['timestamp'] + pd.to_timedelta(net_offset_hours, unit='h')
# Load the Fitbit sleep export and parse its `timestamp` column into datetimes.
sleepfile = "../fitbitimporterdata/cooked/Sleep_sleep.csv"
sleep = pd.read_csv(sleepfile).assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format="mixed")
)
# Attach a timezone offset and DST flag to every sleep record by finding the
# location sample on the same local date whose local time is closest to it.
df['date_local'] = df['timestamp_local'].dt.date

# Create the result columns up front so they exist (as NaN / NaT) even when no
# sleep record finds a match, and so `howclose` has a proper timedelta dtype
# for the later 6-hour comparison.
sleep['timezone_offset'] = np.nan
sleep['dst'] = np.nan
sleep['howclose'] = pd.Series(pd.NaT, index=sleep.index, dtype='timedelta64[ns]')

# Group the location rows by local date once instead of re-filtering the whole
# frame for every sleep record.
locations_by_date = {
    day: rows for day, rows in df.groupby('date_local', sort=False)
}

for i, row in sleep.iterrows():
    # Sleep timestamps are treated as local time, so compare on the local date.
    localdate = row['timestamp'].date()
    localrows = locations_by_date.get(localdate)
    if localrows is not None:
        # Distance in time between each same-day location sample and the sleep
        # record; kept as a separate Series rather than written into a slice
        # of df.
        diff = (localrows['timestamp_local'] - row['timestamp']).abs()
        nearest = diff.to_numpy().argmin()  # first sample with the smallest gap
        sleep.at[i, 'timezone_offset'] = localrows['timezone_offset'].iloc[nearest]
        sleep.at[i, 'dst'] = localrows['dst'].iloc[nearest]
        sleep.at[i, 'howclose'] = diff.iloc[nearest]
    # Progress report every 100 records.
    if i % 100 == 0:
        print(f"Processed {i}/{sleep.shape[0]}")
# Count the sleep records that found no location match (the bare expression
# only displays a value when run interactively).
missing_offset = sleep['timezone_offset'].isna()
missing_offset.sum()
# Keep only the records that did get a timezone offset.
sleep2 = sleep[~missing_offset]
# Plot the offset of every matched record in blue and draw the unmatched
# records in red at y=0; the x-axis starts at 2021-05-01.
unmatched = sleep[missing_offset]
plt.scatter(sleep2['timestamp'], sleep2['timezone_offset'], c='blue', marker='.')
plt.scatter(unmatched['timestamp'], [0] * unmatched.shape[0], c='red', marker='.')
plt.xlim([datetime.datetime(2021, 5, 1), max(sleep['timestamp'])])
plt.title("Timezone offset for each sleep date, missing in red")
# Keep only sleep records whose nearest location sample is less than 6 hours
# away. Take an explicit copy so the column assignments below write to an
# independent frame instead of a slice (avoids SettingWithCopyWarning).
sleep2 = sleep2[sleep2['howclose'] < pd.Timedelta(6, unit='h')].copy()

# Convert the sleep timestamp to UTC by undoing the zone offset and adding
# back one hour where the DST flag is set.
sleep2['timestamp_utc'] = (
    sleep2['timestamp']
    - pd.to_timedelta(sleep2['timezone_offset'], unit='h')
    + pd.to_timedelta(sleep2['dst'], unit='h')
)
# The exported `timestamp` column carries the corrected UTC value as well.
sleep2['timestamp'] = sleep2['timestamp_utc']

# save
sleep2.to_csv("../fitbitimporterdata/cooked/Sleep_sleep_loctzcorrected_utc.csv", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment