Created
February 25, 2020 02:44
-
-
Save thiagomarzagao/f2fc571fa6ccd29bb87e14717d7ba65b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
from collections import Counter | |
from bs4 import BeautifulSoup | |
from scipy import stats | |
from matplotlib import pyplot as plt | |
# Location of the Apple Health export to analyse.
path = '/path/to/export.xml'

# Date marked with a vertical red line on every plot.
event_date = '2019-08-01'

# CSV files produced by the export step, in the order they are plotted.
filenames = [
    'calories.csv',
    'distance.csv',
    'lifting_calories.csv',
    'lifting_minutes.csv',
    'running_calories.csv',
    'running_minutes.csv',
    'steps.csv',
]


def daily_totals(elements, attribute, cast=float):
    """Sum ``cast(element[attribute])`` per day over *elements*.

    Args:
        elements: iterable of items supporting ``element[name]`` lookup
            (e.g. BeautifulSoup tags); each must have a ``creationDate``.
        attribute: name of the attribute holding the numeric value.
        cast: callable turning the raw attribute value into a number.

    Returns:
        A ``Counter`` mapping day string -> summed value, in first-seen order.

    Raises:
        KeyError: if an element lacks ``creationDate`` or *attribute*.
    """
    totals = Counter()
    for element in elements:
        # Only the first whitespace-separated token of creationDate is used
        # as the grouping key (the day); the rest is discarded.
        day = element['creationDate'].split()[0]
        totals[day] += cast(element[attribute])
    return totals


def totals_to_frame(totals, value_column):
    """Return a two-column DataFrame (``date``, *value_column*) from *totals*.

    Passing ``columns=`` to the constructor (instead of assigning
    ``df.columns`` afterwards) keeps this working when *totals* is empty.
    """
    return pd.DataFrame(list(totals.items()), columns=['date', value_column])


def write_totals(totals, name):
    """Write *totals* to ``<name>.csv`` with value column *name*."""
    totals_to_frame(totals, name).to_csv(name + '.csv', index=False)


def export_workouts(soup, activity_type, prefix):
    """Write ``<prefix>_minutes.csv`` and ``<prefix>_calories.csv``.

    Both totals are computed before anything is written, so a malformed
    workout element fails before any file for that activity is created.
    """
    workouts = soup.find_all('Workout', {'workoutActivityType': activity_type})
    minutes = daily_totals(workouts, 'duration')
    calories = daily_totals(workouts, 'totalEnergyBurned')
    write_totals(minutes, prefix + '_minutes')
    write_totals(calories, prefix + '_calories')


def smooth_series(df, z_threshold=3, window=14):
    """Drop outlier rows, then return the rolling mean of what is left.

    A row is kept only if every column's absolute z-score is below
    *z_threshold*. The rolling window counts rows, not calendar days.
    """
    keep = (np.abs(stats.zscore(df)) < z_threshold).all(axis=1)
    return df[keep].rolling(window=window).mean()


def plot_series(filename):
    """Load one exported CSV, smooth it and show it with the event marker."""
    df = pd.read_csv(filename)
    if df.empty:
        # Nothing was recorded for this metric; DataFrame.plot() would fail.
        print('no data in ' + filename + ', skipping plot')
        return
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date')
    print(df)
    smooth_series(df).plot()
    plt.axvline(event_date, color='red')
    plt.show()


def main():
    """Export per-day totals from the health XML to CSV, then plot them."""
    # NOTE(review): assumes the export is UTF-8; without an explicit encoding
    # the platform default is used, which may differ — confirm for your file.
    with open(path, encoding='utf-8') as f:
        raw = f.read()
    soup = BeautifulSoup(raw, 'xml')

    export_workouts(soup, 'HKWorkoutActivityTypeRunning', 'running')
    # 'Other' workouts are what this script reports as lifting.
    export_workouts(soup, 'HKWorkoutActivityTypeOther', 'lifting')

    # Total calories = active + basal energy, summed per day. Active records
    # are accumulated first, matching the order the totals are built in.
    active = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierActiveEnergyBurned'})
    basal = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierBasalEnergyBurned'})
    write_totals(daily_totals([*active, *basal], 'value'), 'calories')

    # Step values are parsed as integers (number of steps).
    steps = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierStepCount'})
    write_totals(daily_totals(steps, 'value', int), 'steps')

    # Distance values are parsed as floats (noted as km by the author).
    distance = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierDistanceWalkingRunning'})
    write_totals(daily_totals(distance, 'value'), 'distance')

    for filename in filenames:
        plot_series(filename)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment