Created
October 22, 2024 14:41
-
-
Save gloriouslyawkwardlife/8bb7b78b2446e2baac66995a9df9c5ec to your computer and use it in GitHub Desktop.
Python script that pulls my personal data sources into MariaDB for my Daily Log creation; 'xxxx' marks values that must be supplied by the end user.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3.12 | |
import pandas as pd | |
import sqlalchemy | |
import requests as r | |
from datetime import datetime, timedelta | |
import xml.etree.ElementTree as et | |
import zipfile | |
import json | |
# set Folder for charts and the Apple Health export file ('iCloud' for Pythonista or a folder on the PC)
# Base folder containing export.zip and chart output; 'xxxx' is user-supplied.
folder = "xxxx"
# MariaDB connection for all to_sql() loads below; credentials ('xxxx') are user-supplied.
# NOTE(review): credentials are hard-coded in source — consider an env var or config file.
engine = sqlalchemy.create_engine('mariadb+pymysql://xxxx:[email protected]/personalinformatics?charset=utf8mb4')
# Apple Health
# Parse the exported Health archive and load every quantity-type sample
# (numeric measurements: steps, heart rate, weight, ...) into `applehealth`.
# Use a context manager so the archive handle is released once the XML
# tree is fully read into memory (the original left it open).
with zipfile.ZipFile(f"{folder}/export.zip", "r") as archive:
    tree = et.parse(archive.open("apple_health_export/export.xml"))
root = tree.getroot()


def _hk_dt(attrs, key):
    """Parse a Health export timestamp ("2024-10-01 07:15:00 -0500") to a local-tz datetime."""
    return datetime.strptime(attrs[key], "%Y-%m-%d %H:%M:%S %z").astimezone()


records = []
for n in root.iter("Record"):
    if n.attrib["type"].startswith("HKQuantityTypeIdentifier"):
        records.append(
            {
                "type": n.attrib["type"],
                # a few quantity records omit "unit"; .get avoids a KeyError
                "unit": n.attrib.get("unit"),
                "sourceName": n.attrib["sourceName"],
                "creationDate": _hk_dt(n.attrib, "creationDate"),
                "startDate": _hk_dt(n.attrib, "startDate"),
                "endDate": _hk_dt(n.attrib, "endDate"),
                "value": float(n.attrib["value"]),
            }
        )
# The same sample can arrive from several synced devices; keep the first
# occurrence per (type, source, time window).
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth", engine, if_exists="replace", index=False)
# Category-type samples (sleep stages, stand hours, ...) carry string
# values, so "value" is stored verbatim rather than cast to float.
_hk_cat_fmt = "%Y-%m-%d %H:%M:%S %z"
records = [
    {
        "type": rec.attrib["type"],
        "sourceName": rec.attrib["sourceName"],
        "creationDate": datetime.strptime(
            rec.attrib["creationDate"], _hk_cat_fmt
        ).astimezone(),
        "startDate": datetime.strptime(
            rec.attrib["startDate"], _hk_cat_fmt
        ).astimezone(),
        "endDate": datetime.strptime(
            rec.attrib["endDate"], _hk_cat_fmt
        ).astimezone(),
        "value": rec.attrib["value"],
    }
    for rec in root.iter("Record")
    if rec.attrib["type"].startswith("HKCategoryTypeIdentifier")
]
# De-duplicate multi-device syncs exactly as for the quantity table.
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth_category", engine, if_exists="replace", index=False)
# Swarm Checkins
# Page through the full checkin history, 250 per request, until the API
# returns an empty page.
_swarm_url = "https://api.foursquare.com/v2/users/self/checkins"
offset = 0
data = r.get(f"{_swarm_url}?limit=250&v=20241001&oauth_token=xxxx").json()
records = []
while len(data["response"]["checkins"]["items"]) > 0:
    for d in data["response"]["checkins"]["items"]:
        venue = d["venue"]
        location = venue["location"]
        categories = venue.get("categories") or []
        records.append(
            {
                "checkinId": d["id"],
                # createdAt is a Unix epoch; convert to the local timezone
                "createdAt": datetime.fromtimestamp(d["createdAt"]).astimezone(),
                "venue": venue["name"],
                # formattedAddress can be absent; joining None would raise TypeError
                "address": ", ".join(location.get("formattedAddress") or []),
                "latitude": location["lat"],
                "longitude": location["lng"],
                # some venues have no category at all; the original indexed [0] blindly
                "category": categories[0]["name"] if categories else None,
                "privacy": venue.get("private"),
            }
        )
    offset += 250
    data = r.get(
        f"{_swarm_url}?offset={offset}&limit=250&v=20241001&oauth_token=xxxx"
    ).json()
pd.DataFrame(records).to_sql("checkins", engine, if_exists="replace", index=False)
# Bouncie Trips | |
token = r.post( | |
"https://auth.bouncie.com/oauth/token", | |
headers={"Content-Type": "application/json"}, | |
json={ | |
"client_id": "xxxx", | |
"client_secret": "xxxx", | |
"grant_type": "authorization_code", | |
"code": "xxxx", | |
"redirect_uri": "xxxx", | |
}, | |
).json() | |
def _bouncie_row(t):
    """Flatten one Bouncie trip dict into a DB row; tolerates missing fields."""
    started = datetime.fromisoformat(t.get("startTime")).astimezone()
    raw_end = t.get("endTime")
    idle = t.get("totalIdleDuration")
    return {
        "transactionId": t.get("transactionId"),
        "hardBrakingCount": t.get("hardBrakingCount"),
        "hardAccelerationCount": t.get("hardAccelerationCount"),
        "distance": t.get("distance"),
        # keep the raw GeoJSON track as a JSON string column
        "gps": json.dumps(t.get("gps")),
        "startTime": started,
        # an in-progress trip has no endTime yet; fall back to startTime
        "endTime": (
            datetime.fromisoformat(raw_end).astimezone()
            if raw_end is not None
            else started
        ),
        "startOdometer": t.get("startOdometer"),
        "endOdometer": t.get("endOdometer"),
        "averageSpeed": t.get("averageSpeed"),
        "maxSpeed": t.get("maxSpeed"),
        "fuelConsumed": t.get("fuelConsumed"),
        "timeZone": t.get("timeZone"),
        "totalIdleDuration": int(idle) if idle is not None else 0,
        "imei": t.get("imei"),
    }


# Walk backwards from tomorrow in 6-day windows until 2022-01-01, pulling
# each window's trips from the Bouncie API.
end = datetime.today().date() + timedelta(days=1)
start = end - timedelta(days=6)
records = []
while start > datetime(2022, 1, 1).date():
    trips = r.get(
        "https://api.bouncie.dev/v1/trips",
        params={
            "imei": "xxxx",
            "gps-format": "geojson",
            "starts-after": start.strftime("%Y-%m-%d"),
            "ends-before": end.strftime("%Y-%m-%d"),
        },
        headers={"Authorization": token["access_token"], "Accept": "application/json"},
    ).json()
    records.extend(_bouncie_row(t) for t in trips)
    end = start
    start = end - timedelta(days=6)
# Windows can overlap at their edges; drop duplicate trips by id.
pd.DataFrame(records).drop_duplicates(subset="transactionId").to_sql(
    "bouncietrips", engine, if_exists="replace", index=False
)
# Threads
# Fetch my Threads posts, following the paging cursor until exhausted.
feed = r.get(
    'https://graph.threads.net/v1.0/me/threads',
    params={
        'fields': 'id,media_product_type,media_type,media_url,permalink,text,timestamp,shortcode,thumbnail_url,is_quote_post',
        'limit': 100,
        'access_token': 'xxxx'
    }
).json()
records = []
while True:
    for f in feed['data']:
        records.append({
            "id": f.get('id'),
            "media_product_type": f.get('media_product_type'),
            "media_type": f.get('media_type'),
            "media_url": f.get('media_url'),
            "permalink": f.get('permalink'),
            # owner/username are not in the requested fields; stored as None
            # unless the API adds them — kept for schema stability.
            "owner": json.dumps(f.get('owner')),
            "username": f.get('username'),
            "text": f.get('text'),
            "timestamp": f.get('timestamp'),
            "shortcode": f.get('shortcode'),
            "is_quote_post": f.get('is_quote_post'),
            "children": json.dumps(f.get('children')) if f.get('children') else None
        })
    # Stop when there is no further page. The original used a bare
    # `except:` for loop control, which also silently swallowed network
    # and JSON errors mid-pagination.
    next_url = (feed.get('paging') or {}).get('next')
    if not next_url:
        break
    feed = r.get(next_url).json()
pd.DataFrame(records).to_sql('threads', engine, if_exists='replace', index=False)
# Overland (from CouchDB)
def _nonneg(props, key):
    """Return props[key] when present and >= 0, else None.

    Overland reports some sensor fields (accuracy, course, battery) as
    negative placeholder values when the reading is unavailable.
    """
    val = props.get(key)
    return val if val is not None and val >= 0 else None


data = r.get("http://macmini.local:5984/overland/_all_docs?include_docs=true").json()
records = []
for row in data['rows']:
    for location in row['doc']['locations']:
        # hoist the properties dict: the original re-looked it up
        # up to three times per field
        props = location['properties']
        motion = props.get('motion')
        records.append({
            # GeoJSON coordinates are ordered [longitude, latitude]
            'latitude': location['geometry']['coordinates'][1],
            'longitude': location['geometry']['coordinates'][0],
            'battery_state': props.get('battery_state'),
            'motion': ','.join(motion) if motion else None,
            'timestamp': datetime.fromisoformat(props.get('timestamp')).astimezone(),
            'battery_level': _nonneg(props, 'battery_level'),
            'speed_accuracy': _nonneg(props, 'speed_accuracy'),
            'horizontal_accuracy': _nonneg(props, 'horizontal_accuracy'),
            'vertical_accuracy': _nonneg(props, 'vertical_accuracy'),
            'wifi': props.get('wifi'),
            'course': _nonneg(props, 'course'),
            'altitude': props.get('altitude'),
            'course_accuracy': _nonneg(props, 'course_accuracy')
        })
df = pd.DataFrame(records)
df.to_sql('overland', engine, if_exists='replace', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment