Skip to content

Instantly share code, notes, and snippets.

@gloriouslyawkwardlife
Created October 22, 2024 14:41
Show Gist options
  • Save gloriouslyawkwardlife/8bb7b78b2446e2baac66995a9df9c5ec to your computer and use it in GitHub Desktop.
Save gloriouslyawkwardlife/8bb7b78b2446e2baac66995a9df9c5ec to your computer and use it in GitHub Desktop.
Python code I use to pull my data sources into MariaDB for my Daily Log creation. 'xxxx' marks placeholder values that must be supplied by the end user.
#!/usr/bin/env python3.12
import pandas as pd
import sqlalchemy
import requests as r
from datetime import datetime, timedelta
import xml.etree.ElementTree as et
import zipfile
import json
# Configuration: folder holding generated charts and the Apple Health export
# zip ('iCloud' for Pythonista on iOS, or any folder path on a PC).
folder = "xxxx"
# MariaDB connection for the personalinformatics schema; the 'xxxx' pieces are
# user-supplied credentials/host (partially scrubbed by GitHub's email filter).
# NOTE(review): credentials are embedded in the URL — consider loading them
# from environment variables instead of hard-coding.
engine = sqlalchemy.create_engine('mariadb+pymysql://xxxx:[email protected]/personalinformatics?charset=utf8mb4')
# Apple Health
# Open the export archive with a context manager so the zip handle is closed
# deterministically (the original left it open).
with zipfile.ZipFile(f"{folder}/export.zip", "r") as archive:
    tree = et.parse(archive.open("apple_health_export/export.xml"))
root = tree.getroot()


def _parse_hk_timestamp(raw):
    """Parse an Apple Health timestamp ('YYYY-MM-DD HH:MM:SS ±HHMM') and
    convert it to the local timezone."""
    return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S %z").astimezone()


records = []
for n in root.iter("Record"):
    # Quantity records carry a numeric value plus a unit string.
    if n.attrib["type"].startswith("HKQuantityTypeIdentifier"):
        records.append(
            {
                "type": n.attrib["type"],
                "unit": n.attrib["unit"],
                "sourceName": n.attrib["sourceName"],
                "creationDate": _parse_hk_timestamp(n.attrib["creationDate"]),
                "startDate": _parse_hk_timestamp(n.attrib["startDate"]),
                "endDate": _parse_hk_timestamp(n.attrib["endDate"]),
                "value": float(n.attrib["value"]),
            }
        )
# Multiple devices can report the same measurement; keep the first occurrence
# of each (type, source, time-span) combination.
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth", engine, if_exists="replace", index=False)
# Apple Health category records (sleep analysis, stand hours, ...) keep their
# value as the raw HK identifier string rather than a number.
_HK_DATE_FMT = "%Y-%m-%d %H:%M:%S %z"
records = [
    {
        "type": rec.attrib["type"],
        "sourceName": rec.attrib["sourceName"],
        "creationDate": datetime.strptime(rec.attrib["creationDate"], _HK_DATE_FMT).astimezone(),
        "startDate": datetime.strptime(rec.attrib["startDate"], _HK_DATE_FMT).astimezone(),
        "endDate": datetime.strptime(rec.attrib["endDate"], _HK_DATE_FMT).astimezone(),
        "value": rec.attrib["value"],
    }
    for rec in root.iter("Record")
    if rec.attrib["type"].startswith("HKCategoryTypeIdentifier")
]
# Drop duplicate reports of the same record before loading the table.
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth_category", engine, if_exists="replace", index=False)
# Swarm Checkins
# Page through the Foursquare checkins API 250 at a time until an empty page.
offset = 0
data = r.get(
    "https://api.foursquare.com/v2/users/self/checkins?limit=250&v=20241001&oauth_token=xxxx"
).json()
records = []
while len(data["response"]["checkins"]["items"]) > 0:
    for d in data["response"]["checkins"]["items"]:
        venue = d["venue"]
        location = venue["location"]
        categories = venue.get("categories") or []
        records.append(
            {
                "checkinId": d["id"],
                # createdAt is a Unix timestamp; convert to local time.
                "createdAt": datetime.fromtimestamp(d["createdAt"]).astimezone(),
                "venue": venue["name"],
                # formattedAddress can be absent; default to [] so join() does
                # not raise TypeError on None (bug in the original).
                "address": ", ".join(location.get("formattedAddress") or []),
                "latitude": location["lat"],
                "longitude": location["lng"],
                # Some venues have no categories; store NULL instead of
                # raising IndexError (bug in the original).
                "category": categories[0]["name"] if categories else None,
                "privacy": venue.get("private"),
            }
        )
    offset += 250
    data = r.get(
        f"https://api.foursquare.com/v2/users/self/checkins?offset={offset}&limit=250&v=20241001&oauth_token=xxxx"
    ).json()
pd.DataFrame(records).to_sql("checkins", engine, if_exists="replace", index=False)
# Bouncie Trips
# Exchange the OAuth authorization code for an access token.
# NOTE(review): authorization codes are single-use, so this request will fail
# on re-runs unless a fresh code is supplied — confirm intended workflow.
token = r.post(
    "https://auth.bouncie.com/oauth/token",
    headers={"Content-Type": "application/json"},
    json={
        "client_id": "xxxx",
        "client_secret": "xxxx",
        "grant_type": "authorization_code",
        "code": "xxxx",
        "redirect_uri": "xxxx",
    },
).json()
# Walk backwards through trip history in 6-day windows, from tomorrow down to
# 2022-01-01.
end = datetime.today().date() + timedelta(days=1)
start = end - timedelta(days=6)
records = []
while start > datetime(2022, 1, 1).date():
    trips = r.get(
        "https://api.bouncie.dev/v1/trips",
        params={
            "imei": "xxxx",
            "gps-format": "geojson",
            "starts-after": start.strftime("%Y-%m-%d"),
            "ends-before": end.strftime("%Y-%m-%d"),
        },
        headers={"Authorization": token["access_token"], "Accept": "application/json"},
    ).json()
    for t in trips:
        # Parse startTime once; an in-progress trip has no endTime yet, so
        # fall back to its startTime for that column.
        start_time = datetime.fromisoformat(t.get("startTime")).astimezone()
        raw_end = t.get("endTime")
        end_time = (
            datetime.fromisoformat(raw_end).astimezone()
            if raw_end is not None
            else start_time
        )
        idle = t.get("totalIdleDuration")
        records.append(
            {
                "transactionId": t.get("transactionId"),
                "hardBrakingCount": t.get("hardBrakingCount"),
                "hardAccelerationCount": t.get("hardAccelerationCount"),
                "distance": t.get("distance"),
                # gps is a GeoJSON object; serialize it for a text column.
                "gps": json.dumps(t.get("gps")),
                "startTime": start_time,
                "endTime": end_time,
                "startOdometer": t.get("startOdometer"),
                "endOdometer": t.get("endOdometer"),
                "averageSpeed": t.get("averageSpeed"),
                "maxSpeed": t.get("maxSpeed"),
                "fuelConsumed": t.get("fuelConsumed"),
                "timeZone": t.get("timeZone"),
                # Missing idle duration is treated as zero seconds of idling.
                "totalIdleDuration": int(idle) if idle is not None else 0,
                "imei": t.get("imei"),
            }
        )
    # Slide the window back by 6 days.
    end = start
    start = end - timedelta(days=6)
# Windows may overlap at boundaries; dedupe on the trip's unique id.
pd.DataFrame(records).drop_duplicates(subset="transactionId").to_sql(
    "bouncietrips", engine, if_exists="replace", index=False
)
# Threads
# Fetch the authenticated user's Threads posts, 100 per page.
feed = r.get(
    'https://graph.threads.net/v1.0/me/threads',
    params={
        'fields': 'id,media_product_type,media_type,media_url,permalink,text,timestamp,shortcode,thumbnail_url,is_quote_post',
        'limit': 100,
        'access_token': 'xxxx'
    }
).json()
records = []
while True:
    for f in feed['data']:
        records.append({
            "id": f.get('id'),
            "media_product_type": f.get('media_product_type'),
            "media_type": f.get('media_type'),
            "media_url": f.get('media_url'),
            "permalink": f.get('permalink'),
            "owner": json.dumps(f.get('owner')),
            "username": f.get('username'),
            "text": f.get('text'),
            "timestamp": f.get('timestamp'),
            "shortcode": f.get('shortcode'),
            "is_quote_post": f.get('is_quote_post'),
            # children is a nested object; serialize it, keeping NULL when absent.
            "children": json.dumps(f.get('children')) if f.get('children') else None
        })
    # Follow cursor pagination explicitly. The original used a bare `except:`
    # to break when 'paging' was missing, which also silently hid genuine
    # request/JSON errors.
    next_url = (feed.get('paging') or {}).get('next')
    if not next_url:
        break
    feed = r.get(next_url).json()
pd.DataFrame(records).to_sql('threads', engine, if_exists='replace', index=False)
# Overland (from CouchDB)


def _nonneg(value):
    """Return value when it is a non-negative number, else None.

    Overland reports negative sentinel values (or omits the key) when a
    sensor reading is unavailable — presumably -1; TODO confirm.
    """
    return value if value is not None and value >= 0 else None


data = r.get("http://macmini.local:5984/overland/_all_docs?include_docs=true").json()
records = []
for row in data['rows']:
    doc = row['doc']
    for location in doc['locations']:
        # Hoist the properties dict — the original re-fetched it up to three
        # times per field.
        props = location['properties']
        # GeoJSON coordinate order is [longitude, latitude].
        coords = location['geometry']['coordinates']
        motion = props.get('motion')
        records.append({
            'latitude': coords[1],
            'longitude': coords[0],
            'battery_state': props.get('battery_state'),
            # motion is a list of activity strings; store CSV or NULL if empty/absent.
            'motion': ','.join(motion) if motion else None,
            'timestamp': datetime.fromisoformat(props.get('timestamp')).astimezone(),
            'battery_level': _nonneg(props.get('battery_level')),
            'speed_accuracy': _nonneg(props.get('speed_accuracy')),
            'horizontal_accuracy': _nonneg(props.get('horizontal_accuracy')),
            'vertical_accuracy': _nonneg(props.get('vertical_accuracy')),
            'wifi': props.get('wifi'),
            'course': _nonneg(props.get('course')),
            'altitude': props.get('altitude'),
            'course_accuracy': _nonneg(props.get('course_accuracy')),
        })
df = pd.DataFrame(records)
df.to_sql('overland', engine, if_exists='replace', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment