Skip to content

Instantly share code, notes, and snippets.

@gloriouslyawkwardlife
Created October 22, 2024 14:41
Show Gist options
  • Save gloriouslyawkwardlife/8bb7b78b2446e2baac66995a9df9c5ec to your computer and use it in GitHub Desktop.
Save gloriouslyawkwardlife/8bb7b78b2446e2baac66995a9df9c5ec to your computer and use it in GitHub Desktop.
Python code I use to pull my data sources into MariaDB for my Daily Log creation. 'xxxx' marks placeholder values that must be supplied by the end user.
#!/usr/bin/env python3.12
import pandas as pd
import sqlalchemy
import requests as r
from datetime import datetime, timedelta
import xml.etree.ElementTree as et
import zipfile
import json
# Configuration: folder holding generated charts and the Apple Health export
# zip ('iCloud' for Pythonista on iOS, or any folder path on a PC).
folder = "xxxx"
# MariaDB connection for the personalinformatics schema; the 'xxxx' pieces are
# user-supplied credentials/host (partially scrubbed by GitHub's email filter).
# NOTE(review): credentials are embedded in the URL — consider loading them
# from environment variables instead of hard-coding.
engine = sqlalchemy.create_engine('mariadb+pymysql://xxxx:[email protected]/personalinformatics?charset=utf8mb4')
# Apple Health
# Open the export archive with a context manager so the zip handle is closed
# deterministically (the original left it open).
with zipfile.ZipFile(f"{folder}/export.zip", "r") as archive:
    tree = et.parse(archive.open("apple_health_export/export.xml"))
root = tree.getroot()


def _parse_hk_timestamp(raw):
    """Parse an Apple Health timestamp ('YYYY-MM-DD HH:MM:SS ±HHMM') and
    convert it to the local timezone."""
    return datetime.strptime(raw, "%Y-%m-%d %H:%M:%S %z").astimezone()


records = []
for n in root.iter("Record"):
    # Quantity records carry a numeric value plus a unit string.
    if n.attrib["type"].startswith("HKQuantityTypeIdentifier"):
        records.append(
            {
                "type": n.attrib["type"],
                "unit": n.attrib["unit"],
                "sourceName": n.attrib["sourceName"],
                "creationDate": _parse_hk_timestamp(n.attrib["creationDate"]),
                "startDate": _parse_hk_timestamp(n.attrib["startDate"]),
                "endDate": _parse_hk_timestamp(n.attrib["endDate"]),
                "value": float(n.attrib["value"]),
            }
        )
# Multiple devices can report the same measurement; keep the first occurrence
# of each (type, source, time-span) combination.
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth", engine, if_exists="replace", index=False)
# Apple Health category records (sleep analysis, stand hours, ...) keep their
# value as the raw HK identifier string rather than a number.
_HK_DATE_FMT = "%Y-%m-%d %H:%M:%S %z"
records = [
    {
        "type": rec.attrib["type"],
        "sourceName": rec.attrib["sourceName"],
        "creationDate": datetime.strptime(rec.attrib["creationDate"], _HK_DATE_FMT).astimezone(),
        "startDate": datetime.strptime(rec.attrib["startDate"], _HK_DATE_FMT).astimezone(),
        "endDate": datetime.strptime(rec.attrib["endDate"], _HK_DATE_FMT).astimezone(),
        "value": rec.attrib["value"],
    }
    for rec in root.iter("Record")
    if rec.attrib["type"].startswith("HKCategoryTypeIdentifier")
]
# Drop duplicate reports of the same record before loading the table.
pd.DataFrame(records).drop_duplicates(
    subset=["type", "sourceName", "startDate", "endDate"]
).to_sql("applehealth_category", engine, if_exists="replace", index=False)
# Swarm Checkins
# Page through the Foursquare checkins API 250 at a time until an empty page.
offset = 0
data = r.get(
    "https://api.foursquare.com/v2/users/self/checkins?limit=250&v=20241001&oauth_token=xxxx"
).json()
records = []
while len(data["response"]["checkins"]["items"]) > 0:
    for d in data["response"]["checkins"]["items"]:
        venue = d["venue"]
        location = venue["location"]
        categories = venue.get("categories") or []
        records.append(
            {
                "checkinId": d["id"],
                # createdAt is a Unix timestamp; convert to local time.
                "createdAt": datetime.fromtimestamp(d["createdAt"]).astimezone(),
                "venue": venue["name"],
                # formattedAddress can be absent; default to [] so join() does
                # not raise TypeError on None (bug in the original).
                "address": ", ".join(location.get("formattedAddress") or []),
                "latitude": location["lat"],
                "longitude": location["lng"],
                # Some venues have no categories; store NULL instead of
                # raising IndexError (bug in the original).
                "category": categories[0]["name"] if categories else None,
                "privacy": venue.get("private"),
            }
        )
    offset += 250
    data = r.get(
        f"https://api.foursquare.com/v2/users/self/checkins?offset={offset}&limit=250&v=20241001&oauth_token=xxxx"
    ).json()
pd.DataFrame(records).to_sql("checkins", engine, if_exists="replace", index=False)
# Bouncie Trips
# Exchange the OAuth authorization code for an access token.
# NOTE(review): authorization codes are single-use, so this request will fail
# on re-runs unless a fresh code is supplied — confirm intended workflow.
token = r.post(
    "https://auth.bouncie.com/oauth/token",
    headers={"Content-Type": "application/json"},
    json={
        "client_id": "xxxx",
        "client_secret": "xxxx",
        "grant_type": "authorization_code",
        "code": "xxxx",
        "redirect_uri": "xxxx",
    },
).json()
# Walk backwards through trip history in 6-day windows, from tomorrow down to
# 2022-01-01.
end = datetime.today().date() + timedelta(days=1)
start = end - timedelta(days=6)
records = []
while start > datetime(2022, 1, 1).date():
    trips = r.get(
        "https://api.bouncie.dev/v1/trips",
        params={
            "imei": "xxxx",
            "gps-format": "geojson",
            "starts-after": start.strftime("%Y-%m-%d"),
            "ends-before": end.strftime("%Y-%m-%d"),
        },
        headers={"Authorization": token["access_token"], "Accept": "application/json"},
    ).json()
    for t in trips:
        # Parse startTime once; an in-progress trip has no endTime yet, so
        # fall back to its startTime for that column.
        start_time = datetime.fromisoformat(t.get("startTime")).astimezone()
        raw_end = t.get("endTime")
        end_time = (
            datetime.fromisoformat(raw_end).astimezone()
            if raw_end is not None
            else start_time
        )
        idle = t.get("totalIdleDuration")
        records.append(
            {
                "transactionId": t.get("transactionId"),
                "hardBrakingCount": t.get("hardBrakingCount"),
                "hardAccelerationCount": t.get("hardAccelerationCount"),
                "distance": t.get("distance"),
                # gps is a GeoJSON object; serialize it for a text column.
                "gps": json.dumps(t.get("gps")),
                "startTime": start_time,
                "endTime": end_time,
                "startOdometer": t.get("startOdometer"),
                "endOdometer": t.get("endOdometer"),
                "averageSpeed": t.get("averageSpeed"),
                "maxSpeed": t.get("maxSpeed"),
                "fuelConsumed": t.get("fuelConsumed"),
                "timeZone": t.get("timeZone"),
                # Missing idle duration is treated as zero seconds of idling.
                "totalIdleDuration": int(idle) if idle is not None else 0,
                "imei": t.get("imei"),
            }
        )
    # Slide the window back by 6 days.
    end = start
    start = end - timedelta(days=6)
# Windows may overlap at boundaries; dedupe on the trip's unique id.
pd.DataFrame(records).drop_duplicates(subset="transactionId").to_sql(
    "bouncietrips", engine, if_exists="replace", index=False
)
# Threads
# Fetch the authenticated user's Threads posts, 100 per page.
feed = r.get(
    'https://graph.threads.net/v1.0/me/threads',
    params={
        'fields': 'id,media_product_type,media_type,media_url,permalink,text,timestamp,shortcode,thumbnail_url,is_quote_post',
        'limit': 100,
        'access_token': 'xxxx'
    }
).json()
records = []
while True:
    for f in feed['data']:
        records.append({
            "id": f.get('id'),
            "media_product_type": f.get('media_product_type'),
            "media_type": f.get('media_type'),
            "media_url": f.get('media_url'),
            "permalink": f.get('permalink'),
            "owner": json.dumps(f.get('owner')),
            "username": f.get('username'),
            "text": f.get('text'),
            "timestamp": f.get('timestamp'),
            "shortcode": f.get('shortcode'),
            "is_quote_post": f.get('is_quote_post'),
            # children is a nested object; serialize it, keeping NULL when absent.
            "children": json.dumps(f.get('children')) if f.get('children') else None
        })
    # Follow cursor pagination explicitly. The original used a bare `except:`
    # to break when 'paging' was missing, which also silently hid genuine
    # request/JSON errors.
    next_url = (feed.get('paging') or {}).get('next')
    if not next_url:
        break
    feed = r.get(next_url).json()
pd.DataFrame(records).to_sql('threads', engine, if_exists='replace', index=False)
# Overland (from CouchDB)


def _nonneg(value):
    """Return value when it is a non-negative number, else None.

    Overland reports negative sentinel values (or omits the key) when a
    sensor reading is unavailable — presumably -1; TODO confirm.
    """
    return value if value is not None and value >= 0 else None


data = r.get("http://macmini.local:5984/overland/_all_docs?include_docs=true").json()
records = []
for row in data['rows']:
    doc = row['doc']
    for location in doc['locations']:
        # Hoist the properties dict — the original re-fetched it up to three
        # times per field.
        props = location['properties']
        # GeoJSON coordinate order is [longitude, latitude].
        coords = location['geometry']['coordinates']
        motion = props.get('motion')
        records.append({
            'latitude': coords[1],
            'longitude': coords[0],
            'battery_state': props.get('battery_state'),
            # motion is a list of activity strings; store CSV or NULL if empty/absent.
            'motion': ','.join(motion) if motion else None,
            'timestamp': datetime.fromisoformat(props.get('timestamp')).astimezone(),
            'battery_level': _nonneg(props.get('battery_level')),
            'speed_accuracy': _nonneg(props.get('speed_accuracy')),
            'horizontal_accuracy': _nonneg(props.get('horizontal_accuracy')),
            'vertical_accuracy': _nonneg(props.get('vertical_accuracy')),
            'wifi': props.get('wifi'),
            'course': _nonneg(props.get('course')),
            'altitude': props.get('altitude'),
            'course_accuracy': _nonneg(props.get('course_accuracy')),
        })
df = pd.DataFrame(records)
df.to_sql('overland', engine, if_exists='replace', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment