Skip to content

Instantly share code, notes, and snippets.

@gurdiga
Last active March 28, 2026 10:27
Show Gist options
  • Select an option

  • Save gurdiga/3be6a79e88c507485d68775dbfdf0641 to your computer and use it in GitHub Desktop.

Select an option

Save gurdiga/3be6a79e88c507485d68775dbfdf0641 to your computer and use it in GitHub Desktop.
Apple Health sleep cycle analysis

Health Data

Apple Health exports and analysis scripts.

Contents

  • apple_health_export/ — Apple Health export (XML + clinical records)
  • sleep_cycles.py — Parses sleep data and prints nightly sleep cycles
  • Makefile — Convenience targets
  • export.zip — Original export archive (Health app → profile picture → Export All Health Data)

sleep_cycles.py

Reads sleep stage records from an Apple Health export.xml and groups them into ~90-minute cycles using REM blocks as boundaries.

Usage

python3 sleep_cycles.py <export.xml> [--from DATE] [--to DATE] [--source <name>]
  • export.xml — path to the XML file inside the unzipped export archive
  • --from — date to start from, format YYYY-MM-DD; defaults to all available nights
  • --to — end date for a range (inclusive); if omitted, only --from is shown
  • --source — filter by device name (e.g. "Vlad's Apple Watch")

Example

python3 sleep_cycles.py apple_health_export/export.xml --from 2026-03-28
Sleep cycles for 2026-03-28

          Start    End    Duration  Composition
----------------------------------------------------------------------
Cycle 1   22:49  00:27      1h 37m  Deep 6m → Core 65m → REM 25m
Cycle 2   00:27  01:57      1h 29m  Deep 41m → Core 36m → REM 12m
Cycle 3   01:57  03:46      1h 48m  Deep 9m → Core 52m → REM 47m
Cycle 4   03:46  05:18      1h 31m  Deep 9m → Core 56m → REM 25m

                     Total sleep  6h 25m

Cycle duration (4 cycles): median 1h 36m, P90 1h 48m

Cycle duration stats (median and P90) are always printed, computed across all cycles in the period:

python3 sleep_cycles.py apple_health_export/export.xml --from 2026-03-27 --to 2026-03-28
...
Cycle duration (8 cycles): median 1h 31m, P90 1h 49m

Performance

On first run the script builds a SQLite cache (export.db) next to the XML file. Subsequent runs skip the XML entirely.

Run Time
First (builds cache) ~5s
Subsequent ~0.1s

The cache is automatically rebuilt if the XML is newer than the .db file.

Makefile

make          # print last night's sleep report
make push     # push to gist
.DS_Store
__pycache__/
*.db
apple_health_export/
export.zip
EXPORT = apple_health_export/export.xml
.PHONY: default report push
default: report
report:
python3 sleep_cycles.py $(EXPORT) --from $(shell date +%Y-%m-%d)
push:
git push gist main
#!/usr/bin/env python3
import sys
import argparse
import os
import sqlite3
try:
from lxml import etree as ET
except ImportError:
import xml.etree.ElementTree as ET
from collections import defaultdict
from datetime import datetime, timedelta
SLEEP_TYPE = "HKCategoryTypeIdentifierSleepAnalysis"
MIN_REM_CYCLE_MIN = 3 # minimum total REM to count as a cycle boundary
def main():
parser = argparse.ArgumentParser()
parser.add_argument("export", metavar="export.xml")
parser.add_argument("--from", dest="date_from", metavar="DATE")
parser.add_argument("--to", dest="date_to", metavar="DATE")
parser.add_argument("--source", help="Filter by source name (e.g. \"Vlad's Apple Watch\")")
args = parser.parse_args()
records_by_date = load_all_sleep_records(args.export, args.source)
if args.date_from:
date_to = args.date_to or args.date_from
dates = [d for d in sorted(records_by_date) if args.date_from <= d <= date_to]
else:
dates = sorted(records_by_date)
all_cycle_durations = []
for date in dates:
records = records_by_date.get(date)
if not records:
if args.date_from:
print(f"No sleep data for {date}")
continue
cycles = print_cycles(date, records)
all_cycle_durations.extend(dur for _, _, dur, _ in cycles)
if all_cycle_durations:
all_cycle_durations.sort()
n = len(all_cycle_durations)
p90 = all_cycle_durations[min(int(n * 0.9), n - 1)]
median = all_cycle_durations[n // 2]
def fmt(m): h, m = divmod(m, 60); return f"{h}h {m:02d}m"
print(f"Cycle duration ({n} cycles): median {fmt(median)}, P90 {fmt(p90)}")
def db_path(xml_path):
return os.path.splitext(xml_path)[0] + ".db"
def build_db(xml_path, path):
print(f"Building cache {path} ...", file=sys.stderr)
root = ET.parse(xml_path).getroot()
con = sqlite3.connect(path)
con.execute("""
CREATE TABLE sleep (
startDate TEXT,
endDate TEXT,
stage TEXT,
source TEXT
)
""")
rows = []
for r in root.findall("Record"):
if r.get("type") != SLEEP_TYPE:
continue
if "InBed" in r.get("value", ""):
continue
stage = (
r.get("value", "")
.replace("HKCategoryValueSleepAnalysisAsleep", "")
.replace("HKCategoryValueSleepAnalysis", "")
)
rows.append((r.get("startDate"), r.get("endDate"), stage, r.get("sourceName", "")))
con.executemany("INSERT INTO sleep VALUES (?,?,?,?)", rows)
con.execute("CREATE INDEX idx_end ON sleep (endDate)")
con.commit()
con.close()
def load_all_sleep_records(xml_path, source=None):
path = db_path(xml_path)
# Build DB if missing or older than the XML
if not os.path.exists(path) or os.path.getmtime(path) < os.path.getmtime(xml_path):
build_db(xml_path, path)
con = sqlite3.connect(path)
query = "SELECT startDate, endDate, stage FROM sleep"
params = []
if source:
query += " WHERE source = ?"
params.append(source)
rows = con.execute(query, params).fetchall()
con.close()
by_date = defaultdict(list)
for start_str, end_str, stage in rows:
s = parse_dt(start_str)
e = parse_dt(end_str)
# Records ending before noon belong to that morning's night;
# records ending at noon or later are evening/onset records for the next night.
if e.hour >= 12:
night = (e + timedelta(days=1)).strftime("%Y-%m-%d")
else:
night = e.strftime("%Y-%m-%d")
by_date[night].append((s, e, stage))
for records in by_date.values():
records.sort()
return by_date
def print_cycles(date, records):
cycles = find_cycles(records)
labels = [f"Cycle {i+1}" for i in range(len(cycles))]
print(f"Sleep cycles for {date}")
print()
print(f"{'':8} {'Start':>5} {'End':>5} {'Duration':>10} Composition")
print("-" * 70)
for label, (start, end, dur, recs) in zip(labels, cycles):
composition = summarize_stages(recs)
h, m = divmod(dur, 60)
dur_str = f"{h}h {m:02d}m" if h else f"{m}m"
print(f"{label:8} {start.strftime('%H:%M'):>5} {end.strftime('%H:%M'):>5} {dur_str:>10} {composition}")
total = sum(dur for _, _, dur, _ in cycles)
h, m = divmod(total, 60)
print(f"\n{'Total sleep':>32} {h}h {m:02d}m")
print()
return cycles
def find_cycles(records):
"""Split records into cycles using REM blocks as cycle boundaries."""
cycles = []
current_start = records[0][0] if records else None
i = 0
while i < len(records):
s, e, stage = records[i]
if stage == "REM":
# Merge consecutive REM blocks (possibly separated by tiny Core/Awake)
rem_end = e
rem_total = (e - s).total_seconds() / 60
j = i + 1
while j < len(records):
ns, ne, nstage = records[j]
gap = (ns - rem_end).total_seconds() / 60
if nstage == "REM" or (gap <= 5 and nstage in ("Core", "Awake")):
if nstage == "REM":
rem_end = ne
rem_total += (ne - ns).total_seconds() / 60
j += 1
else:
break
if rem_total < MIN_REM_CYCLE_MIN:
i = j
continue
# Collect all records up to and including this REM block
cycle_records = [r for r in records if current_start <= r[0] < rem_end]
dur = int((rem_end - current_start).total_seconds() / 60)
cycles.append((current_start, rem_end, dur, cycle_records))
# Find the next record starting at or after rem_end (don't skip
# records that the merge loop may have advanced j past).
j = next((k for k in range(i + 1, len(records)) if records[k][0] >= rem_end), len(records))
current_start = records[j][0] if j < len(records) else None
i = j
else:
i += 1
# Tail: remaining records after last REM
if current_start is not None:
tail = [r for r in records if r[0] >= current_start]
if tail:
tail_end = tail[-1][1]
dur = int((tail_end - current_start).total_seconds() / 60)
cycles.append((current_start, tail_end, dur, tail))
return cycles
def summarize_stages(cycle_records):
# Resolve overlaps: when records overlap, the later-starting one takes precedence.
# Build a list of non-overlapping segments by clipping earlier records.
segments = []
for s, e, stage in sorted(cycle_records):
if segments and s < segments[-1][1]:
ps, pe, pstage = segments[-1]
segments[-1] = (ps, s, pstage)
segments.append((s, e, stage))
totals = {}
for s, e, stage in segments:
dur = (e - s).total_seconds() / 60
if dur >= 1:
totals[stage] = totals.get(stage, 0) + dur
parts = []
for stage in ("Deep", "Core", "REM", "Awake"):
if stage in totals:
parts.append(f"{stage} {int(totals[stage])}m")
return " → ".join(parts)
def parse_dt(s):
return datetime.strptime(s, "%Y-%m-%d %H:%M:%S %z")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment