#!/usr/bin/env python3
#
# apple_health_to_menstrudel.py
#
# Convert an Apple Health export into a Menstrudel "Logs & Periods" import file.
#
# Copyright (c) 2026 — released under the MIT License.
# Menstrudel is an open-source app by J-shw: https://github.com/J-shw/Menstrudel
# This script is an UNOFFICIAL, community tool and is not affiliated with it.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
"""Apple Health -> Menstrudel importer.

Reads an Apple Health "Export.xml" and produces a JSON file you can import into
Menstrudel via: Settings -> Data Management -> Import -> "Logs & Periods".

IMPORTANT: Menstrudel's import REPLACES all existing Logs & Periods data.
Use a fresh install or make an in-app export backup first.

What it converts
----------------
Only menstrual-cycle records matter for the "Logs & Periods" import:

    HKCategoryTypeIdentifierMenstrualFlow          -> period_logs (+ periods)
    HKCategoryTypeIdentifierIntermenstrualBleeding -> spotting logs (optional)

The Menstrudel import format (reverse-engineered from the app's source, db v11)::

    {
      "periods":      [ {"id", "start_date"(epoch ms), "end_date"(epoch ms), "total_days"} ],
      "period_logs":  [ {"id", "date"(ISO8601), "flow"(0-4), "painLevel", "period_id"} ],
      "log_symptoms": [ {"log_id_fk", "symptom"} ],
      "exported_at":  ISO8601,
      "app_version":  "x.y.z",
      "db_version":   int
    }

Flow encoding (Menstrudel FlowRate enum index):
    0 = none, 1 = spotting, 2 = light, 3 = medium, 4 = heavy

Periods are reconstructed exactly the way Menstrudel does internally
(PeriodsRepository.recalculateAndAssignPeriods): flow-days (>= spotting) are
sorted by date and split into a new period whenever the gap to the previous
flow-day is more than one day.

This has been validated structurally against the app's source; test it on a
backup before relying on it. Requires Python 3.7+ (standard library only).

Usage
-----
    python3 apple_health_to_menstrudel.py [EXPORT] [-o OUT] [--unspecified LEVEL]
                                          [--include-intermenstrual]

EXPORT may be the Export.xml file or the unzipped apple_health_export/ folder.
Defaults: ./Export.xml -> ./menstrudel_logsAndPeriods_<timestamp>.json
"""
|
|
|
import argparse |
|
import json |
|
import os |
|
import sys |
|
import xml.etree.ElementTree as ET |
|
from datetime import datetime |
|
|
|
# --- Configuration -------------------------------------------------------- |
|
|
|
# Schema version of the Menstrudel database that the generated file declares.
# The app refuses to import data from a *newer* db_version than it runs, so we
# declare the current one.
DB_VERSION = 11
APP_VERSION_TAG = "applehealth-import"

# FlowRate names in enum order; a name's position in the list is its index.
FLOW_NAMES = ["none", "spotting", "light", "medium", "heavy"]
FLOW_INDEX = dict(zip(FLOW_NAMES, range(len(FLOW_NAMES))))

# Apple Health record types this script looks at.
MENSTRUAL_TYPE = "HKCategoryTypeIdentifierMenstrualFlow"
INTERMENSTRUAL_TYPE = "HKCategoryTypeIdentifierIntermenstrualBleeding"

# Apple Health value suffix -> Menstrudel FlowRate index. Apple's "Unspecified"
# means "bleeding occurred, intensity not recorded"; Menstrudel has no
# equivalent, so it is deliberately absent here and mapped via the
# --unspecified option instead (default: medium).
_BASE_FLOW = {
    name.capitalize(): FLOW_INDEX[name]
    for name in ("none", "light", "medium", "heavy")
}
|
|
|
|
|
def build_flow_map(unspecified):
    """Build the lookup from Apple Health flow values to FlowRate indices.

    Two value prefixes are registered (``HKCategoryValueMenstrualFlow`` and
    ``HKCategoryValueVaginalBleeding``), each combined with every suffix in
    ``_BASE_FLOW``.  The ``...Unspecified`` value of both prefixes is mapped
    to ``unspecified``, the FlowRate index chosen by the caller.
    """
    prefixes = ("HKCategoryValueMenstrualFlow", "HKCategoryValueVaginalBleeding")
    flow_map = {}
    for prefix in prefixes:
        flow_map.update(
            {prefix + suffix: idx for suffix, idx in _BASE_FLOW.items()}
        )
        flow_map[prefix + "Unspecified"] = unspecified
    return flow_map
|
|
|
|
|
# --- Parsing -------------------------------------------------------------- |
|
|
|
|
|
def resolve_input(path):
    """Return the path of the export XML file to read.

    Anything that is not a directory is handed back untouched (existence is
    not checked here).  A directory is searched for ``export.xml`` and then
    ``Export.xml``; the first one that exists is returned.  If neither is
    present the script terminates via ``sys.exit`` with an error message.
    """
    if not os.path.isdir(path):
        return path

    candidates = (os.path.join(path, name) for name in ("export.xml", "Export.xml"))
    found = next((c for c in candidates if os.path.isfile(c)), None)
    if found is None:
        sys.exit(f"error: no export.xml found inside folder: {path}")
    return found
|
|
|
|
|
def parse_apple_datetime(s):
    """Turn an Apple Health timestamp into a timezone-aware ``datetime``.

    The expected layout is ``YYYY-MM-DD HH:MM:SS +HHMM``, for example
    ``'2011-10-12 12:00:00 +0200'``.  ``ValueError`` is raised by
    ``strptime`` when ``s`` does not match.
    """
    apple_format = "%Y-%m-%d %H:%M:%S %z"
    parsed = datetime.strptime(s, apple_format)
    return parsed
|
|
|
|
|
def read_records(xml_path, flow_map):
    """Stream-parse the (large) export and yield relevant menstrual records.

    Only ``<Record>`` elements whose ``type`` is menstrual flow or
    intermenstrual bleeding and that carry a ``startDate`` are considered.
    Intermenstrual bleeding is always reported as spotting (1); menstrual
    flow is translated through ``flow_map``, and records whose ``value`` is
    not in the map are skipped.

    Args:
        xml_path: Path of the Apple Health export XML.
        flow_map: Mapping of Apple flow value strings to FlowRate indices.

    Yields:
        dicts ``{day: date, dt: tz-aware datetime, flow: int, kind: str}`` in
        document order.

    Raises:
        FileNotFoundError, xml.etree.ElementTree.ParseError: from the parser.
        ValueError: if a ``startDate`` does not match the expected format.
    """
    wanted = {MENSTRUAL_TYPE, INTERMENSTRUAL_TYPE}
    root = None
    depth = 0  # number of currently open elements (root included)

    for event, elem in ET.iterparse(xml_path, events=("start", "end")):
        if event == "start":
            # The first element opened is the document root. We need a handle
            # on it because iterparse keeps every parsed child attached to it.
            if root is None:
                root = elem
            depth += 1
            continue

        depth -= 1

        if elem.tag == "Record":
            rtype = elem.get("type")
            start = elem.get("startDate") if rtype in wanted else None
            if start:
                dt = parse_apple_datetime(start)
                if rtype == INTERMENSTRUAL_TYPE:
                    flow = 1  # spotting
                else:
                    flow = flow_map.get(elem.get("value", ""))
                if flow is not None:
                    yield {"day": dt.date(), "dt": dt, "flow": flow, "kind": rtype}

        # Free memory: the file can be hundreds of MB. Clearing an element
        # alone is not enough, because the emptied element stays referenced by
        # its parent; so every finished element is cleared, and each completed
        # top-level child is also detached from the root. Only attributes of
        # the element that just ended are ever read, so this cannot affect the
        # records yielded above.
        if depth >= 1:
            elem.clear()
        if depth == 1:
            root.clear()
|
|
|
|
|
# --- Transform ------------------------------------------------------------ |
|
|
|
|
|
def build_logs(records, include_intermenstrual):
    """Reduce the records to a single entry per calendar day.

    When several records fall on the same day, the one with the highest flow
    wins; on a tie the record seen first is kept.  Intermenstrual-bleeding
    records are ignored entirely unless ``include_intermenstrual`` is true.

    Returns a list of dicts with the keys ``day``, ``dt``, ``flow`` and
    ``intermenstrual``, sorted ascending by ``day``.
    """
    strongest = {}
    for rec in records:
        is_intermenstrual = rec["kind"] == INTERMENSTRUAL_TYPE
        if is_intermenstrual and not include_intermenstrual:
            continue

        current = strongest.get(rec["day"])
        if current is not None and rec["flow"] <= current["flow"]:
            continue  # the day already holds an equal or heavier flow

        strongest[rec["day"]] = {
            "day": rec["day"],
            "dt": rec["dt"],
            "flow": rec["flow"],
            "intermenstrual": is_intermenstrual,
        }

    return sorted(strongest.values(), key=lambda entry: entry["day"])
|
|
|
|
|
def iso_log_date(dt):
    """Format the calendar day of ``dt`` as a timestamp pinned to noon.

    The result looks like ``2020-01-05T12:00:00.000``: the time of day and
    UTC offset of ``dt`` are discarded.  The shape is meant to match Dart's
    ``DateTime.toIso8601String()``, and noon is used so that a timezone
    conversion on re-parsing is unlikely to move the log to another day.
    """
    calendar_day = dt.strftime("%Y-%m-%d")
    return calendar_day + "T12:00:00.000"
|
|
|
|
|
def epoch_ms(dt):
    """Return ``dt`` as integer milliseconds since the Unix epoch.

    The value comes from ``dt.timestamp()`` scaled by 1000 and truncated
    towards zero by ``int``.
    """
    seconds = dt.timestamp()
    return int(seconds * 1000)
|
|
|
|
|
def assign_periods(days):
    """Group consecutive flow-days into periods.

    ``days`` must be the per-day dicts sorted ascending by date.  Every entry
    receives a 1-based ``log_id`` (its position in ``days``) and a
    ``period_id``.  Entries with flow 0 and intermenstrual entries belong to
    no period and keep ``period_id = None``.  Among the remaining flow-days a
    new period begins whenever the gap to the previous flow-day exceeds one
    day; this is intended to replicate Menstrudel's own clustering.

    The entries of ``days`` are modified in place.  Returns the list of
    period dicts (``id``, ``start_date``, ``end_date``, ``total_days``), with
    ids numbered from 1 in chronological order.
    """
    for log_id, entry in enumerate(days, start=1):
        entry["log_id"] = log_id
        entry["period_id"] = None

    # Split the flow-days into runs of adjacent calendar days.
    clusters = []
    for entry in days:
        if entry["flow"] < 1 or entry["intermenstrual"]:
            continue
        if clusters and (entry["day"] - clusters[-1][-1]["day"]).days <= 1:
            clusters[-1].append(entry)
        else:
            clusters.append([entry])

    # Turn each run into a period record and tag its members.
    periods = []
    for period_id, members in enumerate(clusters, start=1):
        first, last = members[0], members[-1]
        periods.append(
            {
                "id": period_id,
                "start_date": epoch_ms(first["dt"]),
                "end_date": epoch_ms(last["dt"]),
                "total_days": (last["day"] - first["day"]).days + 1,
            }
        )
        for entry in members:
            entry["period_id"] = period_id

    return periods
|
|
|
|
|
def build_export(days, periods):
    """Assemble the top-level dict that is written out as the import file.

    ``days`` are the per-day entries after ``log_id`` and ``period_id`` have
    been assigned; ``periods`` is passed through unchanged.  Pain level is
    always 0 and ``log_symptoms`` is always empty.  ``exported_at`` is the
    current local time as a timezone-aware ISO 8601 string.
    """

    def as_period_log(entry):
        return {
            "id": entry["log_id"],
            "date": iso_log_date(entry["dt"]),
            "flow": entry["flow"],
            "painLevel": 0,
            "period_id": entry["period_id"],
        }

    export = {
        "periods": periods,
        "period_logs": [as_period_log(entry) for entry in days],
        "log_symptoms": [],  # Apple Health flow records carry no symptoms.
        "exported_at": datetime.now().astimezone().isoformat(),
        "app_version": APP_VERSION_TAG,
        "db_version": DB_VERSION,
    }
    return export
|
|
|
|
|
# --- Main ----------------------------------------------------------------- |
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the arguments, reads the Apple Health export, converts it to the
    Menstrudel "Logs & Periods" structure, writes the JSON file and prints a
    summary.  Every input problem (missing file, invalid XML, malformed
    timestamp, nothing to import) ends the script through ``sys.exit`` with
    an ``error: ...`` message; in those cases no output file is written.
    """
    ap = argparse.ArgumentParser(
        description="Convert an Apple Health export into a Menstrudel import file."
    )
    ap.add_argument(
        "input",
        nargs="?",
        default="Export.xml",
        help="Apple Health Export.xml, or the unzipped apple_health_export/ "
        "folder (default: ./Export.xml)",
    )
    ap.add_argument(
        "-o",
        "--output",
        help="Output JSON path (default: ./menstrudel_logsAndPeriods_<timestamp>.json)",
    )
    ap.add_argument(
        "--unspecified",
        choices=FLOW_NAMES,
        default="medium",
        help="How to map Apple's intensity-less 'unspecified' flow (default: medium)",
    )
    ap.add_argument(
        "--include-intermenstrual",
        action="store_true",
        help="Also import intermenstrual bleeding as standalone spotting logs",
    )
    args = ap.parse_args()

    xml_path = resolve_input(args.input)
    flow_map = build_flow_map(FLOW_INDEX[args.unspecified])

    try:
        records = list(read_records(xml_path, flow_map))
    except FileNotFoundError:
        sys.exit(f"error: input file not found: {xml_path}")
    except ET.ParseError as e:
        sys.exit(f"error: failed to parse XML: {e}")
    except ValueError as e:
        # Raised when a record's startDate does not match the expected format.
        sys.exit(f"error: malformed timestamp in the export: {e}")

    if not records:
        sys.exit("error: no menstrual-flow records found in the export.")

    days = build_logs(records, args.include_intermenstrual)
    if not days:
        # Records exist but all of them were filtered out, which only happens
        # when they are all intermenstrual bleeding and the option is off.
        # Stop before writing an empty import file.
        sys.exit(
            "error: the export contains only intermenstrual-bleeding records; "
            "re-run with --include-intermenstrual to import them."
        )

    periods = assign_periods(days)
    export = build_export(days, periods)

    out_path = (
        args.output or f"menstrudel_logsAndPeriods_{datetime.now():%Y%m%d_%H%M%S}.json"
    )
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(export, f, ensure_ascii=False)

    # Summary
    flow_counts = {}
    for d in days:
        flow_counts[d["flow"]] = flow_counts.get(d["flow"], 0) + 1

    print(f"Read {len(records)} menstrual records from {xml_path}")
    print(f"Wrote {out_path}")
    print(f"  date range : {days[0]['day']} -> {days[-1]['day']}")
    print(f"  log days   : {len(days)}")
    print(f"  periods    : {len(periods)}")
    print("  flow breakdown:")
    for idx in sorted(flow_counts):
        print(f"    {FLOW_NAMES[idx]:<9}: {flow_counts[idx]}")
    print(
        f"\nApple 'unspecified' flow was mapped to '{args.unspecified}' "
        "(change with --unspecified)."
    )
    print("Reminder: importing REPLACES existing Logs & Periods data in Menstrudel.")
    print(
        "Import via: Menstrudel -> Settings -> Data Management -> Import -> 'Logs & Periods'"
    )


if __name__ == "__main__":
    main()