pzeinlinger · June 1, 2026 18:00
diff --git a/README.md b/README.md
diff --git a/apple_health_to_menstrudel.py b/apple_health_to_menstrudel.py
 #!/usr/bin/env python3
 #
 # apple_health_to_menstrudel.py
 #
 # Convert an Apple Health export into a Menstrudel "Logs & Periods" import file.
 #
 # Copyright (c) 2026 — released under the MIT License.
 # Menstrudel is an open-source app by J-shw: https://github.com/J-shw/Menstrudel
 # This script is an UNOFFICIAL, community tool and is not affiliated with it.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 # copies of the Software, and to permit persons to whom the Software is
 # furnished to do so, subject to the following conditions:
 #
 # The above copyright notice and this permission notice shall be included in all
 # copies or substantial portions of the Software.
 #
 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
 """Apple Health  ->  Menstrudel importer.

 Reads an Apple Health "Export.xml" and produces a JSON file you can import into
 Menstrudel via:  Settings -> Data Management -> Import -> "Logs & Periods".

    IMPORTANT: Menstrudel's import REPLACES all existing Logs & Periods data.
    Use a fresh install or make an in-app export backup first.

 What it converts
 ----------------
 Only menstrual-cycle records matter for the "Logs & Periods" import:

    HKCategoryTypeIdentifierMenstrualFlow          -> period_logs (+ periods)
    HKCategoryTypeIdentifierIntermenstrualBleeding -> spotting logs (optional)

 The Menstrudel import format (reverse-engineered from the app's source, db v11)::

    {
      "periods":      [ {"id", "start_date"(epoch ms), "end_date"(epoch ms), "total_days"} ],
      "period_logs":  [ {"id", "date"(ISO8601), "flow"(0-4), "painLevel", "period_id"} ],
      "log_symptoms": [ {"log_id_fk", "symptom"} ],
      "exported_at":  ISO8601,
      "app_version":  "x.y.z",
      "db_version":   int
    }

 Flow encoding (Menstrudel FlowRate enum index):
    0 = none, 1 = spotting, 2 = light, 3 = medium, 4 = heavy

 Periods are reconstructed exactly the way Menstrudel does internally
 (PeriodsRepository.recalculateAndAssignPeriods): flow-days (>= spotting) are
 sorted by date and split into a new period whenever the gap to the previous
 flow-day is more than one day.

 This has been validated structurally against the app's source; test it on a
 backup before relying on it. Requires Python 3.7+ (standard library only).

 Usage
 -----
    python3 apple_health_to_menstrudel.py [EXPORT] [-o OUT] [--unspecified LEVEL]
                                          [--include-intermenstrual]

 EXPORT may be the Export.xml file or the unzipped apple_health_export/ folder.
 Defaults: ./Export.xml -> ./menstrudel_logsAndPeriods_<timestamp>.json
 """

 import argparse
 import json
 import os
 import sys
 import xml.etree.ElementTree as ET
 from datetime import datetime

 # --- Configuration --------------------------------------------------------

 # Schema version of the Menstrudel database that the generated file declares.
 # The app refuses to import data from a *newer* db_version than it runs, so we
 # declare the current one.
 DB_VERSION = 11
 APP_VERSION_TAG = "applehealth-import"

 # FlowRate names, ordered so that each name's list position is its index.
 FLOW_NAMES = ["none", "spotting", "light", "medium", "heavy"]
 FLOW_INDEX = dict(zip(FLOW_NAMES, range(len(FLOW_NAMES))))

 # Apple Health record types read by this importer.
 MENSTRUAL_TYPE = "HKCategoryTypeIdentifierMenstrualFlow"
 INTERMENSTRUAL_TYPE = "HKCategoryTypeIdentifierIntermenstrualBleeding"

 # Apple Health value suffix -> Menstrudel FlowRate index. Apple's "Unspecified"
 # means "bleeding occurred, intensity not recorded"; Menstrudel has no
 # equivalent, so it is mapped via the --unspecified option (default: medium).
 # "spotting" is deliberately absent: no Apple flow suffix maps to it here.
 _BASE_FLOW = {
    level.capitalize(): FLOW_INDEX[level]
    for level in ("none", "light", "medium", "heavy")
 }


 def build_flow_map(unspecified):
    """Build the lookup from Apple flow value strings to FlowRate indices.

    Both value-name prefixes ("HKCategoryValueMenstrualFlow" and
    "HKCategoryValueVaginalBleeding") are covered. For each prefix, every
    suffix listed in _BASE_FLOW takes its index from there and the
    "Unspecified" suffix maps to *unspecified*.

    Args:
        unspecified: FlowRate index to use for Apple's "Unspecified" value.

    Returns:
        A dict mapping full Apple value strings to FlowRate indices.
    """
    prefixes = ("HKCategoryValueMenstrualFlow", "HKCategoryValueVaginalBleeding")
    mapping = {}
    for stem in prefixes:
        mapping.update({stem + level: rate for level, rate in _BASE_FLOW.items()})
        mapping[stem + "Unspecified"] = unspecified
    return mapping


 # --- Parsing --------------------------------------------------------------


 def resolve_input(path):
    """Return the export XML path for a file-or-folder command-line argument.

    A path that is not an existing directory is returned unchanged; its
    existence is not checked here. For a directory, "export.xml" and then
    "Export.xml" are looked up inside it and the first one that is a regular
    file is returned. If neither is present the process exits with an error
    message.
    """
    if not os.path.isdir(path):
        return path
    inside = (os.path.join(path, fname) for fname in ("export.xml", "Export.xml"))
    match = next((p for p in inside if os.path.isfile(p)), None)
    if match is None:
        sys.exit(f"error: no export.xml found inside folder: {path}")
    return match


 def parse_apple_datetime(s):
    """Turn an Apple Health timestamp string into a tz-aware datetime.

    The expected layout is date, time and numeric UTC offset separated by
    single spaces, e.g. '2011-10-12 12:00:00 +0200'. datetime.strptime raises
    ValueError for anything that does not match.
    """
    apple_layout = "%Y-%m-%d %H:%M:%S %z"
    return datetime.strptime(s, apple_layout)


 def read_records(xml_path, flow_map):
    """Stream-parse the (large) export and yield relevant menstrual records.

    Args:
        xml_path: Path of the Apple Health export XML file.
        flow_map: Mapping from Apple flow value strings to FlowRate indices.
            Menstrual-flow records whose value is not in the mapping are
            skipped.

    Yields:
        Dicts {day: date, dt: tz-aware datetime, flow: int, kind: str}, one per
        usable record, in document order. Intermenstrual-bleeding records
        always get flow 1 (spotting). Records without a startDate attribute
        are skipped.

    Raises:
        ValueError: If a wanted record's startDate does not match the format
            expected by parse_apple_datetime.
    """
    wanted = {MENSTRUAL_TYPE, INTERMENSTRUAL_TYPE}
    root = None
    for event, elem in ET.iterparse(xml_path, events=("start", "end")):
        if event == "start":
            # The first element opened is the document root; keep a handle on
            # it so finished subtrees can be detached from it below.
            if root is None:
                root = elem
            continue
        if elem.tag != "Record":
            continue
        rtype = elem.get("type")
        if rtype in wanted:
            start = elem.get("startDate")
            if start:
                dt = parse_apple_datetime(start)
                if rtype == INTERMENSTRUAL_TYPE:
                    flow = 1  # spotting
                else:
                    flow = flow_map.get(elem.get("value", ""))
                if flow is not None:
                    yield {"day": dt.date(), "dt": dt, "flow": flow, "kind": rtype}
        # Free memory: the file can be hundreds of MB. Clearing only the
        # Record itself would leave it (and every non-Record sibling) attached
        # to the root, so the tree would still grow with the file. Clearing the
        # root drops everything parsed so far; elements still being built stay
        # alive through the parser's own references.
        root.clear()


 # --- Transform ------------------------------------------------------------


 def build_logs(records, include_intermenstrual):
    """Collapse records to one entry per calendar day (keeping the max flow).

    Args:
        records: Iterable of record dicts with 'day', 'dt', 'flow' and 'kind'
            keys, as yielded by read_records().
        include_intermenstrual: When false, intermenstrual-bleeding records
            are dropped before collapsing.

    Returns:
        A list of per-day dicts ('day', 'dt', 'flow', 'intermenstrual') sorted
        ascending by date. For each day the record with the highest flow wins.
        When a menstrual record and an intermenstrual record tie on flow, the
        menstrual one wins regardless of their order in the input, so the day
        is not wrongly marked as intermenstrual. Other ties keep the record
        seen first.
    """
    by_day = {}
    for rec in records:
        is_intermenstrual = rec["kind"] == INTERMENSTRUAL_TYPE
        if is_intermenstrual and not include_intermenstrual:
            continue
        day = rec["day"]
        existing = by_day.get(day)
        if existing is not None:
            stronger = rec["flow"] > existing["flow"]
            # Equal flow: a menstrual record replaces an intermenstrual one so
            # the outcome does not depend on record order in the export.
            promotes = (
                rec["flow"] == existing["flow"]
                and existing["intermenstrual"]
                and not is_intermenstrual
            )
            if not (stronger or promotes):
                continue
        by_day[day] = {
            "day": day,
            "dt": rec["dt"],
            "flow": rec["flow"],
            "intermenstrual": is_intermenstrual,
        }
    return [by_day[d] for d in sorted(by_day)]


 def iso_log_date(dt):
    """Format the calendar date of *dt* as an ISO-8601 string pinned to noon.

    Only the year, month and day of *dt* are used. The time part of the result
    is always the literal "T12:00:00.000" and no UTC offset is appended; the
    shape is intended to match Dart's DateTime.toIso8601String(). A fixed
    midday time keeps the value away from midnight, so a timezone shift when
    the app re-parses it should not move it to another day.
    """
    return f"{dt:%Y-%m-%d}T12:00:00.000"


 def epoch_ms(dt):
    """Return *dt* as whole milliseconds since the Unix epoch.

    The POSIX timestamp of *dt* is scaled to milliseconds and then truncated
    toward zero by int().
    """
    milliseconds = dt.timestamp() * 1000
    return int(milliseconds)


 def assign_periods(days):
    """Group consecutive flow-days into periods and tag every log with ids.

    Every entry of *days* (expected in ascending date order) receives a 1-based
    'log_id' following list order and a 'period_id' that starts out as None.
    Entries with flow >= 1 that are not intermenstrual are then clustered: an
    entry joins the current cluster when its day is at most one day after the
    cluster's last day, otherwise it opens a new cluster. Each cluster becomes
    one period, numbered from 1 in order, and its entries get that number as
    'period_id'. Entries with flow 0 and intermenstrual entries keep
    period_id = None.

    The entries of *days* are modified in place.

    Returns:
        The list of period dicts. 'start_date' and 'end_date' are the epoch
        milliseconds of the first and last entry's 'dt'; 'total_days' is the
        inclusive number of calendar days between them.
    """
    # Stable ids in date order; period membership is filled in further down.
    for log_id, entry in enumerate(days, start=1):
        entry["log_id"] = log_id
        entry["period_id"] = None

    # Pass 1: split the qualifying entries into runs of adjacent days.
    clusters = []
    for entry in days:
        if entry["flow"] < 1 or entry["intermenstrual"]:
            continue
        if clusters and (entry["day"] - clusters[-1][-1]["day"]).days <= 1:
            clusters[-1].append(entry)
        else:
            clusters.append([entry])

    # Pass 2: turn each run into a period and stamp its members.
    periods = []
    for pid, members in enumerate(clusters, start=1):
        first, last = members[0], members[-1]
        periods.append(
            {
                "id": pid,
                "start_date": epoch_ms(first["dt"]),
                "end_date": epoch_ms(last["dt"]),
                "total_days": (last["day"] - first["day"]).days + 1,
            }
        )
        for entry in members:
            entry["period_id"] = pid

    return periods


 def build_export(days, periods):
    """Assemble the JSON-serialisable structure that is written to disk.

    Args:
        days: Per-day dicts that already carry 'log_id' and 'period_id'.
        periods: Period dicts, passed through unchanged.

    Returns:
        A dict with 'periods', 'period_logs' (one entry per day, painLevel
        always 0), an empty 'log_symptoms' list, the local export timestamp,
        and the app/db version tags.
    """
    logs = []
    for entry in days:
        logs.append(
            {
                "id": entry["log_id"],
                "date": iso_log_date(entry["dt"]),
                "flow": entry["flow"],
                "painLevel": 0,
                "period_id": entry["period_id"],
            }
        )
    exported_at = datetime.now().astimezone().isoformat()
    return {
        "periods": periods,
        "period_logs": logs,
        "log_symptoms": [],  # Apple Health flow records carry no symptoms.
        "exported_at": exported_at,
        "app_version": APP_VERSION_TAG,
        "db_version": DB_VERSION,
    }


 # --- Main -----------------------------------------------------------------


 def main():
    """Command-line entry point: parse arguments, convert, write, summarise.

    Exits through sys.exit() with an "error: ..." message when the input
    cannot be found or parsed, when it holds no usable records, or when every
    record was filtered out. The filtered-out case is checked before the
    output file is written, so no empty import file is left behind.
    """
    ap = argparse.ArgumentParser(
        description="Convert an Apple Health export into a Menstrudel import file."
    )
    ap.add_argument(
        "input",
        nargs="?",
        default="Export.xml",
        help="Apple Health Export.xml, or the unzipped apple_health_export/ "
        "folder (default: ./Export.xml)",
    )
    ap.add_argument(
        "-o",
        "--output",
        help="Output JSON path (default: ./menstrudel_logsAndPeriods_<timestamp>.json)",
    )
    ap.add_argument(
        "--unspecified",
        choices=FLOW_NAMES,
        default="medium",
        help="How to map Apple's intensity-less 'unspecified' flow (default: medium)",
    )
    ap.add_argument(
        "--include-intermenstrual",
        action="store_true",
        help="Also import intermenstrual bleeding as standalone spotting logs",
    )
    args = ap.parse_args()

    xml_path = resolve_input(args.input)
    flow_map = build_flow_map(FLOW_INDEX[args.unspecified])

    try:
        records = list(read_records(xml_path, flow_map))
    except FileNotFoundError:
        sys.exit(f"error: input file not found: {xml_path}")
    except ET.ParseError as e:
        sys.exit(f"error: failed to parse XML: {e}")
    except ValueError as e:
        # A startDate that does not match the expected timestamp layout.
        sys.exit(f"error: unexpected timestamp in export: {e}")

    if not records:
        sys.exit("error: no menstrual-flow records found in the export.")

    days = build_logs(records, args.include_intermenstrual)
    if not days:
        # build_logs only drops intermenstrual records (when not opted in), so
        # an empty result means the export held nothing else. Stop here rather
        # than write an empty file and fail on days[0] in the summary.
        sys.exit(
            "error: only intermenstrual-bleeding records were found; "
            "re-run with --include-intermenstrual to import them."
        )
    periods = assign_periods(days)
    export = build_export(days, periods)

    out_path = (
        args.output or f"menstrudel_logsAndPeriods_{datetime.now():%Y%m%d_%H%M%S}.json"
    )
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(export, f, ensure_ascii=False)

    # Summary
    flow_counts = {}
    for d in days:
        flow_counts[d["flow"]] = flow_counts.get(d["flow"], 0) + 1

    print(f"Read {len(records)} menstrual records from {xml_path}")
    print(f"Wrote {out_path}")
    print(f"  date range : {days[0]['day']} -> {days[-1]['day']}")
    print(f"  log days   : {len(days)}")
    print(f"  periods    : {len(periods)}")
    print("  flow breakdown:")
    for idx in sorted(flow_counts):
        print(f"    {FLOW_NAMES[idx]:<9}: {flow_counts[idx]}")
    print(
        f"\nApple 'unspecified' flow was mapped to '{args.unspecified}' "
        "(change with --unspecified)."
    )
    print("Reminder: importing REPLACES existing Logs & Periods data in Menstrudel.")
    print(
        "Import via: Menstrudel -> Settings -> Data Management -> Import -> 'Logs & Periods'"
    )


 if __name__ == "__main__":
    main()
Apple Health value	Menstrudel flow
`…None`	none (0)
`…Unspecified`	configurable — medium by default
`…Light`	light (2)
`…Medium`	medium (3)
`…Heavy`	heavy (4)
`IntermenstrualBleeding`	spotting (1) — opt-in only
Flag	Description
`--unspecified {none,spotting,light,medium,heavy}`	How to map Apple's intensity-less flow (default: `medium`)
`--include-intermenstrual`	Import intermenstrual bleeding as standalone spotting logs (default: off)
`-o, --output PATH`	Output JSON path
	#!/usr/bin/env python3
	#
	# apple_health_to_menstrudel.py
	#
	# Convert an Apple Health export into a Menstrudel "Logs & Periods" import file.
	#
	# Copyright (c) 2026 — released under the MIT License.
	# Menstrudel is an open-source app by J-shw: https://github.com/J-shw/Menstrudel
	# This script is an UNOFFICIAL, community tool and is not affiliated with it.
	#
	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:
	#
	# The above copyright notice and this permission notice shall be included in all
	# copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
	"""Apple Health -> Menstrudel importer.

	Reads an Apple Health "Export.xml" and produces a JSON file you can import into
	Menstrudel via: Settings -> Data Management -> Import -> "Logs & Periods".

	IMPORTANT: Menstrudel's import REPLACES all existing Logs & Periods data.
	Use a fresh install or make an in-app export backup first.

	What it converts
	----------------
	Only menstrual-cycle records matter for the "Logs & Periods" import:

	HKCategoryTypeIdentifierMenstrualFlow -> period_logs (+ periods)
	HKCategoryTypeIdentifierIntermenstrualBleeding -> spotting logs (optional)

	The Menstrudel import format (reverse-engineered from the app's source, db v11)::

	{
	"periods": [ {"id", "start_date"(epoch ms), "end_date"(epoch ms), "total_days"} ],
	"period_logs": [ {"id", "date"(ISO8601), "flow"(0-4), "painLevel", "period_id"} ],
	"log_symptoms": [ {"log_id_fk", "symptom"} ],
	"exported_at": ISO8601,
	"app_version": "x.y.z",
	"db_version": int
	}

	Flow encoding (Menstrudel FlowRate enum index):
	0 = none, 1 = spotting, 2 = light, 3 = medium, 4 = heavy

	Periods are reconstructed exactly the way Menstrudel does internally
	(PeriodsRepository.recalculateAndAssignPeriods): flow-days (>= spotting) are
	sorted by date and split into a new period whenever the gap to the previous
	flow-day is more than one day.

	This has been validated structurally against the app's source; test it on a
	backup before relying on it. Requires Python 3.7+ (standard library only).

	Usage
	-----
	python3 apple_health_to_menstrudel.py [EXPORT] [-o OUT] [--unspecified LEVEL]
	[--include-intermenstrual]

	EXPORT may be the Export.xml file or the unzipped apple_health_export/ folder.
	Defaults: ./Export.xml -> ./menstrudel_logsAndPeriods_<timestamp>.json
	"""

	import argparse
	import json
	import os
	import sys
	import xml.etree.ElementTree as ET
	from datetime import datetime

	# --- Configuration --------------------------------------------------------

	# Schema version of the Menstrudel database that the generated file declares.
	# The app refuses to import data from a newer db_version than it runs, so we
	# declare the current one.
	DB_VERSION = 11
	APP_VERSION_TAG = "applehealth-import"

	# FlowRate names, ordered so that each name's list position is its index.
	FLOW_NAMES = ["none", "spotting", "light", "medium", "heavy"]
	FLOW_INDEX = dict(zip(FLOW_NAMES, range(len(FLOW_NAMES))))

	# Apple Health record types read by this importer.
	MENSTRUAL_TYPE = "HKCategoryTypeIdentifierMenstrualFlow"
	INTERMENSTRUAL_TYPE = "HKCategoryTypeIdentifierIntermenstrualBleeding"

	# Apple Health value suffix -> Menstrudel FlowRate index. Apple's "Unspecified"
	# means "bleeding occurred, intensity not recorded"; Menstrudel has no
	# equivalent, so it is mapped via the --unspecified option (default: medium).
	# "spotting" is deliberately absent: no Apple flow suffix maps to it here.
	_BASE_FLOW = {
	    level.capitalize(): FLOW_INDEX[level]
	    for level in ("none", "light", "medium", "heavy")
	}


	def build_flow_map(unspecified):
	    """Build the lookup from Apple flow value strings to FlowRate indices.

	    Both value-name prefixes ("HKCategoryValueMenstrualFlow" and
	    "HKCategoryValueVaginalBleeding") are covered. For each prefix, every
	    suffix listed in _BASE_FLOW takes its index from there and the
	    "Unspecified" suffix maps to *unspecified*.

	    Args:
	        unspecified: FlowRate index to use for Apple's "Unspecified" value.

	    Returns:
	        A dict mapping full Apple value strings to FlowRate indices.
	    """
	    prefixes = ("HKCategoryValueMenstrualFlow", "HKCategoryValueVaginalBleeding")
	    mapping = {}
	    for stem in prefixes:
	        mapping.update({stem + level: rate for level, rate in _BASE_FLOW.items()})
	        mapping[stem + "Unspecified"] = unspecified
	    return mapping


	# --- Parsing --------------------------------------------------------------


	def resolve_input(path):
	    """Return the export XML path for a file-or-folder command-line argument.

	    A path that is not an existing directory is returned unchanged; its
	    existence is not checked here. For a directory, "export.xml" and then
	    "Export.xml" are looked up inside it and the first one that is a regular
	    file is returned. If neither is present the process exits with an error
	    message.
	    """
	    if not os.path.isdir(path):
	        return path
	    inside = (os.path.join(path, fname) for fname in ("export.xml", "Export.xml"))
	    match = next((p for p in inside if os.path.isfile(p)), None)
	    if match is None:
	        sys.exit(f"error: no export.xml found inside folder: {path}")
	    return match


	def parse_apple_datetime(s):
	    """Turn an Apple Health timestamp string into a tz-aware datetime.

	    The expected layout is date, time and numeric UTC offset separated by
	    single spaces, e.g. '2011-10-12 12:00:00 +0200'. datetime.strptime raises
	    ValueError for anything that does not match.
	    """
	    apple_layout = "%Y-%m-%d %H:%M:%S %z"
	    return datetime.strptime(s, apple_layout)


	def read_records(xml_path, flow_map):
	    """Stream-parse the (large) export and yield relevant menstrual records.

	    Args:
	        xml_path: Path of the Apple Health export XML file.
	        flow_map: Mapping from Apple flow value strings to FlowRate indices.
	            Menstrual-flow records whose value is not in the mapping are
	            skipped.

	    Yields:
	        Dicts {day: date, dt: tz-aware datetime, flow: int, kind: str}, one per
	        usable record, in document order. Intermenstrual-bleeding records
	        always get flow 1 (spotting). Records without a startDate attribute
	        are skipped.

	    Raises:
	        ValueError: If a wanted record's startDate does not match the format
	            expected by parse_apple_datetime.
	    """
	    wanted = {MENSTRUAL_TYPE, INTERMENSTRUAL_TYPE}
	    root = None
	    for event, elem in ET.iterparse(xml_path, events=("start", "end")):
	        if event == "start":
	            # The first element opened is the document root; keep a handle on
	            # it so finished subtrees can be detached from it below.
	            if root is None:
	                root = elem
	            continue
	        if elem.tag != "Record":
	            continue
	        rtype = elem.get("type")
	        if rtype in wanted:
	            start = elem.get("startDate")
	            if start:
	                dt = parse_apple_datetime(start)
	                if rtype == INTERMENSTRUAL_TYPE:
	                    flow = 1  # spotting
	                else:
	                    flow = flow_map.get(elem.get("value", ""))
	                if flow is not None:
	                    yield {"day": dt.date(), "dt": dt, "flow": flow, "kind": rtype}
	        # Free memory: the file can be hundreds of MB. Clearing only the
	        # Record itself would leave it (and every non-Record sibling) attached
	        # to the root, so the tree would still grow with the file. Clearing the
	        # root drops everything parsed so far; elements still being built stay
	        # alive through the parser's own references.
	        root.clear()


	# --- Transform ------------------------------------------------------------


	def build_logs(records, include_intermenstrual):
	    """Collapse records to one entry per calendar day (keeping the max flow).

	    Args:
	        records: Iterable of record dicts with 'day', 'dt', 'flow' and 'kind'
	            keys, as yielded by read_records().
	        include_intermenstrual: When false, intermenstrual-bleeding records
	            are dropped before collapsing.

	    Returns:
	        A list of per-day dicts ('day', 'dt', 'flow', 'intermenstrual') sorted
	        ascending by date. For each day the record with the highest flow wins.
	        When a menstrual record and an intermenstrual record tie on flow, the
	        menstrual one wins regardless of their order in the input, so the day
	        is not wrongly marked as intermenstrual. Other ties keep the record
	        seen first.
	    """
	    by_day = {}
	    for rec in records:
	        is_intermenstrual = rec["kind"] == INTERMENSTRUAL_TYPE
	        if is_intermenstrual and not include_intermenstrual:
	            continue
	        day = rec["day"]
	        existing = by_day.get(day)
	        if existing is not None:
	            stronger = rec["flow"] > existing["flow"]
	            # Equal flow: a menstrual record replaces an intermenstrual one so
	            # the outcome does not depend on record order in the export.
	            promotes = (
	                rec["flow"] == existing["flow"]
	                and existing["intermenstrual"]
	                and not is_intermenstrual
	            )
	            if not (stronger or promotes):
	                continue
	        by_day[day] = {
	            "day": day,
	            "dt": rec["dt"],
	            "flow": rec["flow"],
	            "intermenstrual": is_intermenstrual,
	        }
	    return [by_day[d] for d in sorted(by_day)]


	def iso_log_date(dt):
	    """Format the calendar date of *dt* as an ISO-8601 string pinned to noon.

	    Only the year, month and day of *dt* are used. The time part of the result
	    is always the literal "T12:00:00.000" and no UTC offset is appended; the
	    shape is intended to match Dart's DateTime.toIso8601String(). A fixed
	    midday time keeps the value away from midnight, so a timezone shift when
	    the app re-parses it should not move it to another day.
	    """
	    return f"{dt:%Y-%m-%d}T12:00:00.000"


	def epoch_ms(dt):
	    """Return *dt* as whole milliseconds since the Unix epoch.

	    The POSIX timestamp of *dt* is scaled to milliseconds and then truncated
	    toward zero by int().
	    """
	    milliseconds = dt.timestamp() * 1000
	    return int(milliseconds)


	def assign_periods(days):
	    """Group consecutive flow-days into periods and tag every log with ids.

	    Every entry of *days* (expected in ascending date order) receives a 1-based
	    'log_id' following list order and a 'period_id' that starts out as None.
	    Entries with flow >= 1 that are not intermenstrual are then clustered: an
	    entry joins the current cluster when its day is at most one day after the
	    cluster's last day, otherwise it opens a new cluster. Each cluster becomes
	    one period, numbered from 1 in order, and its entries get that number as
	    'period_id'. Entries with flow 0 and intermenstrual entries keep
	    period_id = None.

	    The entries of *days* are modified in place.

	    Returns:
	        The list of period dicts. 'start_date' and 'end_date' are the epoch
	        milliseconds of the first and last entry's 'dt'; 'total_days' is the
	        inclusive number of calendar days between them.
	    """
	    # Stable ids in date order; period membership is filled in further down.
	    for log_id, entry in enumerate(days, start=1):
	        entry["log_id"] = log_id
	        entry["period_id"] = None

	    # Pass 1: split the qualifying entries into runs of adjacent days.
	    clusters = []
	    for entry in days:
	        if entry["flow"] < 1 or entry["intermenstrual"]:
	            continue
	        if clusters and (entry["day"] - clusters[-1][-1]["day"]).days <= 1:
	            clusters[-1].append(entry)
	        else:
	            clusters.append([entry])

	    # Pass 2: turn each run into a period and stamp its members.
	    periods = []
	    for pid, members in enumerate(clusters, start=1):
	        first, last = members[0], members[-1]
	        periods.append(
	            {
	                "id": pid,
	                "start_date": epoch_ms(first["dt"]),
	                "end_date": epoch_ms(last["dt"]),
	                "total_days": (last["day"] - first["day"]).days + 1,
	            }
	        )
	        for entry in members:
	            entry["period_id"] = pid

	    return periods


	def build_export(days, periods):
	    """Assemble the JSON-serialisable structure that is written to disk.

	    Args:
	        days: Per-day dicts that already carry 'log_id' and 'period_id'.
	        periods: Period dicts, passed through unchanged.

	    Returns:
	        A dict with 'periods', 'period_logs' (one entry per day, painLevel
	        always 0), an empty 'log_symptoms' list, the local export timestamp,
	        and the app/db version tags.
	    """
	    logs = []
	    for entry in days:
	        logs.append(
	            {
	                "id": entry["log_id"],
	                "date": iso_log_date(entry["dt"]),
	                "flow": entry["flow"],
	                "painLevel": 0,
	                "period_id": entry["period_id"],
	            }
	        )
	    exported_at = datetime.now().astimezone().isoformat()
	    return {
	        "periods": periods,
	        "period_logs": logs,
	        "log_symptoms": [],  # Apple Health flow records carry no symptoms.
	        "exported_at": exported_at,
	        "app_version": APP_VERSION_TAG,
	        "db_version": DB_VERSION,
	    }


	# --- Main -----------------------------------------------------------------


	def main():
	    """Command-line entry point: parse arguments, convert, write, summarise.

	    Exits through sys.exit() with an "error: ..." message when the input
	    cannot be found or parsed, when it holds no usable records, or when every
	    record was filtered out. The filtered-out case is checked before the
	    output file is written, so no empty import file is left behind.
	    """
	    ap = argparse.ArgumentParser(
	        description="Convert an Apple Health export into a Menstrudel import file."
	    )
	    ap.add_argument(
	        "input",
	        nargs="?",
	        default="Export.xml",
	        help="Apple Health Export.xml, or the unzipped apple_health_export/ "
	        "folder (default: ./Export.xml)",
	    )
	    ap.add_argument(
	        "-o",
	        "--output",
	        help="Output JSON path (default: ./menstrudel_logsAndPeriods_<timestamp>.json)",
	    )
	    ap.add_argument(
	        "--unspecified",
	        choices=FLOW_NAMES,
	        default="medium",
	        help="How to map Apple's intensity-less 'unspecified' flow (default: medium)",
	    )
	    ap.add_argument(
	        "--include-intermenstrual",
	        action="store_true",
	        help="Also import intermenstrual bleeding as standalone spotting logs",
	    )
	    args = ap.parse_args()

	    xml_path = resolve_input(args.input)
	    flow_map = build_flow_map(FLOW_INDEX[args.unspecified])

	    try:
	        records = list(read_records(xml_path, flow_map))
	    except FileNotFoundError:
	        sys.exit(f"error: input file not found: {xml_path}")
	    except ET.ParseError as e:
	        sys.exit(f"error: failed to parse XML: {e}")
	    except ValueError as e:
	        # A startDate that does not match the expected timestamp layout.
	        sys.exit(f"error: unexpected timestamp in export: {e}")

	    if not records:
	        sys.exit("error: no menstrual-flow records found in the export.")

	    days = build_logs(records, args.include_intermenstrual)
	    if not days:
	        # build_logs only drops intermenstrual records (when not opted in), so
	        # an empty result means the export held nothing else. Stop here rather
	        # than write an empty file and fail on days[0] in the summary.
	        sys.exit(
	            "error: only intermenstrual-bleeding records were found; "
	            "re-run with --include-intermenstrual to import them."
	        )
	    periods = assign_periods(days)
	    export = build_export(days, periods)

	    out_path = (
	        args.output or f"menstrudel_logsAndPeriods_{datetime.now():%Y%m%d_%H%M%S}.json"
	    )
	    with open(out_path, "w", encoding="utf-8") as f:
	        json.dump(export, f, ensure_ascii=False)

	    # Summary
	    flow_counts = {}
	    for d in days:
	        flow_counts[d["flow"]] = flow_counts.get(d["flow"], 0) + 1

	    print(f"Read {len(records)} menstrual records from {xml_path}")
	    print(f"Wrote {out_path}")
	    print(f" date range : {days[0]['day']} -> {days[-1]['day']}")
	    print(f" log days : {len(days)}")
	    print(f" periods : {len(periods)}")
	    print(" flow breakdown:")
	    for idx in sorted(flow_counts):
	        print(f" {FLOW_NAMES[idx]:<9}: {flow_counts[idx]}")
	    print(
	        f"\nApple 'unspecified' flow was mapped to '{args.unspecified}' "
	        "(change with --unspecified)."
	    )
	    print("Reminder: importing REPLACES existing Logs & Periods data in Menstrudel.")
	    print(
	        "Import via: Menstrudel -> Settings -> Data Management -> Import -> 'Logs & Periods'"
	    )


	if __name__ == "__main__":
	    main()