Skip to content

Instantly share code, notes, and snippets.

@frinux
Last active June 12, 2025 13:25
Show Gist options
  • Save frinux/eae886fa8844048d66b5ef447c9798d0 to your computer and use it in GitHub Desktop.
Save frinux/eae886fa8844048d66b5ef447c9798d0 to your computer and use it in GitHub Desktop.
Detect and merge users having the same email in PostHog, using $merge_dangerously according to documentation (https://posthog.com/docs/product-analytics/identify)
import requests
from collections import defaultdict
import argparse
import sys
import csv
# CONFIGURATION
# Base URL that every request in this script is built from: the person
# listing ("/api/person/") and the event capture endpoint ("/capture/").
# NOTE(review): the "i." host looks like PostHog's ingestion endpoint; confirm
# it also serves the private "/api/" routes, or whether a separate host
# (e.g. https://eu.posthog.com) is needed for listing persons.
POSTHOG_API_URL = "https://eu.i.posthog.com" # Or your self-hosted PostHog URL
# 1. Get all users (persons) from PostHog
def get_all_users(project_id, personal_api_key, *, timeout=30):
    """Fetch every person of a PostHog project, following pagination.

    Args:
        project_id: PostHog project ID, sent as the ``project_id`` query
            parameter on every request.
        personal_api_key: Personal API key, sent as a Bearer token.
        timeout: Seconds to wait for each HTTP response before giving up
            (forwarded to ``requests.get``). Without it a stalled
            connection would block the script forever.

    Returns:
        A list holding the ``results`` entries of all pages, in page order.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.Timeout: If a request exceeds ``timeout``.
        KeyError: If a response body has no ``results`` key.
    """
    users = []
    url = f"{POSTHOG_API_URL}/api/person/?project_id={project_id}"
    headers = {"Authorization": f"Bearer {personal_api_key}"}
    while url:
        resp = requests.get(url, headers=headers, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        users.extend(data["results"])
        # A missing or empty "next" link ends the loop.
        url = data.get("next")
        if url and "project_id=" not in url:
            # Ensure project_id is always present in pagination. Choose the
            # separator from the URL itself so a "next" link without a query
            # string does not become ".../path&project_id=...".
            separator = "&" if "?" in url else "?"
            url += f"{separator}project_id={project_id}"
    return users
# 2. Group users by email
def group_users_by_email(users):
    """Map each email address to the distinct IDs of the users carrying it.

    Args:
        users: Iterable of person dicts. Each may have a ``properties`` dict
            whose ``email`` entry is a string or a list of strings, and a
            ``distinct_ids`` list.

    Returns:
        A ``defaultdict(list)`` mapping email -> distinct IDs in first-seen
        order, without repeats. Every distinct ID of a matching user is
        included, so a single user with several distinct IDs already yields
        a list longer than one.

    Users without properties, without an email, or whose email value is
    neither a string nor a list/tuple are skipped; so are empty or
    non-string entries inside an email list.
    """
    email_to_distinct_ids = defaultdict(list)
    # Per-email set of IDs already recorded, for O(1) duplicate checks.
    seen_ids = defaultdict(set)
    for user in users:
        # "properties" may be absent or explicitly None.
        emails = (user.get("properties") or {}).get("email")
        if not emails:
            continue
        # emails can be a list or a string
        if isinstance(emails, str):
            emails = [emails]
        elif not isinstance(emails, (list, tuple)):
            # Unexpected shape (number, dict, ...): nothing usable to group on.
            continue
        distinct_ids = user.get("distinct_ids") or []
        for email in emails:
            # A bare empty email is skipped above; apply the same rule to
            # entries inside a list, and ignore non-string entries.
            if not isinstance(email, str) or not email:
                continue
            already_seen = seen_ids[email]
            for distinct_id in distinct_ids:
                if distinct_id in already_seen:
                    continue
                already_seen.add(distinct_id)
                email_to_distinct_ids[email].append(distinct_id)
    return email_to_distinct_ids
# 3. Merge users with the same email
def merge_users(email_to_distinct_ids, project_api_key, dry_run=False):
    """Merge, per email, every distinct ID into the first one listed.

    Args:
        email_to_distinct_ids: Mapping of email -> list of distinct IDs.
        project_api_key: Project API key forwarded to ``send_merge_event``.
        dry_run: When true, no event is sent; each planned merge is printed
            and, if there is at least one, all of them are written to
            ``dry_run_merges.csv`` in the current working directory.

    Emails with fewer than two different distinct IDs are skipped.
    """
    csv_rows = []
    for email, distinct_ids in email_to_distinct_ids.items():
        # Drop repeated IDs (keeping first-seen order) so an ID is never
        # merged into itself or merged twice.
        unique_ids = list(dict.fromkeys(distinct_ids))
        if len(unique_ids) < 2:
            continue  # Nothing to merge
        print(f"Merging {len(unique_ids)} users for email: {email}")
        # The first ID is the merge target; all others are folded into it.
        main_id, *alias_ids = unique_ids
        for alias_id in alias_ids:
            if dry_run:
                print(f"[DRY RUN] Would merge {alias_id} into {main_id}")
                csv_rows.append({"email": email, "main_id": main_id, "alias_id": alias_id})
            else:
                send_merge_event(main_id, alias_id, project_api_key)
    if dry_run and csv_rows:
        # Explicit UTF-8 so non-ASCII emails are written identically on every
        # platform instead of depending on the locale's default encoding.
        with open("dry_run_merges.csv", "w", newline="", encoding="utf-8") as csvfile:
            fieldnames = ["email", "main_id", "alias_id"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(csv_rows)
# 4. Send $merge_dangerously event
def send_merge_event(main_id, alias_id, project_api_key, *, timeout=30):
    """Post one ``$merge_dangerously`` event merging ``alias_id`` into ``main_id``.

    Args:
        main_id: Distinct ID sent as the event's ``distinct_id``.
        alias_id: Distinct ID sent as the event's ``alias`` property.
        project_api_key: Project API key placed in the payload's ``api_key``.
        timeout: Seconds to wait for the HTTP response (forwarded to
            ``requests.post``) so a stalled connection cannot hang the run.

    The outcome is reported on stdout only; nothing is returned. Network
    failures are reported the same way as non-200 responses, so one failed
    request does not abort the remaining merges of a batch.
    """
    url = f"{POSTHOG_API_URL}/capture/"
    payload = {
        "api_key": project_api_key,
        "distinct_id": main_id,
        "event": "$merge_dangerously",
        "properties": {
            "alias": alias_id,
        },
    }
    try:
        resp = requests.post(url, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts: report and let the caller continue.
        print(f"Failed to merge {alias_id} into {main_id}: {exc}")
        return
    if resp.status_code == 200:
        print(f"Successfully merged {alias_id} into {main_id}")
    else:
        print(f"Failed to merge {alias_id} into {main_id}: {resp.text}")
def main():
    """Command-line entry point: fetch users, group them by email, merge.

    Reads ``--project-id``, ``--personal-api-key`` and ``--project-api-key``
    (all required) plus the optional ``--dry-run`` flag from the command
    line, then runs the fetch -> group -> merge pipeline with them.
    """
    cli = argparse.ArgumentParser(
        description="Merge PostHog users with the same email."
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be merged without applying changes",
    )
    # The three mandatory options differ only in flag name and help text.
    required_options = (
        ("--project-id", "PostHog project ID to operate on (required)"),
        ("--personal-api-key", "Personal API key for retrieving users (required)"),
        ("--project-api-key", "Project API key for sending merge events (required)"),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    options = cli.parse_args()

    persons = get_all_users(options.project_id, options.personal_api_key)
    grouped = group_users_by_email(persons)
    merge_users(grouped, options.project_api_key, dry_run=options.dry_run)


# Run the pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment