Last active
June 12, 2025 13:25
-
-
Save frinux/eae886fa8844048d66b5ef447c9798d0 to your computer and use it in GitHub Desktop.
Detect and merge users having the same email in PostHog, using $merge_dangerously according to documentation (https://posthog.com/docs/product-analytics/identify)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from collections import defaultdict | |
import argparse | |
import sys | |
import csv | |
# CONFIGURATION
POSTHOG_API_URL = "https://eu.i.posthog.com"  # Base API host; replace with your self-hosted PostHog URL if applicable
# 1. Get all users (persons) from PostHog
def get_all_users(project_id, personal_api_key):
    """Fetch every person in a PostHog project, following pagination.

    Args:
        project_id: PostHog project ID whose persons are listed.
        personal_api_key: Personal API key used as a Bearer token.

    Returns:
        list: All person dicts accumulated from the paginated
        ``/api/person`` endpoint.

    Raises:
        requests.HTTPError: If any page request returns a non-2xx status.
    """
    users = []
    url = f"{POSTHOG_API_URL}/api/person/?project_id={project_id}"
    headers = {"Authorization": f"Bearer {personal_api_key}"}
    while url:
        # Explicit timeout so a stalled connection cannot hang the script forever.
        resp = requests.get(url, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        users.extend(data["results"])
        url = data.get("next")
        if url and 'project_id=' not in url:
            # Ensure project_id is always present in pagination
            url += f"&project_id={project_id}"
    return users
# 2. Group users by email
def group_users_by_email(users):
    """Bucket person distinct_ids by their ``email`` property.

    Persons with no email (or an empty one) are skipped. The email
    property may be a single string or a list of strings; every
    distinct_id of the person is recorded under each of its emails,
    preserving the order persons were fetched in.

    Args:
        users: Iterable of PostHog person dicts.

    Returns:
        defaultdict(list): Mapping of email -> list of distinct_ids.
    """
    grouped = defaultdict(list)
    for person in users:
        raw = person.get("properties", {}).get("email")
        if not raw:
            continue
        # Normalize: a bare string becomes a one-element list.
        addresses = [raw] if isinstance(raw, str) else raw
        ids = person.get("distinct_ids", [])
        for address in addresses:
            grouped[address].extend(ids)
    return grouped
# 3. Merge users with the same email
def merge_users(email_to_distinct_ids, project_api_key, dry_run=False):
    """Merge all distinct_ids sharing an email into the first one seen.

    For each email with two or more distinct_ids, the first id becomes
    the merge target and every other id is merged into it. In dry-run
    mode nothing is sent; the planned merges are printed and written to
    ``dry_run_merges.csv`` in the current directory.

    Args:
        email_to_distinct_ids: Mapping of email -> list of distinct_ids.
        project_api_key: Project API key used by the capture endpoint.
        dry_run: If True, only report what would be merged.
    """
    csv_rows = []
    for email, distinct_ids in email_to_distinct_ids.items():
        # De-duplicate while preserving order: the same distinct_id can be
        # collected more than once (e.g. a person whose email property lists
        # the same address twice), which would otherwise trigger a bogus
        # merge of an id into itself.
        unique_ids = list(dict.fromkeys(distinct_ids))
        if len(unique_ids) < 2:
            continue  # Nothing to merge
        print(f"Merging {len(unique_ids)} users for email: {email}")
        main_id = unique_ids[0]
        for alias_id in unique_ids[1:]:
            if dry_run:
                print(f"[DRY RUN] Would merge {alias_id} into {main_id}")
                csv_rows.append({"email": email, "main_id": main_id, "alias_id": alias_id})
            else:
                send_merge_event(main_id, alias_id, project_api_key)
    if dry_run and csv_rows:
        # Persist the plan so it can be reviewed before a real run.
        with open("dry_run_merges.csv", "w", newline="") as csvfile:
            fieldnames = ["email", "main_id", "alias_id"]
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(csv_rows)
# 4. Send $merge_dangerously event
def send_merge_event(main_id, alias_id, project_api_key):
    """POST a ``$merge_dangerously`` event merging alias_id into main_id.

    Uses the public capture endpoint, so only the project API key is
    needed. Success/failure is reported on stdout rather than raised.

    Args:
        main_id: Distinct_id that survives the merge.
        alias_id: Distinct_id to be merged into ``main_id``.
        project_api_key: Project API key sent in the event payload.
    """
    url = f"{POSTHOG_API_URL}/capture/"
    payload = {
        "api_key": project_api_key,
        "distinct_id": main_id,
        "event": "$merge_dangerously",
        "properties": {
            "alias": alias_id
        }
    }
    # Explicit timeout so one stuck request cannot stall the whole merge run.
    resp = requests.post(url, json=payload, timeout=30)
    if resp.status_code == 200:
        print(f"Successfully merged {alias_id} into {main_id}")
    else:
        print(f"Failed to merge {alias_id} into {main_id}: {resp.text}")
def main():
    """CLI entry point: fetch persons, bucket them by email, merge duplicates."""
    arg_parser = argparse.ArgumentParser(description="Merge PostHog users with the same email.")
    arg_parser.add_argument('--dry-run', action='store_true', help='Show what would be merged without applying changes')
    arg_parser.add_argument('--project-id', required=True, help='PostHog project ID to operate on (required)')
    arg_parser.add_argument('--personal-api-key', required=True, help='Personal API key for retrieving users (required)')
    arg_parser.add_argument('--project-api-key', required=True, help='Project API key for sending merge events (required)')
    opts = arg_parser.parse_args()
    all_persons = get_all_users(opts.project_id, opts.personal_api_key)
    buckets = group_users_by_email(all_persons)
    merge_users(buckets, opts.project_api_key, dry_run=opts.dry_run)
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment