Created
November 30, 2022 14:09
-
-
Save mateuszkwiatkowski/21a296da982230e963b66842c1ae6643 to your computer and use it in GitHub Desktop.
Posthog cleanup
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Standalone PostHog cleanup script configured through environment variables.

Environment variables read at import time:
    POSTHOG_CLEANUP_OLDER_THAN_DAYS: age threshold in days (default 30).
    POSTHOG_CLEANUP_BATCH_SIZE: number of rows deleted per query (default 100).
    POSTHOG_CLEANUP_DRY_RUN: "true", "yes" or "1" (case-insensitive) turns
        deletes into log-only no-ops.
"""
import logging
import os
from datetime import datetime, timedelta

import django

# The settings module has to be chosen *before* django.setup() runs; setting
# the default afterwards has no effect on the already-configured framework.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "posthog.settings")
django.setup()

# Model imports need the app registry that django.setup() populates, so they
# deliberately come after it instead of sitting at the top of the file.
from posthog.models import Event, ElementGroup, Person  # noqa: E402
from django.utils import timezone  # noqa: E402

max_age_days = int(os.getenv("POSTHOG_CLEANUP_OLDER_THAN_DAYS", 30))
step_size = int(os.getenv("POSTHOG_CLEANUP_BATCH_SIZE", 100))
dry_run = os.getenv("POSTHOG_CLEANUP_DRY_RUN", "False").lower() in ["true", "yes", "1"]

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %H:%M:%S ')
def get_events_older_than(older_than):
    """Return the primary keys of events whose timestamp is older than a cutoff.

    Args:
        older_than: Age threshold in days; events with a timestamp strictly
            before "now minus this many days" are selected.

    Returns:
        A lazy flat ``values_list`` queryset of ``Event`` primary keys.
    """
    # timezone.now() yields the current instant directly, instead of taking
    # the server's naive local wall clock and re-labelling it with Django's
    # current time zone (which skews the cutoff when the two differ and can
    # fail on ambiguous local times around DST changes).
    cutoff = timezone.now() - timedelta(days=older_than)
    return Event.objects.filter(timestamp__lt=cutoff).values_list('pk', flat=True)
def get_non_referenced_event_groups():
    """Return the element groups whose hash is not used by any event.

    Returns:
        A lazy ``ElementGroup`` queryset excluding every group whose ``hash``
        equals the ``elements_hash`` of at least one ``Event``.
    """
    # Keep the hash comparison inside the database as a subquery rather than
    # loading one hash per event into Python and sending them all back as an
    # IN list. NULL hashes are filtered out explicitly because a NULL inside
    # a NOT IN set would make the comparison match no rows at all.
    referenced_hashes = (
        Event.objects
        .exclude(elements_hash__isnull=True)
        .values_list('elements_hash', flat=True)
    )
    return ElementGroup.objects.exclude(hash__in=referenced_hashes)
def get_all_persons():
    """Return a queryset covering every ``Person`` row."""
    every_person = Person.objects.all()
    return every_person
def delete_items(item_type, items):
    """Delete the given rows of ``item_type`` with a single query.

    Args:
        item_type: Django model class whose rows are deleted.
        items: Iterable of primary keys or of model instances.

    When the module-level ``dry_run`` flag is set, nothing is deleted and only
    a log line is written.
    """
    if dry_run:
        logging.info("Skipping delete of items in dry run mode...")
        return
    # Callers in this script hand over both flat primary-key lists and
    # querysets of model instances; reduce both to plain primary keys so the
    # lookup receives values it can compare against the key column.
    primary_keys = [getattr(item, "pk", item) for item in items]
    item_type.objects.filter(pk__in=primary_keys).delete()
def delete_items_batched(item_type, items, logging_indent=6 * " "):
    """Delete ``items`` of ``item_type`` in batches, logging progress.

    Args:
        item_type: Django model class whose rows are deleted.
        items: Iterable (typically a queryset) of the rows or primary keys to
            delete; it is fully materialised before the first delete.
        logging_indent: Prefix put in front of every log line.

    The batch size comes from the module-level ``step_size``; values below 1
    are treated as 1 so the loop always advances.
    """
    # Snapshot the candidates once: later batches must not shift because
    # earlier batches already removed rows from the underlying table.
    items = list(items)
    number_of_items = len(items)
    batch_size = max(step_size, 1)
    logging.info("%sDeleting %d items of type %s using batches of %d size:", logging_indent, number_of_items,
                 item_type.__name__, batch_size)

    for start in range(0, number_of_items, batch_size):
        end = min(start + batch_size, number_of_items)
        delete_items(item_type, items[start:end])
        # Report the share that has actually been processed so far, so the
        # final batch always ends on 100%.
        logging.info("%s %d%%", logging_indent, end * 100 // number_of_items)

    if not number_of_items:
        # Nothing to delete: still emit the closing progress line.
        logging.info("%s 100%%", logging_indent)
if __name__ == "__main__":
    # Script entry point: run the three cleanup passes in order and report
    # how long the whole run took.
    logging.info("Running cleanup of PostHog...")
    started_at = datetime.now()

    # Pass 1: events past the configured age.
    logging.info(" - Deleting all events older than %d days:", max_age_days)
    stale_events = get_events_older_than(max_age_days)
    delete_items_batched(Event, stale_events)

    # Pass 2: every Person row.
    logging.info(" - Deleting all Person objects")
    persons = get_all_persons()
    delete_items_batched(Person, persons)

    # Pass 3: element groups orphaned by the event deletion above; must be
    # looked up only after pass 1 has finished.
    logging.info(" - Deleting all elements and element groups not referenced by any event anymore:")
    orphaned_groups = get_non_referenced_event_groups()
    delete_items_batched(ElementGroup, orphaned_groups)

    elapsed = datetime.now() - started_at
    logging.info("Cleanup finished, total duration: %s", elapsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment