Skip to content

Instantly share code, notes, and snippets.

@giang-pham
Forked from siiramone/download_sentry_data.py
Last active December 21, 2022 15:45
Show Gist options
  • Save giang-pham/a85b2d57f94ef5ce9e4e1e999593656f to your computer and use it in GitHub Desktop.
Save giang-pham/a85b2d57f94ef5ce9e4e1e999593656f to your computer and use it in GitHub Desktop.
Download sentry events with a query for a project. Useful for data processing.
"""Download sentry data.
usage:
1. create auth token (https://sentry.io/settings/account/api/auth-tokens/).
2. make a directory for export if not exists.
$ mkdir data
3. edit the search query (the `params` dict in the script) to select the issues you want.
4. install python and run pip3 install requests then execute
$ python download_sentry_data.py <org>/<project> <auth_token>
5. a CSV file with a name formatted as "./data/issues_YYYYmmDDHHMMSS.csv" is created.
"""
import requests
import csv
import sys
import datetime
import re
# Base URL of the Sentry web API used by every request below.
API_ROOT = 'https://app.getsentry.com/api/0'

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the export forever.
REQUEST_TIMEOUT = 30

# Millisecond-precision UTC timestamps (e.g. 2022-12-21T15:45:00.123Z) that
# appear inside event titles.  They are stripped before de-duplication so that
# otherwise identical titles are treated as one.
# Add further patterns (e.g. r'org/\d+/' for an org-specific prefix) if titles
# contain other volatile parts.
TIMESTAMP_RE = re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z')


def strip_timestamps(title):
    """Return *title* with every embedded millisecond UTC timestamp removed."""
    return TIMESTAMP_RE.sub('', title)


def next_page_url(links):
    """Return the URL of the next result page, or None if there is none.

    *links* is the parsed ``Link`` response header in the shape produced by
    ``requests.Response.links``: a dict keyed by ``rel`` whose values are dicts
    holding ``url`` plus the remaining link parameters.  A ``next`` link is
    only followed when it is flagged ``results="true"``.
    """
    next_link = links.get('next')
    if next_link and next_link.get('results') == 'true':
        return next_link.get('url')
    return None


def main(argv):
    """Export one CSV row per distinct event title matching the query.

    *argv* is the full argument vector: ``argv[1]`` is ``<org>/<project>`` and
    ``argv[2]`` is the Sentry auth token.  The result is written to
    ``./data/issues_<YYYYmmddHHMMSS>.csv`` with the columns ``id`` (event id),
    ``title`` and ``permalink``.

    Exits with a usage message when the arguments are missing or malformed,
    and raises ``requests.HTTPError`` when the API answers with an error
    status.
    """
    # Validate before touching the filesystem so a bad invocation does not
    # leave an empty CSV behind.
    if len(argv) < 3 or '/' not in argv[1]:
        sys.exit('usage: python download_sentry_data.py <org>/<project> <auth_token>')
    org, project = argv[1].split('/', 1)
    token = argv[2]
    print(f'org: {org}')
    print(f'project: {project}')

    # time-range absolute date
    # params = {'query': 'event.timestamp:>=2020-04-01T00:00:00 event.timestamp:<2020-05-01T00:00:00'}
    params = {'query': 'title:"*EBenFetchError*"'}

    seen_titles = set()
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    out_path = './data/issues_{0}.csv'.format(stamp)

    # newline='' lets the csv module control line endings itself.
    with open(out_path, 'w', encoding='utf-8', newline='') as csvfile, \
            requests.Session() as session:
        # One session: the auth header is set once and connections are reused.
        session.headers['Authorization'] = f'Bearer {token}'

        writer = csv.DictWriter(
            csvfile,
            fieldnames=['id', 'title', 'permalink'],
            extrasaction='ignore',
        )
        writer.writeheader()

        url = f'{API_ROOT}/projects/{org}/{project}/issues/'
        while url:
            response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
            issues = response.json()
            # The next-page URL taken from the Link header is used verbatim
            # (it is expected to already carry the query and cursor), so the
            # query must only be sent with the very first request.
            params = None

            for issue in issues:
                issue_id = issue['id']
                events_url = f'{API_ROOT}/issues/{issue_id}/events/'
                events_response = session.get(events_url, timeout=REQUEST_TIMEOUT)
                print(events_url)
                print(events_response)
                events_response.raise_for_status()

                # NOTE: only the first page of events of each issue is read.
                for event in events_response.json():
                    title = strip_timestamps(event['title'])
                    if title in seen_titles:
                        continue
                    seen_titles.add(title)

                    event_id = event['id']
                    detail_response = session.get(
                        f'{API_ROOT}/projects/{org}/{project}/events/{event_id}/',
                        timeout=REQUEST_TIMEOUT,
                    )
                    detail_response.raise_for_status()
                    # Fall back to the short title when the event detail has
                    # no metadata value instead of aborting the export.
                    metadata = detail_response.json().get('metadata') or {}
                    writer.writerow({
                        'id': event_id,
                        'title': metadata.get('value') or event['title'],
                        'permalink': f'https://sentry.io/organizations/{org}/issues/{issue_id}/events/{event_id}',
                    })

            if issues:  # an empty page has no last element to report
                print(f"Last event date: {issues[-1]['lastSeen']}")

            url = next_page_url(response.links)
            if url:
                print("Getting next page...")


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment