Export Issues from Github repo to CSV (API v3)
""" | |
This is strongly based on https://gist.github.com/unbracketed/3380407; | |
thanks to @unbracketed and the various commenters on the page. | |
I've mainly cleaned up the code into basic methods, and included the | |
various suggestions in the comments. Hope this is useful to someone. | |
Make sure you have `requests` and `csv` installed via pip then run it: | |
`python export_gh_issues_to_csv.py` | |
--- | |
Exports Issues from a specified repository to a CSV file | |
Uses basic authentication (Github username + password) or token to retrieve Issues | |
from a repository that username has access to. Supports Github API v3. | |
""" | |
import csv
import requests

GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = ''  # format is username/repo
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that the filtering options are powerful and there is a lot available. Also note that issues
# and pull requests arrive in the same result set (see the sketch after the script for a way to
# skip pull requests).
params_payload = {'filter': 'all', 'state': 'open', 'type': 'issue'}
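# Illustrative alternatives (the values below are assumptions; adjust them to your repo
# and see the docs link above for the full list of supported query parameters):
# params_payload = {'state': 'closed', 'labels': 'bug', 'since': '2023-01-01T00:00:00Z'}
# params_payload = {'state': 'all', 'sort': 'updated', 'direction': 'desc'}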
def write_issues(response, csvout):
    "output a list of issues to csv"
    print " : Writing %s issues" % len(response.json())
    for issue in response.json():
        labels = issue['labels']
        label_string = ''
        for label in labels:
            label_string = "%s, %s" % (label_string, label['name'])
        label_string = label_string[2:]  # drop the leading ", "
        # The API returns null for an empty issue body, so guard before encoding.
        body = issue['body'] or ''
        csvout.writerow([issue['number'], issue['title'].encode('utf-8'), body.encode('utf-8'), label_string.encode('utf-8'), issue['created_at'], issue['updated_at']])


def get_issues(url):
    kwargs = {
        'headers': {
            'Content-Type': 'application/vnd.github.v3.raw+json',
            'User-Agent': 'GitHub issue exporter'
        },
        'params': params_payload
    }
    if GITHUB_TOKEN != '':
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)
    print "GET %s" % url
    resp = requests.get(url, **kwargs)
    print " : => %s" % resp.status_code
    if resp.status_code != 200:
        raise Exception(resp.status_code)
    return resp


def next_page(response):
    # More pages? Examine the 'Link' header returned.
    if 'link' in response.headers:
        pages = dict(
            [(rel[6:-1], url[url.index('<')+1:-1]) for url, rel in
             [link.split(';') for link in
              response.headers['link'].split(',')]])
        if 'last' in pages and 'next' in pages:
            return pages['next']
    return None


def process(csvout, url=ISSUES_FOR_REPO_URL):
    resp = get_issues(url)
    write_issues(resp, csvout)
    next_ = next_page(resp)
    if next_ is not None:
        process(csvout, next_)


def main():
    csvfilename = '%s-issues.csv' % (REPO.replace('/', '-'))
    csvfile = open(csvfilename, 'wb')
    csvout = csv.writer(csvfile)
    csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
    process(csvout)
    csvfile.close()


main()
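Because the repository issues endpoint returns pull requests in the same result set, the CSV above will include PRs too. Pull request items carry a pull_request key, so a minimal sketch of an issues-only filter inside write_issues (not part of the original gist) looks like this:

    for issue in response.json():
        # Pull requests returned by the issues endpoint carry a 'pull_request'
        # key; skip them so the CSV contains issues only.
        if 'pull_request' in issue:
            continue
        # ... build label_string and write the row as above ...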
Thanks!
I have added extra columns such as assignee and milestone, and changed the order of the columns to suit my requirements.
The changes are below.

        assignees = issue['assignees']
        assignee_string = ''
        for assignee in assignees:
            assignee_string = "%s %s" % (assignee_string, assignee['login'])
        assignee_string = assignee_string[1:]  # drop the leading space

        milestone_string = ''
        milestone = issue['milestone']
        if isinstance(milestone, dict):
            milestone_string = milestone['title']

        csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'), assignee_string, milestone_string, issue['created_at'], issue['updated_at']])
Thanks for this very useful code.
I had to change the mode passed to open() from 'wb' to 'w', otherwise I was getting the error "TypeError: a bytes-like object is required, not 'str'".
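For anyone on Python 3, a minimal sketch of main() with that change (assuming the rest of the script is also ported, i.e. the print statements become print() calls and the .encode('utf-8') calls in write_issues are dropped, since csv.writer expects str there):

def main():
    csvfilename = '%s-issues.csv' % (REPO.replace('/', '-'))
    # Text mode plus newline='' is what the csv module expects on Python 3.
    with open(csvfilename, 'w', newline='', encoding='utf-8') as csvfile:
        csvout = csv.writer(csvfile)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)


main()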
Hi guys, here's the code for doing it with a JSON approach.
You will have to set the Jira project name and key, and create the project beforehand.
I'm also including all the comments from GitHub.
Using Python 3.5.4.
import requests
import json

GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = ''  # format is username/repo
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

JIRA_PROJECT_NAME = "Example Name APP V1"
JIRA_PROJECT_KEY = "EXP"
DEFAULT_ISSUE_TYPE = "Bug"

# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that the filtering options are powerful and there is a lot available. Also note that issues
# and pull requests arrive in the same result set.
params_payload = {'filter': 'all', 'state': 'open', 'type': 'issue'}


def parse_json(response):
    data = []
    for full_issue in response.json():
        # Collect label names for this issue.
        labels = []
        for label in full_issue['labels']:
            labels.append(label['name'])

        # Append any GitHub comments to the issue body.
        if full_issue['comments'] > 0:
            comment_data = get_issues(full_issue['comments_url'])
            for full_comment in comment_data.json():
                body = '\n\n-------------------------------------------'
                body += "\nGithub Comment from %s" % full_comment['user']['login']
                body += '\n' + full_comment['body']
                full_issue['body'] += body

        issue = {
            # "priority":
            "description": full_issue['body'],
            # "status": "Closed",
            # "reporter": "alice",
            "labels": labels,
            # "watchers": ["bob"],
            # "issueType": DEFAULT_ISSUE_TYPE,
            # "resolution": "Resolved",
            "created": full_issue['created_at'],
            "updated": full_issue['updated_at'],
            # "affectedVersions": ["1.0"],
            "summary": full_issue['title'],
            "assignee": full_issue['assignee'] and full_issue['assignee']['login'] or "",
            # "fixedVersions": ["1.0", "2.0"],
            # "components": ["Component", "AnotherComponent"],
            "externalId": full_issue['number'],
            # "history": [
            #     {
            #         "author": "alice",
            #         "created": "2012-08-31T15:59:02.161+0100",
            #         "items": [
            #             {
            #                 "fieldType": "jira",
            #                 "field": "status",
            #                 "from": "1",
            #                 "fromString": "Open",
            #                 "to": "5",
            #                 "toString": "Resolved"
            #             }
            #         ]
            #     }
            # ],
            # "customFieldValues": [
            #     {
            #         "fieldName": "Story Points",
            #         "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #         "value": "15"
            #     },
            #     {
            #         "fieldName": "Business Value",
            #         "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #         "value": "34"
            #     }
            # ],
            # "attachments": [
            #     {
            #         "name": "battarang.jpg",
            #         "attacher": "admin",
            #         "created": "2012-08-31T17:59:02.161+0100",
            #         "uri": "http://optimus-prime/~batman/images/battarang.jpg",
            #         "description": "This is optimus prime"
            #     }
            # ]
        }
        data.append(issue)
    return data


def get_issues(url):
    kwargs = {
        'headers': {
            'Content-Type': 'application/vnd.github.v3.raw+json',
            'User-Agent': 'GitHub issue exporter'
        },
        'params': params_payload
    }
    if GITHUB_TOKEN != '':
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)
    resp = requests.get(url, **kwargs)
    print(" : => %s" % resp.status_code)
    if resp.status_code != 200:
        raise Exception(resp.status_code)
    return resp


def next_page(response):
    # More pages? Examine the 'Link' header returned.
    if 'link' in response.headers:
        pages = dict(
            [(rel[6:-1], url[url.index('<')+1:-1]) for url, rel in
             [link.split(';') for link in
              response.headers['link'].split(',')]])
        if 'last' in pages and 'next' in pages:
            return pages['next']
    return None


def process_json(jdata, url=ISSUES_FOR_REPO_URL):
    resp = get_issues(url)
    jdata['projects'][0]['issues'] += parse_json(resp)
    next_ = next_page(resp)
    if next_ is not None:
        return process_json(jdata, next_)
    else:
        return jdata


def main():
    file_name = '%s-issues.json' % (REPO.replace('/', '-'))
    data = {
        'projects': [
            {
                'name': JIRA_PROJECT_NAME,
                'key': JIRA_PROJECT_KEY,
                'issues': []
            }
        ]
    }
    data = process_json(data)
    with open(file_name, 'w') as outfile:
        json.dump(data, outfile)


main()
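The output nests every exported issue under the first entry in data['projects']. A quick sketch for inspecting the generated file (the filename below is illustrative; it follows the REPO.replace('/', '-') pattern used in main()):

import json

# Illustrative filename for REPO = 'username/repo'.
with open('username-repo-issues.json') as f:
    data = json.load(f)

# All issues sit under the single project entry created in main().
for issue in data['projects'][0]['issues']:
    print(issue['externalId'], issue['summary'])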
How do I get the projects part of this JSON object?
I ended up using this instead, as I wanted to dump a whole organization's repos: https://github.com/josegonzalez/python-github-backup