""" | |
This is strongly based on https://gist.github.com/unbracketed/3380407; | |
thanks to @unbracketed and the various commenters on the page. | |
I've mainly cleaned up the code into basic methods, and included the | |
various suggestions in the comments. Hope this is useful to someone. | |
Make sure you have `requests` and `csv` installed via pip then run it: | |
`python export_gh_issues_to_csv.py` | |
--- | |
Exports Issues from a specified repository to a CSV file | |
Uses basic authentication (Github username + password) or token to retrieve Issues | |
from a repository that username has access to. Supports Github API v3. | |
""" | |
import csv | |
import requests | |
GITHUB_USER = '' | |
GITHUB_PASSWORD = '' | |
GITHUB_TOKEN = '' | |
REPO = '' # format is username/repo | |
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO | |
# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository | |
# Note that filtering is powerful and there are lots of things available. Also that issues and PRs | |
# arrive in the same results set | |
params_payload = {'filter' : 'all', 'state' : 'open', 'type': 'issue' } | |
def write_issues(response, csvout): | |
"output a list of issues to csv" | |
print " : Writing %s issues" % len(response.json()) | |
for issue in response.json(): | |
labels = issue['labels'] | |
label_string = '' | |
for label in labels: | |
label_string = "%s, %s" % (label_string, label['name']) | |
label_string = label_string[2:] | |
csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'), issue['created_at'], issue['updated_at']]) | |
def get_issues(url): | |
kwargs = { | |
'headers': { | |
'Content-Type': 'application/vnd.github.v3.raw+json', | |
'User-Agent': 'GitHub issue exporter' | |
}, | |
'params': params_payload | |
} | |
if GITHUB_TOKEN != '': | |
kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN | |
else: | |
kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD) | |
print "GET %s" % url | |
resp = requests.get(url, **kwargs) | |
print " : => %s" % resp.status_code | |
# import ipdb; ipdb.set_trace() | |
if resp.status_code != 200: | |
raise Exception(resp.status_code) | |
return resp | |
def next_page(response): | |
#more pages? examine the 'link' header returned | |
if 'link' in response.headers: | |
pages = dict( | |
[(rel[6:-1], url[url.index('<')+1:-1]) for url, rel in | |
[link.split(';') for link in | |
response.headers['link'].split(',')]]) | |
# import ipdb; ipdb.set_trace() | |
if 'last' in pages and 'next' in pages: | |
return pages['next'] | |
return None | |
def process(csvout, url=ISSUES_FOR_REPO_URL): | |
resp = get_issues(url) | |
write_issues(resp, csvout) | |
next_ = next_page(resp) | |
if next_ is not None: | |
process(csvout, next_) | |
def main(): | |
csvfile = '%s-issues.csv' % (REPO.replace('/', '-')) | |
csvout = csv.writer(open(csvfile, 'wb')) | |
csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At')) | |
process(csvout) | |
csvfile.close() | |
main() |
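One optional tweak, not part of the original gist: the v3 issues endpoint accepts a per_page parameter (up to 100), which cuts down the number of paginated requests on bigger repos.

# Hedged tweak: ask for 100 issues per page instead of the API default of 30.
params_payload = {'filter': 'all', 'state': 'open', 'type': 'issue', 'per_page': 100}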
I also had a slight issue that was pretty easy to solve.
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO
This gave me an error, but when I removed % REPO it was fine. Looks like maybe a comment that didn't get properly deleted?
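For what it's worth, that line is part of the original script (the %-formatting builds the issues URL from REPO), so removing % REPO leaves a literal %s in the URL. If the interpolation itself is what misbehaves, one hedged alternative is to build the URL with str.format once REPO is filled in:

REPO = 'someuser/somerepo'  # hypothetical placeholder, use your own username/repo

# Same URL as the original, just built with str.format instead of %-interpolation.
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/{0}/issues'.format(REPO)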
I am getting:
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any help here would really be appreciated.
The full traceback:
Traceback (most recent call last):
  File "extract_gitcsv.py", line 97, in <module>
    main()
  File "extract_gitcsv.py", line 92, in main
    process(csvout)
  File "extract_gitcsv.py", line 82, in process
    write_issues(resp, csvout)
  File "extract_gitcsv.py", line 28, in write_issues
    print " : Writing %s issues" % len(response.json())
  File "/Library/Python/2.7/site-packages/requests-2.14.2-py2.7.egg/requests/models.py", line 885, in json
    return complexjson.loads(self.text, **kwargs)
  File "/Library/Python/2.7/site-packages/simplejson/__init__.py", line 516, in loads
    return _default_decoder.decode(s)
  File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 370, in decode
    obj, end = self.raw_decode(s)
  File "/Library/Python/2.7/site-packages/simplejson/decoder.py", line 400, in raw_decode
    return self.scan_once(s, idx=_w(s, idx).end())
simplejson.scanner.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
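That "Expecting value: line 1 column 1 (char 0)" means the body handed to .json() wasn't JSON at all (empty, or an error/HTML page from somewhere in between), even though the status check passed. Not a fix, just a hedged debugging helper you could drop into the script to see what actually came back before parsing:

# Hedged debugging aid: print the raw response before the script tries .json() on it.
def debug_response(response):
    print "Status       : %s" % response.status_code
    print "Content-Type : %s" % response.headers.get('Content-Type')
    print "Body (start) : %r" % response.text[:200]

Calling debug_response(resp) in process() right after get_issues(url) should show whether the API returned an error page, an empty body, or something else entirely.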
Got this error:
GET https://api.github.com/repos/foo/bar/issues
: => 200
: Writing 1 issues
Traceback (most recent call last):
  File "issue_copy.py", line 95, in <module>
    main()
  File "issue_copy.py", line 91, in main
    csvfile.close()
AttributeError: 'str' object has no attribute 'close'
Fixed by replacing this:
def main():
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    csvout = csv.writer(open(csvfile, 'wb'))
    csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
    process(csvout)
    csvfile.close()
With this:
def main():
    csvfile = '%s-issues.csv' % (REPO.replace('/', '-'))
    with open(csvfile, 'wb') as f:
        csvout = csv.writer(f)
        csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
        process(csvout)
In any case I'm going to use something else as this only dumps the initial issue and not any comments/discussion that follows.
I ended up using this as I wanted to dump a whole organization's repos: https://github.com/josegonzalez/python-github-backup
Thanks!
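For anyone who wants the discussion in the CSV without switching tools, here is a minimal sketch (my own addition, not part of the gist) that pulls an issue's comments through its comments_url using the existing get_issues helper; its result could be appended as an extra column in write_issues:

# Hedged sketch: gather an issue's comments into one text blob for an extra CSV column.
def get_comment_text(issue):
    if issue['comments'] == 0:
        return ''
    resp = get_issues(issue['comments_url'])
    return '\n\n'.join('%s: %s' % (c['user']['login'], c['body'])
                       for c in resp.json())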
I have added extra columns like assignee and milestone, and changed the order of columns according to my requirements.
Find the changes here:
assignees = issue['assignees']
assigne_string = ''
for asignee in assignees:
    assigne_string = "%s %s" % (assigne_string, asignee['login'])
assigne_string = assigne_string[1:]  # drop the leading space

milestone_string = ''
milestones = issue['milestone']
if isinstance(milestones, dict):
    milestone_string = "%s %s" % (milestone_string, milestones['title'])

csvout.writerow([issue['number'], issue['title'].encode('utf-8'), issue['body'].encode('utf-8'), label_string.encode('utf-8'), assigne_string, milestone_string, issue['created_at'], issue['updated_at']])
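One small follow-up: the header row written in main() needs matching columns too, otherwise the CSV headings drift out of sync with the data. Something like this, adjusted to whatever order you settled on:

csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Assignees', 'Milestone', 'Created At', 'Updated At'))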
Thanks for this very useful code.
I had to change the open parameter from 'wb' to 'w', otherwise I was getting the error "TypeError: a bytes-like object is required, not 'str'".
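That 'wb' versus 'w' difference is Python 2 versus Python 3: in Python 3 the csv module wants a text-mode file, and the .encode('utf-8') calls on the row values can go too, since csv writes str directly. A hedged Python 3 variant of the file handling in main() (newline='' and encoding= are standard open() arguments):

# Hedged Python 3 variant: text mode, explicit encoding, newline='' as the csv docs suggest.
with open(csvfile, 'w', newline='', encoding='utf-8') as f:
    csvout = csv.writer(f)
    csvout.writerow(('id', 'Title', 'Body', 'Labels', 'Created At', 'Updated At'))
    process(csvout)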
Hi guys, here's the code for doing it with a JSON approach.
You will have to type in the Jira project name and key, and create the project beforehand.
I'm also adding all the comments from GitHub.
Using Python 3.5.4.
import requests
import json

GITHUB_USER = ''
GITHUB_PASSWORD = ''
GITHUB_TOKEN = ''
REPO = ''  # format is username/repo
ISSUES_FOR_REPO_URL = 'https://api.github.com/repos/%s/issues' % REPO

JIRA_PROJECT_NAME = "Example Name APP V1"
JIRA_PROJECT_KEY = "EXP"
DEFAULT_ISSUE_TYPE = "Bug"

# Update your filter here. See https://developer.github.com/v3/issues/#list-issues-for-a-repository
# Note that filtering is powerful and there are lots of things available. Also note that issues
# and PRs arrive in the same results set.
params_payload = {'filter': 'all', 'state': 'open', 'type': 'issue'}


def parse_json(response):
    data = []
    for full_issue in response.json():
        # Issue Dict
        labels = []
        for label in full_issue['labels']:
            labels.append(label['name'])

        if full_issue['comments'] > 0:
            comment_data = get_issues(full_issue['comments_url'])
            for full_comment in comment_data.json():
                body = '\n\n-------------------------------------------'
                body += "\nGithub Comment from %s" % full_comment['user']['login']
                body += '\n' + full_comment['body']
                full_issue['body'] += body

        issue = {
            # "priority":
            "description": full_issue['body'],
            # "status": "Closed",
            # "reporter": "alice",
            "labels": labels,
            # "watchers": ["bob"],
            # "issueType": DEFAULT_ISSUE_TYPE,
            # "resolution": "Resolved",
            "created": full_issue['created_at'],
            "updated": full_issue['updated_at'],
            # "affectedVersions": ["1.0"],
            "summary": full_issue['title'],
            "assignee": full_issue['assignee'] and full_issue['assignee']['login'] or "",
            # "fixedVersions": ["1.0", "2.0"],
            # "components": ["Component", "AnotherComponent"],
            "externalId": full_issue['number'],
            # "history": [
            #     {
            #         "author": "alice",
            #         "created": "2012-08-31T15:59:02.161+0100",
            #         "items": [
            #             {
            #                 "fieldType": "jira",
            #                 "field": "status",
            #                 "from": "1",
            #                 "fromString": "Open",
            #                 "to": "5",
            #                 "toString": "Resolved"
            #             }
            #         ]
            #     }
            # ],
            # "customFieldValues": [
            #     {
            #         "fieldName": "Story Points",
            #         "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #         "value": "15"
            #     },
            #     {
            #         "fieldName": "Business Value",
            #         "fieldType": "com.atlassian.jira.plugin.system.customfieldtypes:float",
            #         "value": "34"
            #     }
            # ],
            # "attachments": [
            #     {
            #         "name": "battarang.jpg",
            #         "attacher": "admin",
            #         "created": "2012-08-31T17:59:02.161+0100",
            #         "uri": "http://optimus-prime/~batman/images/battarang.jpg",
            #         "description": "This is optimus prime"
            #     }
            # ]
        }
        data.append(issue)
    return data


def get_issues(url):
    kwargs = {
        'headers': {
            'Content-Type': 'application/vnd.github.v3.raw+json',
            'User-Agent': 'GitHub issue exporter'
        },
        'params': params_payload
    }
    if GITHUB_TOKEN != '':
        kwargs['headers']['Authorization'] = 'token %s' % GITHUB_TOKEN
    else:
        kwargs['auth'] = (GITHUB_USER, GITHUB_PASSWORD)

    # print("GET %s" % url)
    resp = requests.get(url, **kwargs)
    print(" : => %s" % resp.status_code)
    # print(vars(resp))
    # print("RESPONSE : => %s" % resp)
    # import ipdb; ipdb.set_trace()
    if resp.status_code != 200:
        raise Exception(resp.status_code)

    return resp


def next_page(response):
    # more pages? examine the 'link' header returned
    if 'link' in response.headers:
        pages = dict(
            [(rel[6:-1], url[url.index('<') + 1:-1]) for url, rel in
             [link.split(';') for link in
              response.headers['link'].split(',')]])
        # import ipdb; ipdb.set_trace()
        if 'last' in pages and 'next' in pages:
            return pages['next']
    return None


def process_json(jdata, url=ISSUES_FOR_REPO_URL):
    resp = get_issues(url)
    jdata['projects'][0]['issues'] += parse_json(resp)
    next_ = next_page(resp)
    if next_ is not None:
        return process_json(jdata, next_)
    else:
        return jdata


def main():
    file_name = '%s-issues.json' % (REPO.replace('/', '-'))
    data = {
        'projects': [
            {
                'name': JIRA_PROJECT_NAME,
                'key': JIRA_PROJECT_KEY,
                'issues': []
            }
        ]
    }
    data = process_json(data)
    with open(file_name, 'w') as outfile:
        json.dump(data, outfile)


main()
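For reference, the JSON file this writes is shaped roughly as follows (a sketch with made-up values, showing only the fields the script actually fills in):

{
  "projects": [
    {
      "name": "Example Name APP V1",
      "key": "EXP",
      "issues": [
        {
          "summary": "Example issue title",
          "description": "Issue body plus any GitHub comments appended",
          "labels": ["bug"],
          "assignee": "octocat",
          "created": "2018-01-01T00:00:00Z",
          "updated": "2018-01-02T00:00:00Z",
          "externalId": 1
        }
      ]
    }
  ]
}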
How do I get the projects part of this JSON object?
Thanks! I had to make some changes to get it to work (worked backwards from the issues, guessing).
Also, body could be None.
Also,
#csvfile.close()
gave trouble, so I just commented it out!
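On the "body could be None" point: GitHub returns null for issues with an empty body, so the .encode('utf-8') call in write_issues fails on those. A hedged one-line guard for that row value:

# Hedged guard: fall back to an empty string when the issue body is null/None.
body_text = (issue['body'] or '').encode('utf-8')

Then use body_text in place of issue['body'].encode('utf-8') in the writerow call.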