Last active
December 22, 2016 05:48
-
-
Save karlcow/cdc0075a334790aeefada4a6b8f9292e to your computer and use it in GitHub Desktop.
take a dump of Github JSON issues data on https://github.com/webcompat/web-bugs/ See issue https://github.com/webcompat/webcompat.com/issues/1233
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
RESULTS = ''' | |
new format: {new} | |
old format: {old} # can't conclude anything | |
--------------------- | |
INVALID: On {invalid} > empty: {empty_invalid} | |
VALID: On {valid} > empty: {empty_valid} | |
''' | |
def body_trim(body): | |
'''Extract only the string starting with Expected Behavior.''' | |
search = body.partition('**Steps to Reproduce**')[2] | |
search = search.replace('\n', '').replace('\r', '') | |
return search | |
def is_empty(reduced_body): | |
'''Check if the issue has an empty description.''' | |
new_format = True | |
emptiness = False | |
if 'Expected Behavior' in reduced_body: | |
# More chances to be without description | |
if 'Expected Behavior:Actual Behavior:' in reduced_body: | |
desc = reduced_body.partition('Expected Behavior') | |
# but the description can be after actual behavior | |
if desc[2] == ':Actual Behavior:': | |
emptiness = True | |
# or the description can be in the steps to reproduce | |
if desc[0].endswith('2) \xe2\x80\xa6'): | |
emptiness = True | |
else: | |
emptiness = False | |
else: | |
new_format = False | |
emptiness = False | |
return emptiness, new_format | |
with open('issues.json') as issuesdb: | |
issues = json.load(issuesdb) | |
old_counter, new_counter = 0, 0 | |
invalid, valid = 0, 0 | |
empty_invalid, empty_valid = 0, 0 | |
for issue in issues: | |
status_list = [label['name'] for label in issue['labels'] | |
if label['name'].startswith('status-')] | |
# We just need the part of the body which starts after Steps to reproduce | |
body = issue['body'].encode('utf-8') | |
reduced_body = body_trim(body) | |
# Checking for invalid issues only | |
if 'status-invalid' in status_list: | |
invalid += 1 | |
invalid_empty, new_format = is_empty(reduced_body) | |
if invalid_empty: | |
empty_invalid += 1 | |
# bug is considered valid | |
else: | |
valid += 1 | |
valid_empty, new_format = is_empty(reduced_body) | |
if valid_empty: | |
empty_valid += 1 | |
if new_format: | |
new_counter += 1 | |
else: | |
old_counter += 1 | |
print(RESULTS.format( | |
new=new_counter, old=old_counter, | |
invalid=invalid, valid=valid, | |
empty_invalid=empty_invalid, empty_valid=empty_valid)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment