Created
June 7, 2013 05:32
-
-
Save whosaysni/5727233 to your computer and use it in GitHub Desktop.
Fetches the issues feed from a Google Code-hosted project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import os | |
import sys | |
import urllib | |
import tarfile | |
import time | |
from xml.etree import ElementTree as ET | |
from StringIO import StringIO | |
PROJECT_NAME = 'sahanadocsjp' | |
def FileTarInfo(name, buf, mode='0755', mtime=None, type_=tarfile.REGTYPE):
    """Build a ``tarfile.TarInfo`` describing an in-memory archive member.

    Arguments:
        name: member name to record in the archive.
        buf: member payload; only ``len(buf)`` is used, to fill ``size``.
        mode: permission bits, either as an octal string such as ``'0755'``
            or as an integer such as ``0o755``.
        mtime: modification time in seconds since the epoch; the current
            time is used when it is ``None``.
        type_: tarfile type flag for the member.

    Returns:
        The populated ``TarInfo``. The caller is expected to hand it to
        ``TarFile.addfile`` together with a file object wrapping ``buf``.
    """
    tar_info = tarfile.TarInfo(name)
    tar_info.size = len(buf)
    tar_info.mtime = time.time() if mtime is None else mtime
    try:
        # Historical form: an octal string such as '0755'.
        tar_info.mode = int(mode, 8)
    except TypeError:
        # int(x, 8) rejects non-strings, so an integer mode lands here.
        tar_info.mode = int(mode)
    tar_info.type = type_
    # os.getuid/os.getgid are missing on Windows; in that case the ids that
    # TarInfo was constructed with are left as they are.
    if hasattr(os, 'getuid'):
        tar_info.uid = os.getuid()
    if hasattr(os, 'getgid'):
        tar_info.gid = os.getgid()
    return tar_info
def _fetch_url(url):
    """Return the whole response body for url, closing the connection."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _add_buffer(archive_file, member_name, buf):
    """Store buf in archive_file as a member called member_name."""
    archive_file.addfile(FileTarInfo(member_name, buf), StringIO(buf))


def do_job(project_name, archive_filename=None, **kargs):
    """Create a gzipped tar archive of a project's issue feeds.

    The issues feed is downloaded and stored as ``issues.xml``; then, for
    every entry id found in that feed, the matching comments feed is
    downloaded and stored as ``issue_comments_<id>.xml``. Progress messages
    are written to stderr.

    Arguments:
        project_name: project whose feeds are fetched; it is interpolated
            into the feed URL.
        archive_filename: path of the archive to write. When empty or
            ``None`` it defaults to ``'<project_name>_issues.tgz'``.
        **kargs: accepted for call compatibility and otherwise ignored.

    Note:
        A single request with ``max-results=200`` is made for the issues
        feed; no further pages are requested.
    """
    # preparation
    base_url = (
        'https://code.google.com/feeds/issues/p/%s/issues/' % (project_name))
    # url for issue feed
    issues_url = base_url + 'full?max-results=200'
    # url template for issue comments feed
    issue_comments_url_t = base_url + '%s/comments/full'
    atom_namespace_prefix = '{http://www.w3.org/2005/Atom}'
    # path selecting the <id> child of every <entry> in the feed
    atom_id_xpath = '/'.join(
        [atom_namespace_prefix + 'entry', atom_namespace_prefix + 'id'])
    if not archive_filename:
        archive_filename = '%s_issues.tgz' % (project_name)

    # Fetch the feed before creating the archive so that a failed download
    # does not leave an empty archive file behind.
    sys.stderr.write('Loading issues: %s\n' % (issues_url))
    issues_feed_buf = _fetch_url(issues_url)

    archive_file = tarfile.open(archive_filename, 'w:gz')
    try:
        _add_buffer(archive_file, 'issues.xml', issues_feed_buf)
        sys.stderr.write('Saved issues.xml\n')

        # analyze issues tree to extract entry ids
        entries = ET.fromstring(issues_feed_buf).findall(atom_id_xpath)
        sys.stderr.write('Found %d entries.\n' % (len(entries)))
        for entry in entries:
            # The issue id is the last path segment of the entry id. Taking
            # that segment, rather than slicing off a fixed-length prefix,
            # works regardless of the scheme (http/https) used in the id.
            feed_path = (entry.text or '').strip()
            issue_id = feed_path.rstrip('/').rsplit('/', 1)[-1]
            if not issue_id:
                sys.stderr.write('Skipping entry without an id.\n')
                continue
            # load
            comments_url = issue_comments_url_t % (issue_id)
            sys.stderr.write(
                'Loading comments for issue %s (%s)...\n'
                % (issue_id, comments_url))
            comments_feed_buf = _fetch_url(comments_url)
            # write
            comments_filename = 'issue_comments_%s.xml' % (issue_id)
            _add_buffer(archive_file, comments_filename, comments_feed_buf)
            sys.stderr.write('Saved %s\n' % (comments_filename))
    finally:
        # Close the archive even when a download or parse step fails.
        archive_file.close()
if __name__ == '__main__':
    # Script entry point: forward all command-line arguments to do_job, or
    # print a usage line to stderr when no project name was given.
    cli_args = sys.argv[1:]
    if not cli_args:
        sys.stderr.write(
            'Usage: %s <project_name> [archive_filename]\n' % sys.argv[0])
    else:
        do_job(*cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment