Created
December 10, 2008 00:22
-
-
Save zerok/34160 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
This small script takes the URL to one of your Pownce Export XML-files | |
and downloads it including (as far as I can tell) all your files into a | |
folder, which you can specify with the -o option:: | |
$ python pownce-files.py <url_to_export> | |
Requirements: Python >= 2.5, < 3.0 | |
""" | |
from __future__ import with_statement | |
from xml.etree import ElementTree as ET | |
import os.path, os, logging, urllib2, sys, optparse | |
def _urlopen(url):
    """Open *url* with whichever urllib flavour this interpreter provides."""
    # Resolved locally so the download helpers do not depend on which
    # module-level urllib import is available.
    try:
        from urllib2 import urlopen  # Python 2
    except ImportError:
        from urllib.request import urlopen  # Python 3
    return urlopen(url)


def _copy_stream(source, sink, chunk_size=500000):
    """Copy *source* into *sink* in chunks until *source* is exhausted."""
    while True:
        data = source.read(chunk_size)
        # Test emptiness, not identity: ``data is ""`` only terminates when
        # the interpreter happens to reuse a single empty-string object.
        if not data:
            break
        sink.write(data)


def _fetch(url, target):
    """Download *url* into the file *target*.

    Returns True on success. On failure the error is logged, a partially
    written *target* is removed (so a later run retries it) and False is
    returned. Interrupts such as KeyboardInterrupt still propagate, after
    the same cleanup.
    """
    source = None
    sink = None
    complete = False
    try:
        source = _urlopen(url)
        sink = open(target, 'wb')
        _copy_stream(source, sink)
        complete = True
    except Exception:
        logging.exception("Could not download %s", url)
    finally:
        if sink is not None:
            sink.close()
        if source is not None:
            source.close()
        # Only remove the file if this call created it.
        if sink is not None and not complete:
            os.unlink(target)
    return complete


def _store_file(folder, filename, url):
    """Download one attachment into *folder* unless it is already there."""
    # The name comes straight from the downloaded XML. Keep only its last
    # path component so os.path.join() cannot be steered outside *folder*
    # by an absolute path or a "../" prefix.
    name = os.path.basename(filename or '')
    if not name or not url:
        logging.warning("Skipping incomplete file entry (name=%r, url=%r)",
                        filename, url)
        return
    fpath = os.path.join(folder, name)
    logging.info("Downloading %s into %s", name, folder)
    if os.path.exists(fpath):
        logging.info("%s already downloaded", name)
        return
    _fetch(url, fpath)


def main(folder, export_url):
    """Download an export XML file and every file it references.

    The document at *export_url* is saved as ``export.xml`` inside *folder*
    (created if missing). The saved XML is then parsed incrementally: for
    each ``<file>`` element inside a ``<note>``, the text of its ``<url>``
    child is downloaded into *folder* under the name given by its
    ``<storage_name>`` child. Files that already exist are skipped, and a
    failed attachment download is logged without aborting the run.

    Errors while fetching or parsing the export file itself propagate to
    the caller.
    """
    if not os.path.isdir(folder):
        logging.info("Storage folder doesn't exist yet. Creating it now")
        os.makedirs(folder)

    export_file = os.path.join(folder, 'export.xml')
    response = _urlopen(export_url)
    try:
        # Binary mode: the payload is raw bytes from the network.
        with open(export_file, 'wb') as file_:
            _copy_stream(response, file_)
    finally:
        response.close()

    in_note = False
    in_file = False
    current_url = None
    current_filename = None
    with open(export_file, 'rb') as fp:
        for event, elem in ET.iterparse(fp, events=('start', 'end')):
            tag = elem.tag
            if tag == 'note':
                in_note = (event == 'start')
                if event == 'end':
                    # Drop the finished note so memory use stays flat.
                    elem.clear()
            elif tag == 'file' and in_note:
                if event == 'start':
                    in_file = True
                    # Reset per entry so an incomplete <file> cannot reuse
                    # the name or URL of the previous one.
                    current_url = None
                    current_filename = None
                else:
                    in_file = False
                    _store_file(folder, current_filename, current_url)
            elif in_file and event == 'end':
                if tag == 'storage_name':
                    current_filename = elem.text
                elif tag == 'url':
                    current_url = elem.text
if __name__ == '__main__':
    # Command-line entry point: parse the options, configure logging and
    # hand the output directory plus the export URL over to main().
    parser = optparse.OptionParser()
    parser.add_option(
        '-d', '--debug',
        dest='debug', action='store_true', default=False,
        help='Enable debug output')
    parser.add_option(
        '-o', '--output-directory',
        dest='directory', action='store', default='pownce-files',
        help='Directory used for all downloaded content')
    options, args = parser.parse_args()

    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    # The export URL is the first positional argument and is mandatory.
    if len(args) == 0:
        logging.error("You have to specify at least one argument for the URL of the export file")
        sys.exit(1)
    main(options.directory, args[0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment