Skip to content

Instantly share code, notes, and snippets.

@tomoconnor
Created September 14, 2011 15:55
Show Gist options
  • Select an option

  • Save tomoconnor/1216940 to your computer and use it in GitHub Desktop.

Select an option

Save tomoconnor/1216940 to your computer and use it in GitHub Desktop.
WikiArchiver v2 - requires gdata (the Google Data APIs Python client library)
import gdata.docs.data
import gdata.docs.client
import gdata
import os
import datetime
import mimetypes
import logging
# Module-level logger. The logger itself passes DEBUG and above; the file
# handler writes all of it to output.log, while the stream handler only
# echoes ERROR and above, so the info/debug progress messages emitted by
# this script end up in the log file only.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
fh = logging.FileHandler('output.log')
fh.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.ERROR)
logger.addHandler(ch)
logger.addHandler(fh)
# Shared Google Docs client used for every folder/upload call below.
client = gdata.docs.client.DocsClient(source='wikiDocker-v2')
# NOTE(review): presumably forces HTTPS and silences HTTP tracing -- confirm
# against the gdata client documentation.
client.ssl = True
client.http_client.debug = False
# Placeholder: replace with the root directory that os.walk should traverse.
EXPORT_DIR = "<wget --mirror root>"
# File names uploaded so far in this run; handleObject checks it to skip
# names it has already uploaded.
uploaded_files = []
# Placeholders: replace with real credentials before running.
# NOTE(review): the credentials live in source code -- consider reading
# them from the environment or a prompt instead.
client.ClientLogin('<google id>', '<google password>', client.source)
# Initialise the MIME type tables used by mimetypes.guess_type.
mimetypes.init()
def handleFile(p, f, folder):
    """Log that the file at path ``p`` was seen; nothing is uploaded.

    ``f`` and ``folder`` are accepted so the call signature matches
    ``handleObject``, but neither is used here.
    """
    message = "FILE: " + p
    logger.info(message)
def handleObject(p, f, folder):
    """Upload the file at path ``p`` under the title ``f`` and move it into ``folder``.

    Returns True when both the upload and the move succeed. Returns False
    when ``f`` has already been uploaded during this run (logged as
    DUPLICATE) or when the upload or move raises (logged as FAILED together
    with the traceback).
    """
    if f in uploaded_files:
        logger.info("DUPLICATE: " + f)
        return False

    # Fall back to HTML when the type cannot be guessed from the path.
    mime = mimetypes.guess_type(p)[0]
    if mime is None:
        mime = 'text/html'
    ms = gdata.data.MediaSource(file_path=p, content_type=mime)

    try:
        new_page = client.Upload(ms, f)
        client.move(new_page, folder)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still abort the run instead of
        # being logged as a failed upload. exc_info keeps the reason for
        # the failure, which was previously discarded.
        logger.info("FAILED: " + p, exc_info=True)
        return False

    # Only names that were uploaded and moved successfully are remembered.
    uploaded_files.append(f)
    return True
# Create the destination folder for this run, named after the current
# local time. Every field after the year is zero-padded: unpadded, distinct
# times can produce the same name (1:11:01 and 11:01:01 both render as
# "1111"), and the names would not sort chronologically.
now = datetime.datetime.now()
datestring = "%d-%02d-%02d-%02d%02d%02d" % (
    now.year, now.month, now.day, now.hour, now.minute, now.second)
folder_id = "WikiArchive-%s" % datestring
wiki_folder = client.Create(gdata.docs.data.FOLDER_LABEL, folder_id)
logger.info('Folder "%s" created', wiki_folder.title.text)
# Path fragments whose files are skipped entirely (presumably wiki
# namespace prefixes that should not be archived -- confirm with the owner).
_SKIPPED_MARKERS = ('Category:', 'Template:', 'Special:', 'User:', 'User_talk:')
# Path fragments whose files are only logged via handleFile, not uploaded.
_MEDIA_MARKERS = ('Image:', 'File:')

# Visit every file below EXPORT_DIR exactly once. The file loop used to be
# nested inside a loop over the sub-directory names, so each file was
# visited once per sub-directory of its parent and files in directories
# without sub-directories were never visited at all.
for dirname, _dirnames, filenames in os.walk(EXPORT_DIR):
    for filename in filenames:
        fpath = os.path.join(dirname, filename)
        logger.debug("PROCESSING: " + fpath)
        if any(marker in fpath for marker in _SKIPPED_MARKERS):
            continue
        if any(marker in fpath for marker in _MEDIA_MARKERS):
            handleFile(fpath, filename, wiki_folder)
        else:
            handleObject(fpath, filename, wiki_folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment