Created
December 11, 2017 18:14
-
-
Save twerp/95f087e2e11ca9badbc96160004e680e to your computer and use it in GitHub Desktop.
WIP tool to "analyze" DokuWiki contents (currently prints folders, pages, and page revisions).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import datetime

# Top of the directory tree that the script walks, relative to the current
# working directory. The path is written with Windows separators.
TOPFOLDER = r'..\wiki\data\attic'
class Folder:
    """A node in the folder tree: a named folder with sub-folders and pages.

    Attributes:
        name: Folder name; an empty name is replaced by ``'<root>'``.
        parent: The enclosing ``Folder``, or ``None`` for the top of the tree.
        subfolders: Child ``Folder`` objects (appended by the caller).
        _pages: Page objects stored through ``addpage``.
    """

    # Page names that addpage() refuses to store.
    _page_blacklist = ['sidebar']
    # Folder names that the tree-building script skips.
    blacklist = ['tag', 'testi', 'wiki']

    def __init__(self, name, parent=None):
        self.name = name if name else '<root>'
        self.parent = parent
        self.subfolders = []
        self._pages = []

    def __str__(self):
        return self.name

    def __repr__(self):
        # Same text as str(); this is what shows up when lists are printed.
        return self.__str__()

    def getpage(self, pagename):
        """Return the first stored page named *pagename*, or ``None``."""
        return next((p for p in self._pages if p.name == pagename), None)

    def addpage(self, page):
        """Store *page* unless it is already stored or its name is blacklisted."""
        if page.name in Folder._page_blacklist:
            return
        if page not in self._pages:
            self._pages.append(page)

    def printpages(self):
        """Print the list of stored pages."""
        print(self._pages)

    def getpages(self):
        """Yield the stored pages in insertion order."""
        for p in self._pages:
            yield p

    def printfiles(self, pagename):
        """Print the files of the page named *pagename*.

        An unknown page prints an empty list instead of raising.
        """
        page = self.getpage(pagename)
        print(page.files if page is not None else [])

    def getfolder(self, foldername):
        """Return the direct sub-folder named *foldername*.

        Falls back to this folder itself when no sub-folder matches; the
        tree-building script relies on that fallback.
        """
        for f in self.subfolders:
            if f.name == foldername:
                return f
        return self

    def printall(self):
        """Recursively print this folder, its pages and all its sub-folders.

        Non-root folders are printed as ``parent:name`` (immediate parent
        only, not the full path).
        """
        if self.parent:
            print(self.parent.name + ':' + self.name)
        else:
            print(self)
        self.printpages()
        for f in self.subfolders:
            f.printall()

    def findfolder(self, names):
        """Resolve a path of folder names relative to this folder.

        Args:
            names: Sequence of folder names, outermost first, e.g.
                ``['web-ohjelmointi', 'javascript']``.

        Returns:
            The matching descendant ``Folder``, this folder itself for an
            empty path, or ``None`` if any path component is missing.
        """
        folder = self
        for name in names:
            # First sub-folder with a matching name wins, without backtracking.
            folder = next(
                (f for f in folder.subfolders if f.name == name), None)
            if folder is None:
                return None
        return folder
class Page:
    """A named page together with the list of files collected for it.

    Attributes:
        name: Page name.
        folder: Object passed as the page's folder, or ``None``.
        files: List of file objects; starts empty and is filled by the caller.
    """

    def __init__(self, name, folder=None):
        self.name = name
        self.folder = folder
        self.files = []

    def __str__(self):
        # Rendered as "<name> (<number of files>)".
        return '{} ({})'.format(self.name, len(self.files))

    def __repr__(self):
        # Same text as str(); this is what shows up when lists are printed.
        return self.__str__()
class File:
    """A single file belonging to a page.

    Attributes:
        name: File name as given by the caller.
        date: Optional date value supplied by the caller (``None`` if omitted).
        page: Optional back-reference to the owning page.
    """

    def __init__(self, name, date=None, page=None):
        self.name = name
        self.date = date
        self.page = page

    def __str__(self):
        return self.name

    def __repr__(self):
        # Same text as str(); this is what shows up when lists are printed.
        return self.__str__()
# Only files with this suffix are treated as page revisions.
_REVISION_SUFFIX = '.txt.gz'


def _parse_revision(filename):
    """Split a revision file name into ``(pagename, date)``.

    Expects ``<pagename>.<unix timestamp>.txt.gz``. The timestamp is taken
    from the last dot-separated field so that page names containing dots stay
    intact. For names that do not follow that pattern, the page name is the
    text before the first dot and the date is ``None``.
    """
    stem = filename[:-len(_REVISION_SUFFIX)]
    pagename, _, stamp = stem.rpartition('.')
    if pagename and stamp.isdigit():
        try:
            date = datetime.datetime.fromtimestamp(
                int(stamp), tz=datetime.timezone.utc)
        except (OverflowError, OSError, ValueError):
            # Numeric but not a representable timestamp: keep the page name.
            date = None
        return pagename, date
    return stem.partition('.')[0], None


def _build_tree(topfolder):
    """Walk *topfolder* and return the root ``Folder``, or ``None`` if empty.

    Every directory becomes a ``Folder``, every distinct page name a ``Page``
    and every ``*.txt.gz`` file a ``File`` attached to its page.
    """
    tree = None
    for dirpath, dirs, files in os.walk(topfolder):
        # Prune in place so os.walk never descends into blacklisted folders.
        dirs[:] = [d for d in dirs if d not in Folder.blacklist]

        if tree is None:
            # The first directory yielded by a top-down walk is the top itself.
            current = tree = Folder('')
        else:
            relative = os.path.relpath(dirpath, topfolder)
            current = tree.findfolder(relative.split(os.sep))
            if current is None:
                continue

        # Sub-folders must exist before pages so getfolder() can find them.
        for d in dirs:
            current.subfolders.append(Folder(d, parent=current))

        for filename in files:
            if not filename.endswith(_REVISION_SUFFIX):
                continue
            pagename, date = _parse_revision(filename)
            page = current.getpage(pagename)
            if page is None:
                page = Page(pagename, folder=current.getfolder(pagename))
                current.addpage(page)
            # Record every revision, including the one that created the page.
            page.files.append(File(filename, date=date, page=page))
    return tree


if __name__ == '__main__':
    tree = _build_tree(TOPFOLDER)
    if tree is None:
        # os.walk yields nothing when the top folder is missing or unreadable.
        raise SystemExit('Folder not found: {}'.format(TOPFOLDER))
    tree.printall()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment