|
# -*- coding: utf-8 -*- |
|
|
|
import argparse |
|
import re |
|
import sys |
|
|
|
|
|
class Outline:
    """ Root of an outline tree.

    The outline itself has no level. Its direct children are the level-1
    items; deeper items hang below the most recently added item of the
    level above them, so items must be appended in document order.
    """

    def __init__(self):
        # Level-1 items in insertion order.
        self.children = []
        # TODO: remember the last appended item so that append_child() does
        # not have to walk down from the top of the tree every time.

    def append_child(self, item):
        """ Insert an item at the position implied by its level.

        @param item entry to insert. Level 1 (or lower) is appended directly
                    to the outline; any other level is appended to the most
                    recently added item of level ``item.level - 1``. Missing
                    intermediate levels are filled with empty placeholders.
        """
        if item.level <= 1:
            self.children.append(item)
            return
        parent = self._find_child(self.get_last_child(), item.level - 1)
        parent.children.append(item)

    def get_last_child(self):
        """ Return the last level-1 item.

        An empty placeholder item is created (and kept in the outline) when
        there is no level-1 item yet.
        """
        if not self.children:
            self.children.append(Item(1, "", ""))
        return self.children[-1]

    def _find_child(self, parent, level):
        """ Walk down the "last child" chain from parent to the given level.

        @param parent item to start from
        @param level  level of the item to return
        @return the item at ``level`` on the last-child chain, or ``parent``
                itself when it is already at or below that level
        """
        node = parent
        # Always descend towards the *requested* level. Recursing with
        # ``item.level + 1`` as the target stopped one step below the first
        # child and attached deep items to the wrong ancestor.
        while node.level < level:
            child = node.get_last_child()
            if child is None:
                # Level skipped in the input: bridge it with a placeholder.
                child = Item(node.level + 1, "", "")
                node.append_child(child)
            node = child
        return node
|
|
|
|
|
class Item:
    """ Individual entry in an outline. """

    def __init__(self, level, title, text=None):
        """ Initialize.

        @param level depth of the entry, 1 for top-level entries
        @param title heading of the entry
        @param text  body text of the entry, may be None
        """
        self.level = level
        self.title = title
        self.text = text
        # Sub-entries in insertion order.
        self.children = []

    def append_child(self, child):
        """ Add child as the last sub-entry of this item. """
        self.children.append(child)

    def get_last_child(self):
        """ Return the most recently added sub-entry, or None if there is none. """
        return self.children[-1] if self.children else None

    def __str__(self):
        # text defaults to None, which cannot be sliced; show it as empty.
        preview = (self.text or "")[:10]
        return "<Item lv=\"{}\" title=\"{}\" text=\"{}\">".format(
            self.level, self.title, preview)
|
|
|
|
|
class Writer:
    """ Common base type for the outline writers; adds no behaviour itself. """
|
|
|
|
|
class OPMLWriter(Writer):
    """ Writer for OPML file.

    Usage: open(f), write(outline), close().
    """

    def __init__(self, title_attr="title", text_attr="text"):
        """ Initialize.

        @param title_attr name of the outline attribute receiving Item.title
        @param text_attr  name of the outline attribute receiving Item.text
        """
        self.title_attr = title_attr
        self.text_attr = text_attr

    def open(self, f):
        """ Start the document on f and emit everything up to <body>.

        @param f writable stream; it is closed by close()
        """
        # Imported here so that the XML modules are only loaded when an OPML
        # file is actually written.
        import xml.sax.saxutils
        from xml.sax.xmlreader import AttributesImpl

        self.f = f
        self.AttributesImpl = AttributesImpl
        self.x = xml.sax.saxutils.XMLGenerator(self.f, encoding="utf-8")

        gen = self.x
        gen.startDocument()
        gen.startElement("opml", AttributesImpl({"version": "2.0"}))
        gen.startElement("head", {})
        # TODO: head contents (title text, dateCreated, ...)
        gen.startElement("title", {})
        gen.endElement("title")
        gen.endElement("head")
        gen.startElement("body", {})

    def close(self):
        """ Close the elements left open by open(), then close the stream. """
        self.x.endElement("body")
        self.x.endElement("opml")
        self.x.endDocument()
        self.f.close()

    def write(self, outline):
        """ Write every child of outline (an Outline or an Item), depth first. """
        for item in outline.children:
            self._write_item(item)

    def _write_item(self, item):
        """ Write item as an <outline> element containing its children. """
        # Item.text defaults to None, and attribute values must be strings
        # for the XML generator; write missing values as empty attributes.
        attrs = self.AttributesImpl({
            self.title_attr: "" if item.title is None else item.title,
            self.text_attr: "" if item.text is None else item.text,
        })
        self.x.startElement("outline", attrs)
        self.write(item)
        self.x.endElement("outline")
|
|
|
|
|
class HierWriter(Writer):
    """ Writer for hierarchical outline text.

    Each entry is written as a heading line (level marker repeated once per
    level, followed by the title) and then its text on the following line(s).
    """

    def __init__(self, level_char="."):
        """ Initialize.

        @param level_char defines level of an entry. Each character +1 level.
        """
        self.level_char = level_char

    def open(self, f):
        """ Set the stream to write to.

        @param f writable text stream; it is closed by close()
        """
        self.f = f

    def close(self):
        """ Close the output stream. """
        self.f.close()

    def write(self, outline):
        """ Write every child of outline (an Outline or an Item), depth first. """
        for item in outline.children:
            self._write_item(item)

    def _write_item(self, item):
        """ Write the heading and text of item, then its children. """
        # Item.text defaults to None, which a text stream cannot write;
        # emit missing values as empty strings.
        title = "" if item.title is None else item.title
        text = "" if item.text is None else item.text
        marker = self.level_char * item.level
        self.f.write("{}{}\n{}\n".format(marker, title, text))
        self.write(item)
|
|
|
|
|
class Reader:
    """ Common base type for the outline readers; adds no behaviour itself. """
|
|
|
|
|
class OPMLReader(Reader):
    """ Reader for OPML file.

    The whole document is parsed in the constructor; the result is available
    as the ``contents`` attribute (an Outline).
    """

    def __init__(self, f, title_attr="title", text_attr="text"):
        """ Parse f and build the outline.

        @param f          file name or readable stream holding the OPML document
        @param title_attr outline attribute used as the title of an entry
        @param text_attr  outline attribute used as the text of an entry
        @raise ValueError when the document has no body element
        """
        # NOTE(review): minidom is not hardened against maliciously
        # constructed XML; only feed it trusted files.
        import xml.dom.minidom

        self.contents = Outline()
        self.title_attr = title_attr
        self.text_attr = text_attr

        dom = xml.dom.minidom.parse(f)
        bodies = dom.getElementsByTagName("body")
        # getElementsByTagName() returns an empty list, never None, when
        # nothing matches, so test for emptiness.
        if not bodies:
            raise ValueError("Empty document")

        for node in bodies[0].childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                self._read_outline(node, 1)

    def _append_item(self, level, title, text):
        """ Create an Item and insert it into the outline. """
        self.contents.append_child(Item(level, title, text))

    def _read_outline(self, node, level):
        """ Add node as an entry of the given level, then recurse into its child elements. """
        title = node.getAttribute(self.title_attr) or ""
        text = node.getAttribute(self.text_attr) or ""
        self._append_item(level, title, text)
        for child in node.childNodes:
            # Skip whitespace text nodes, comments etc. between elements.
            if child.nodeType == child.ELEMENT_NODE:
                self._read_outline(child, level + 1)
|
|
|
|
|
class HierReader(Reader):
    """ Reader for hierarchical outline file.

    A line starting with one or more level markers is a heading; the number
    of markers is its level and the rest of the line its title. All lines up
    to the next heading form the text of the entry. Lines before the first
    heading are ignored. The result is available as the ``contents``
    attribute (an Outline).
    """

    def __init__(self, f, level_char="."):
        """ Read f and build the outline.

        @param f          readable text stream
        @param level_char marker defining the level of an entry, taken
                          literally; each repetition is +1 level
        @raise ValueError when level_char is empty
        """
        if not level_char:
            raise ValueError("level_char must not be empty")
        # Markers may be longer than one character, so the level is the
        # number of repetitions, not the number of matched characters.
        self._marker_len = len(level_char)
        self.level_exp = "((?:" + re.escape(level_char) + ")+)"
        exp = re.compile(self.level_exp)
        self.contents = Outline()

        it = iter(f.readlines())
        # skip until first heading
        line = self._skip_head(exp, it)
        # read the entries
        self._read_lines(exp, line, it)

    def _append_item(self, level, title, text):
        """ Create an Item (trailing newlines removed) and insert it into the outline. """
        item = Item(level, title.rstrip("\n"), text.rstrip("\n"))
        self.contents.append_child(item)

    def _skip_head(self, exp, it):
        """ Consume lines from it up to and including the first heading.

        @return the first heading line, or None when the input is empty or
                contains no heading at all
        """
        for line in it:
            if exp.match(line):
                return line
        return None

    def _read_lines(self, exp, line, it):
        """ Build entries from line (the first heading) and the remaining lines of it.

        @param exp  compiled pattern whose group 1 is the run of level markers
        @param line first heading line, or None when there is nothing to read
        @param it   iterator over the lines following line
        """
        width = getattr(self, "_marker_len", 1)
        level = None
        title = None
        body = []
        while line is not None:
            m = exp.match(line)
            if m:
                # A new heading closes the entry collected so far.
                if title is not None:
                    self._append_item(level, title, "".join(body))
                    body = []
                markers = m.group(1)
                level = len(markers) // width
                title = line[len(markers):]
            else:
                body.append(line)
            line = next(it, None)
        # The last entry has no following heading to close it.
        if title is not None:
            self._append_item(level, title, "".join(body))
|
|
|
|
|
def main():
    """ Command line entry point: convert an outline between hier text and OPML.

    Reads from the file given with -f (default stdin) in the format given
    with -i and writes to the file given with -o (default stdout) in the
    format given with -x. A failed conversion prints a traceback and exits
    with status 1.
    """
    import contextlib
    import traceback

    version = "0.1.1"
    input_formats = ["hier", "opml"]
    output_formats = input_formats

    p = argparse.ArgumentParser(prog="olconv",
        description="Converts file between WZ and OPML for Python3")
    # The conversion type "s" is required; "%(prog) " is not a valid format.
    p.add_argument("--version", action="version", version="%(prog)s " + version)
    # The marker is escaped before it is used in a pattern, so it is a
    # literal string rather than a regular expression.
    p.add_argument("--char", default=".", help="Level marker of hier files, taken literally, default is .")
    p.add_argument("-f", help="Input file")
    p.add_argument("-o", help="Output file")
    # Both formats are mandatory: there is no sensible default to fall back on.
    p.add_argument("-i", choices=input_formats, required=True, help="Input type")
    p.add_argument("-x", choices=output_formats, required=True, help="Output type")
    p.add_argument("--title", default="title", help="Title attribute in OPML file, default is title")
    p.add_argument("--text", default="text", help="Text attribute in OPML file, default is text")
    args = p.parse_args()

    readers = {"hier": HierReader, "opml": OPMLReader}
    writers = {"hier": HierWriter, "opml": OPMLWriter}
    hier_args = {"level_char": args.char}
    opml_args = {"title_attr": args.title, "text_attr": args.text}
    reader_args = hier_args if args.i == "hier" else opml_args
    writer_args = hier_args if args.x == "hier" else opml_args

    try:
        # The stack closes the files opened here whatever happens; stdin and
        # stdout are not registered with it.
        with contextlib.ExitStack() as stack:
            if args.f:
                input_file = stack.enter_context(open(args.f, "r", encoding="utf-8"))
            else:
                input_file = sys.stdin
            # Parse the whole input before touching the output, so that a
            # broken input does not leave a truncated output file behind.
            r = readers[args.i](input_file, **reader_args)

            if args.o:
                out_file = stack.enter_context(open(args.o, "w", encoding="utf-8"))
            else:
                out_file = sys.stdout
            w = writers[args.x](**writer_args)
            w.open(out_file)
            w.write(r.contents)
            w.close()
    except Exception:
        traceback.print_exc()
        # Report the failure to the calling shell as well.
        sys.exit(1)
|
|
|
|
|
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()