jamesstout · April 22, 2018 11:32
diff --git a/opml2md.py b/opml2md.py
 # Based on https://gist.github.com/domdavis/9988867
 # Changed to handle podcast export OPML from Overcast. e.g. 
 # <outline type="rss" text="Road Work" title="Road Work" xmlUrl="http://feeds.5by5.tv/roadwork" htmlUrl="http://5by5.tv/roadwork"/>
 # I wanted to grab the xmlUrl and htmlUrl and output a <ul> with links to page and feed.
 # also strips usernames and passwords if set in the xmlUrl - add yours to the passwd var
 # $ pip install opml
 # $ python opml2md.py some_outline.opml
 # -> some_outline.md

 import codecs
 import opml
 import sys
 
 import os

 # Path of the OPML file to convert, taken from the first command-line argument.
 INPUT = sys.argv[1]
 # Output path: the input path with its final extension replaced by ".md".
 # os.path.splitext keeps the full name when INPUT has no extension (joining
 # the pieces of INPUT.split('.') produced a bare "md" in that case) and it
 # ignores dots that belong to directory names.
 OUTPUT = os.path.splitext(INPUT)[0] + '.md'

 # Read the whole OPML document and parse it into an outline tree.
 with codecs.open(INPUT, 'r') as f:
     outline = opml.from_string(f.read())

 # Markdown lines collected by _extractBlocks.
 blocks = []
 # Credential text removed from feed URLs - replace with your own "user:pass@".
 passwd = "username:password@"
 # Title suffix removed from top-level entries because it makes lines too long.
 adFreeStr = " (Ad-Free)"

 # * [The Talk Show With John Gruber](htmlUrl) [RSS](xmlUrl) 

 def substring_after(s, delim):
     """Return the part of s that follows the first occurrence of delim.

     Returns an empty string when delim does not occur in s.
     """
     _before, _found, after = s.partition(delim)
     return after

 def substring_before(s, delim):
     """Return the part of s that precedes the first occurrence of delim.

     Returns s unchanged when delim does not occur in it.
     """
     before, _found, _after = s.partition(delim)
     return before

 def strip_end(text, suffix):
     """Return text with suffix removed from its end, or text itself if it does not end with suffix."""
     if text.endswith(suffix):
         keep = len(text) - len(suffix)
         return text[:keep]
     return text

 def _extractBlocks(indent, node):
     """Append one Markdown bullet line per outline under node to the module-level blocks list.

     indent is the nesting level of node's children. Level 0 entries become a
     link to htmlUrl followed by an RSS icon linking to xmlUrl; deeper entries
     become plain text bullets. Entries that contain outlines of their own are
     walked recursively, one level deeper.
     """
     for entry in node:
         if indent != 0:
             # Nested entry: plain bullet, four spaces per level below the first.
             line = (" " * (4 * (indent - 1))) + "* " + entry.text
         else:
             # Remove the configured credentials from the feed URL; when they
             # are absent the two halves simply add up to the original URL.
             raw_feed = entry.xmlUrl
             feed_url = substring_before(raw_feed, passwd) + substring_after(raw_feed, passwd)
             # Drop a trailing " (Ad-Free)" marker - it makes lines too long.
             title = strip_end(entry.text, adFreeStr)
             # Alternative output without the FontAwesome SVG stuff:
             # "* [" + entry.text + "](" + entry.htmlUrl + ") [RSS](" + feed_url + ")\n"
             page_link = "* [" + title + "](" + entry.htmlUrl + ") "
             feed_link = '<a style="color:#fa9b39" href="' + feed_url + '" itemprop="sameAs"> <i class="fas fa-fw fa-rss-square" aria-hidden="true"></i></a>'
             line = page_link + feed_link

         blocks.append(line)
         if len(entry) > 0:
             _extractBlocks(indent + 1, entry)
 
 # Collect a bullet line for every outline, starting at the top level.
 _extractBlocks(0, outline)

 # Sort the collected lines and join them one per line (no trailing newline).
 ordered_blocks = list(blocks)
 ordered_blocks.sort()
 output_content = '\n'.join(ordered_blocks)
 with codecs.open(OUTPUT, 'w', 'utf-8') as f:
     f.write(output_content)

 # Report where the Markdown was written.
 print('->', OUTPUT)
	# Based on https://gist.github.com/domdavis/9988867
	# Changed to handle podcast export OPML from Overcast. e.g.
	# <outline type="rss" text="Road Work" title="Road Work" xmlUrl="http://feeds.5by5.tv/roadwork" htmlUrl="http://5by5.tv/roadwork"/>
	# I wanted to grab the xmlUrl and htmlUrl and output a <ul> with links to page and feed.
	# also strips usernames and passwords if set in the xmlUrl - add yours to the passwd var
	# $ pip install opml
	# $ python opml2md.py some_outline.opml
	# -> some_outline.md

	import codecs
	import opml
	import sys

	import os

	# Path of the OPML file to convert, taken from the first command-line argument.
	INPUT = sys.argv[1]
	# Output path: the input path with its final extension replaced by ".md".
	# os.path.splitext keeps the full name when INPUT has no extension (joining
	# the pieces of INPUT.split('.') produced a bare "md" in that case) and it
	# ignores dots that belong to directory names.
	OUTPUT = os.path.splitext(INPUT)[0] + '.md'

	# Read the whole OPML document and parse it into an outline tree.
	with codecs.open(INPUT, 'r') as f:
	    outline = opml.from_string(f.read())

	# Markdown lines collected by _extractBlocks.
	blocks = []
	# Credential text removed from feed URLs - replace with your own "user:pass@".
	passwd = "username:password@"
	# Title suffix removed from top-level entries because it makes lines too long.
	adFreeStr = " (Ad-Free)"

	# * [The Talk Show With John Gruber](htmlUrl) [RSS](xmlUrl)

	def substring_after(s, delim):
	    """Return the part of s that follows the first occurrence of delim.

	    Returns an empty string when delim does not occur in s.
	    """
	    # The body must be indented under the def; without that the function does not parse.
	    return s.partition(delim)[2]

	def substring_before(s, delim):
	    """Return the part of s that precedes the first occurrence of delim.

	    Returns s unchanged when delim does not occur in it.
	    """
	    # The body must be indented under the def; without that the function does not parse.
	    return s.partition(delim)[0]

	def strip_end(text, suffix):
	    """Return text with suffix removed from its end, or text itself if it does not end with suffix."""
	    # Guard first: only slice when the suffix really is at the end.
	    if not text.endswith(suffix):
	        return text
	    return text[:len(text) - len(suffix)]

	def _extractBlocks(indent, node):
	    """Append one Markdown bullet line per outline under node to the module-level blocks list.

	    indent is the nesting level of node's children. Level 0 entries become a
	    link to htmlUrl followed by an RSS icon linking to xmlUrl; deeper entries
	    become plain text bullets. Entries that contain outlines of their own are
	    walked recursively, one level deeper.
	    """
	    for child in node:
	        if indent == 0:
	            # Strip the configured credentials from the feed URL; when they
	            # are absent the two halves add up to the original URL.
	            xmlURL = substring_before(child.xmlUrl, passwd) + substring_after(child.xmlUrl, passwd)
	            # Strip a trailing " (Ad-Free)" marker - it makes lines too long.
	            textStr = strip_end(child.text, adFreeStr)
	            # Alternative output without the FontAwesome SVG stuff:
	            # text = "* [" + child.text + "](" + child.htmlUrl + ") [RSS](" + xmlURL + ")\n"
	            text = "* [" + textStr + "](" + child.htmlUrl + ") " + '<a style="color:#fa9b39" href="' + xmlURL + '" itemprop="sameAs"> <i class="fas fa-fw fa-rss-square" aria-hidden="true"></i></a>'
	        else:
	            # Nested entry: plain bullet, four spaces per level below the first.
	            depth = 4 * (indent - 1)
	            text = (" " * depth) + "* " + child.text

	        blocks.append(text)
	        if len(child) > 0:
	            _extractBlocks(indent + 1, child)

	# Collect a bullet line for every outline, starting at the top level.
	_extractBlocks(0, outline)

	# Sort the collected lines and join them one per line (no trailing newline).
	output_content = '\n'.join(sorted(blocks))
	# The write must sit inside the with-block so the file is open when it runs.
	with codecs.open(OUTPUT, 'w', 'utf-8') as f:
	    f.write(output_content)

	# Report where the Markdown was written.
	print('->', OUTPUT)