Skip to content

Instantly share code, notes, and snippets.

@hideaki-t
Last active December 18, 2015 15:09
Show Gist options
  • Save hideaki-t/5802690 to your computer and use it in GitHub Desktop.
Save hideaki-t/5802690 to your computer and use it in GitHub Desktop.
Filter an OPML file that was imported into Feedeen, using Feedeen's import notification mail.
# coding: utf-8
from __future__ import print_function
import sys
import xml.etree.ElementTree as ET
import urllib.request
import html
def remove_node(root, node):
    """Detach ``node`` from the element tree rooted at ``root``.

    The parent is located by object identity instead of a title-based path
    query, so removal works for any title (including ones containing ``&``
    or quote characters) and when several siblings share the same title.

    Args:
        root: Element whose subtree (``root`` included) is searched for the
            parent of ``node``.
        node: The exact Element object to remove.

    Returns:
        None. Nothing happens if ``node`` is not a descendant of ``root``.
    """
    for parent in root.iter():
        # Compare identities explicitly: we want this very object, not an
        # element that merely looks the same.
        if any(child is node for child in parent):
            parent.remove(node)
            # An element has a single parent, so stop after detaching it.
            return
# Feed titles to keep: one per line on stdin, surrounding whitespace dropped.
titleset = {line.strip() for line in sys.stdin}

# The OPML document to filter is named by the first command-line argument.
opml = sys.argv[1]
tree = ET.parse(opml)
root = tree.getroot()

# findall() returns a list, so nodes can be removed while looping over it.
for outline in root.findall('.//outline[@type="rss"]'):
    attrs = outline.attrib
    # The title is HTML-escaped before being compared with the stdin entries
    # (presumably the notification mail lists titles in escaped form).
    title = html.escape(attrs['title'])
    url = attrs['xmlUrl']
    print('title = {}, url={}'.format(title, url), file=sys.stderr)

    # Drop every feed whose title was not listed on stdin.
    if title not in titleset:
        remove_node(root, outline)
        continue

    # Listed feeds are kept only if their URL can actually be opened.
    try:
        with urllib.request.urlopen(url) as resp:
            print("{}: {}".format(url, resp.status), file=sys.stderr)
    except Exception as err:
        print("{} {}".format(err, url), file=sys.stderr)
        remove_node(root, outline)

# Emit the filtered OPML as UTF-8 bytes on stdout.
tree.write(sys.stdout.buffer, encoding='utf-8')
python -c 'import sys,email;print(str(email.message_from_file(sys.stdin).get_payload(decode=1),"utf-8"))' < mail.txt | sed -n 's/^ \* //p' | python filteropml.py subscriptions.xml > newopml.xml
# mail.txt is the original mail from Feedeen (Content-Type: text/plain; charset=UTF-8 and Content-Transfer-Encoding: base64)
# If you already have the decoded mail body, you can skip the first step (the mail-decoding command).
# subscriptions.xml is the OPML you want to import
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment