Skip to content

Instantly share code, notes, and snippets.

@svetlyak40wt
Created November 11, 2017 13:09
Show Gist options
  • Save svetlyak40wt/4d48c8594fed14faa740bb2113e6cd17 to your computer and use it in GitHub Desktop.
Save svetlyak40wt/4d48c8594fed14faa740bb2113e6cd17 to your computer and use it in GitHub Desktop.
Пример использования python-processor для преобразования Email в RSS фид.
#!env/bin/python3
import re
import logging
import os
from html2text import html2text
from processor import run_pipeline, sources, outputs
from twiggy_goodies.setup import setup_logging
# What is in here?
# Only processing of the index and moving tasks into org-mode.
# For this to work, install hy from the repository:
# git+git://github.com/hylang/hy.git@5af86d691d9caad52bff83b681e8891333a09889
# git@github.com:six8/pytailer.git
# tailer
# jsail
def markdown2org(text):
    """Convert Markdown links and images in *text* to org-mode links.

    ``[label](url)`` and ``![label](url)`` become ``[[url][label]]``; when
    the label is empty (or only whitespace) the result is ``[[url]]``.
    Matching uses ``re.DOTALL``, so a link that was hard-wrapped across
    several lines is still recognised and ends up on a single line.

    Args:
        text: Markdown text to convert.

    Returns:
        The text with every Markdown link rewritten in org-mode syntax.
    """
    def repl(m):
        # A line break inside a URL is a wrapping artefact: just drop it.
        url = m.group('url').replace('\n', '')
        # A line break inside the label stands in for whitespace, so replace
        # it with a single space instead of gluing the neighbouring words
        # together, then trim the label's surrounding whitespace.
        label = re.sub(r'\s*\n\s*', ' ', m.group('text')).strip()
        if label:
            return '[[{url}][{text}]]'.format(url=url, text=label)
        return '[[{url}]]'.format(url=url)

    return re.sub(r'!?\[(?P<text>.*?)\]\((?P<url>.*?)\)',
                  repl,
                  text,
                  flags=re.DOTALL)
def remove_unnecessary_text(text):
    """Strip mail-client boilerplate and empty lines from *text*.

    The substitutions are applied in order:

    1. drop everything from "Download the official Twitter app" to the end
       of that line;
    2. drop everything from "Sent from my" to the end of that line;
    3. blank out runs made only of non-word characters that span whole
       lines (separators, stray punctuation);
    4. collapse consecutive newlines into a single newline.

    Args:
        text: The message body to clean up.

    Returns:
        The cleaned-up text.
    """
    # (pattern, replacement, flags) triples; the order matters because the
    # later rules tidy up the blank lines left behind by the earlier ones.
    substitutions = (
        (r'Download the official Twitter app.*', '', 0),
        (r'Sent from my.*', '', 0),
        (r'^\W+$', r'', re.MULTILINE),
        (r'\n+', r'\n', 0),
    )
    for pattern, replacement, flags in substitutions:
        text = re.sub(pattern, replacement, text, flags=flags)
    return text
def swap_twitter_subject(subject, body):
    """Replace a generic 'Tweet from...' subject with a line from the body.

    When *subject* starts with 'Tweet from', the body is searched for the
    first line matching ``.*, ?\\d{2}:\\d{2}]]`` (text ending in a comma, a
    two-digit ``HH:MM`` time and ``]]``). The line that follows it becomes
    the new subject. If no line matches, or the match is the last line,
    the subject is left unchanged.

    Args:
        subject: The message subject.
        body: The message body.

    Returns:
        A ``(subject, body)`` tuple; the body is always returned as given.
    """
    if not subject.startswith('Tweet from'):
        return subject, body

    body_lines = body.split('\n')
    marker = re.compile(r'.*, ?\d{2}:\d{2}]]')
    # Only the first matching line is considered.
    marker_at = next(
        (pos for pos, candidate in enumerate(body_lines)
         if marker.match(candidate) is not None),
        None)
    if marker_at is not None and marker_at + 1 < len(body_lines):
        subject = body_lines[marker_at + 1]
    return subject, body
def mail_to_rss(item):
    """Turn a parsed e-mail into a feed entry.

    The HTML part is preferred and converted to text with ``html2text``;
    otherwise the plain-text part is used. The body then has its Markdown
    links rewritten for org-mode and its boilerplate removed, and a
    'Tweet from...' subject is swapped for a line taken from the body.

    Args:
        item: Mapping with a ``'headers'`` mapping and optional
            ``'html-body'`` / ``'plain-body'`` entries.

    Returns:
        A dict with ``title``, ``date`` and ``body`` keys.

    Raises:
        KeyError: If ``item`` has no ``'headers'`` entry or the headers
            have no ``'date'`` entry.
    """
    headers = item['headers']
    html_body = item.get('html-body')
    if html_body:
        body = html2text(html_body)
    else:
        # A message may carry neither an HTML nor a plain-text part; fall
        # back to an empty string so the regex helpers below never get None.
        body = item.get('plain-body') or ''

    body = markdown2org(body)
    body = remove_unnecessary_text(body)
    subject, body = swap_twitter_subject(
        headers.get('subject', 'Untitled'), body)
    return dict(title=subject,
                date=headers['date'],
                body=body)
def add_TODO_to_title(item):
    """Return a copy of *item* whose title is prefixed with ``'TODO '``.

    The input mapping itself is not modified.

    Raises:
        KeyError: If *item* has no ``'title'`` entry.
    """
    prefixed_title = u'TODO ' + item['title']
    updated = dict(item)
    updated['title'] = prefixed_title
    return updated
def for_any_message(msg):
    """Predicate that accepts every message (not referenced in the visible code)."""
    return True


NAME = 'rss-feed-for-org-mode'

# The log file and the database file are both named after the feed.
setup_logging('logs/' + NAME + '.log')
# NOTE(review): presumably read by `processor` to locate its state
# database, so it is set before the pipeline starts - confirm.
os.environ['PROCESSOR_DB'] = NAME + '.db'

try:
    logging.info('Starting')
    run_pipeline(
        sources.imap("imap.gmail.com",
                     "svet*",
                     "******",
                     "Autoprocessing/OrgModeInbox"),
        [mail_to_rss,
         add_TODO_to_title,
         outputs.rss('rss-feed.xml', limit=100)])
    logging.info('Done')
except Exception:
    # Top-level boundary: record any failure in the log. `Exception` (not a
    # bare `except`) lets KeyboardInterrupt and SystemExit propagate, so the
    # script can still be interrupted or terminated normally.
    logging.exception('Unhandled exception')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment