Created
          November 11, 2017 13:09 
        
      - 
      
- 
        Save svetlyak40wt/4d48c8594fed14faa740bb2113e6cd17 to your computer and use it in GitHub Desktop. 
    Пример использования python-processor для преобразования Email в RSS фид.
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!env/bin/python3 | |
| import re | |
| import logging | |
| import os | |
| from html2text import html2text | |
| from processor import run_pipeline, sources, outputs | |
| from twiggy_goodies.setup import setup_logging | |
| # Что тут есть? | |
| # один только процессинг индекса и перекладывание тасков в org-mode | |
| # чтобы работало, надо установить hy из репозитория: | |
| # git+git://github.com/hylang/hy.git@5af86d691d9caad52bff83b681e8891333a09889 | |
| # git@github.com:six8/pytailer.git | |
| # tailer | |
| # jsail | |
def markdown2org(text):
    """Convert Markdown links and images in ``text`` to org-mode links.

    Every ``[label](url)`` or ``![alt](url)`` occurrence is rewritten as
    ``[[url][label]]``, or as ``[[url]]`` when the label is empty.  The
    pattern is applied with ``re.DOTALL`` so a link may span several lines.

    Args:
        text: String that may contain Markdown links.

    Returns:
        The string with every Markdown link replaced by an org-mode link.
    """

    def to_org_link(match):
        # A line break inside a URL carries no meaning, so it is dropped.
        url = match.group('url').replace('\n', '')
        # A line break inside the label presumably comes from line wrapping
        # and stands for the space between two words; turning it into a
        # space keeps "foo\nbar" from being glued together as "foobar".
        label = match.group('text').replace('\n', ' ').strip()
        if label:
            return '[[{url}][{text}]]'.format(url=url, text=label)
        return '[[{url}]]'.format(url=url)

    return re.sub(r'!?\[(?P<text>.*?)\]\((?P<url>.*?)\)',
                  to_org_link,
                  text,
                  flags=re.DOTALL)
def remove_unnecessary_text(text):
    """Strip boilerplate phrases and blank-looking lines from ``text``.

    The substitutions run in a fixed order:

    1. drop "Download the official Twitter app" through the end of its line;
    2. drop "Sent from my" through the end of its line;
    3. drop runs of non-word characters that fill whole lines;
    4. collapse consecutive newlines into a single one.

    Args:
        text: The message body to clean.

    Returns:
        The cleaned string.
    """
    # (pattern, replacement, flags) triples; the order matters because the
    # later steps tidy up what the earlier ones leave behind.
    cleanup_steps = (
        (r'Download the official Twitter app.*', '', 0),
        (r'Sent from my.*', '', 0),
        (r'^\W+$', r'', re.MULTILINE),
        (r'\n+', r'\n', 0),
    )
    cleaned = text
    for pattern, replacement, flags in cleanup_steps:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
    return cleaned
def swap_twitter_subject(subject, body):
    """Replace a generic 'Tweet from...' subject with a line from the body.

    When ``subject`` starts with 'Tweet from', the body is scanned for the
    first line that begins with text ending in ", HH:MM]]" (comma, optional
    space, two-digit hour and minute, closing org-link brackets).  The line
    right after it becomes the new subject.  In every other case the
    subject is left as it was.

    Args:
        subject: The message subject.
        body: The message body, already converted to org-mode text.

    Returns:
        A ``(subject, body)`` tuple; ``body`` is always returned unchanged.
    """
    if not subject.startswith('Tweet from'):
        return subject, body

    body_lines = body.split('\n')
    marker = re.compile(r'.*, ?\d{2}:\d{2}]]')
    for position, current in enumerate(body_lines):
        if marker.match(current) is None:
            continue
        # Only the first marker line is considered; if it is the last line
        # of the body there is nothing to take, so the subject stays as is.
        if position + 1 < len(body_lines):
            subject = body_lines[position + 1]
        break
    return subject, body
def mail_to_rss(item):
    """Turn a parsed e-mail ``item`` into a dict describing a feed entry.

    The HTML body, when present, is converted to text with ``html2text``;
    otherwise the plain body is used.  The text is then passed through
    ``markdown2org``, ``remove_unnecessary_text`` and
    ``swap_twitter_subject``.

    Args:
        item: Mapping with a 'headers' mapping and optional 'html-body' /
            'plain-body' entries (presumably produced by the IMAP source;
            verify against the caller).

    Returns:
        A dict with 'title', 'date' and 'body' keys.

    Raises:
        KeyError: If ``item`` has no 'headers' or the headers have no 'date'.
    """
    headers = item['headers']
    html_body = item.get('html-body')
    if html_body:
        body = html2text(html_body)
    else:
        # A message may carry neither body part; fall back to an empty
        # string so the regex-based helpers below do not fail on None.
        body = item.get('plain-body') or ''

    body = markdown2org(body)
    body = remove_unnecessary_text(body)
    subject, body = swap_twitter_subject(
        headers.get('subject', 'Untitled'), body)

    return dict(title=subject,
                date=headers['date'],
                body=body)
def add_TODO_to_title(item):
    """Return a copy of ``item`` whose title is prefixed with 'TODO '.

    Args:
        item: Mapping with at least a 'title' entry.

    Returns:
        A new dict; the mapping passed in is not modified.
    """
    prefixed_title = u'TODO ' + item['title']
    entry = dict(item)
    entry['title'] = prefixed_title
    return entry
# Predicate that accepts every message; it is not referenced anywhere in the
# visible part of this script.
for_any_message = lambda msg: True

# NAME is the common base for the log file name and the PROCESSOR_DB value.
NAME = 'rss-feed-for-org-mode'
setup_logging('logs/' + NAME + '.log')
os.environ['PROCESSOR_DB'] = NAME + '.db'

try:
    logging.info('Starting')
    # NOTE(review): the IMAP login and password are inlined below (shown
    # redacted); consider reading them from the environment instead.
    run_pipeline(
        sources.imap("imap.gmail.com",
                     "svet*",
                     "******",
                     "Autoprocessing/OrgModeInbox"),
        [mail_to_rss, add_TODO_to_title, outputs.rss('rss-feed.xml',
                                                     limit=100)])
    logging.info('Done')
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit still propagate and stop the script.
    logging.exception('Unhandled exception')
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment