Last active
August 29, 2015 14:05
-
-
Save irq0/de023779d93ee79e7f36 to your computer and use it in GitHub Desktop.
rss2email post-processor: Attach images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.text import MIMEText | |
from email.mime.image import MIMEImage | |
# Liberal "find anything that looks like a URL" pattern.
#
# Capture group 1 is the whole URL; callers read it as ``match[0]`` from
# ``findall``.  The inner capture groups exist only for grouping, but they are
# kept so that ``findall`` keeps returning tuples.
#
# The literal parentheses and square brackets must be backslash-escaped:
# without the escapes the trailing-punctuation class closes early and the
# tail alternative matches the empty string, so matched URLs keep a trailing
# ".", ",", ")" or "]" and never satisfy an end-anchored extension check.
#
# The body alternatives consume one character at a time (no "+" nested inside
# the outer repetition) so backtracking over trailing punctuation stays
# linear instead of exponential.
URL_REGEX = re.compile(
    r"("
    # Start of a URL: "scheme:", "www." / "www1." or a bare "domain.tld/".
    r"(?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)"
    # Body: ordinary characters or balanced parentheses (up to two levels).
    r"(?:[^\s()<>]|\(([^\s()<>]|(\([^\s()<>]+\)))*\))+"
    # End: a balanced parenthesised group, or any character that is not
    # whitespace or trailing punctuation.
    r"""(?:\(([^\s()<>]|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])"""
    r")",
    re.DOTALL,
)
def fetch_img(url, timeout=30):
    """Download *url* and wrap the response body in a ``MIMEImage`` part.

    This is best-effort: a failed download must not abort processing of the
    feed entry, so every expected failure yields ``None`` instead of raising.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``MIMEImage`` built from the response body, or ``None`` when the
        request fails, the status code is not 200, or the body cannot be
        wrapped as an image.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return None
        # MIMEImage raises TypeError when it cannot guess the image subtype
        # from the data (for example when the body is not an image at all).
        return MIMEImage(response.content)
    except (requests.RequestException, TypeError, ValueError):
        # RequestException: network errors, timeouts, invalid/schemeless URLs.
        # ValueError: presumably malformed URLs that slip past requests' own
        # validation -- kept so a bad link in a feed cannot crash delivery.
        return None
def process(feed, parsed, entry, guid, message):
    """Attach images linked from the message body to the outgoing mail.

    The decoded body is scanned for URLs; those ending in a known image
    extension are downloaded with ``fetch_img`` and attached, together with
    the original message, to a new multipart container.

    Args:
        feed: Unused here.
        parsed: Unused here.
        entry: Unused here.
        guid: Unused here.
        message: The ``email`` message generated for the feed entry.

    Returns:
        A ``MIMEMultipart`` holding the original message followed by the
        fetched images, or ``message`` itself when there is nothing to attach
        (multipart input, no image links, or every download failed).
    """
    payload = message.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) returns None for multipart messages;
        # there is no single body to scan, so pass the message through.
        return message

    txt = payload.decode("utf-8", "ignore")
    # findall yields one tuple per match; element 0 is the whole URL.
    urls = [match[0] for match in URL_REGEX.findall(txt)]
    # NOTE(review): "pdf" is matched here, but MIMEImage is unlikely to
    # accept PDF data, so such links are fetched and then dropped -- confirm
    # whether that is intended.
    urls_img = [url for url in urls
                if re.search(r"\.(gif|jpg|jpeg|png|pdf)$", url)]
    # A link often appears more than once in a body (e.g. href and src);
    # fetch and attach each image only once, keeping first-seen order.
    urls_img = list(dict.fromkeys(urls_img))

    images = []
    for url in urls_img:
        img = fetch_img(url)
        if img:
            images.append(img)
    if not images:
        return message

    mm = MIMEMultipart()
    mm.attach(message)
    for k, v in message.items():
        # Assigning a header appends rather than replaces, so copying the
        # body part's MIME headers would leave the container with duplicate
        # Content-Type / MIME-Version / Content-Transfer-Encoding headers
        # that contradict its own multipart declaration.
        name = k.lower()
        if name == "mime-version" or name.startswith("content-"):
            continue
        mm[k] = v
    for img in images:
        mm.attach(img)
    return mm
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Linking this file from /usr/lib/python3/dist-packages/rss2email/post_process and configuring as
post-process = rss2email.post_process.attach_image process
works, but is probably not the best approach.
The images are attached to the mail, but they are not referenced from the mail's text, which still contains the original links.