Skip to content

Instantly share code, notes, and snippets.

@xrat
Created June 22, 2022 20:01
Show Gist options
  • Save xrat/d8b92e32e22af0cf8e05c1e7538cd2f5 to your computer and use it in GitHub Desktop.
Save xrat/d8b92e32e22af0cf8e05c1e7538cd2f5 to your computer and use it in GitHub Desktop.
Awk script to flatten an rss2email multipart digest
# Flatten an rss2email multipart digest
#
# Tested only with GNU Awk, and rss2email up to 3.13.1.
# This script expects the encodings setting in r2e to prefer UTF-8.
# It also expects that input was run through `formail -c`, and it
# requires reformime (on Debian, for example, part of the maildrop
# package) and a date(1) that supports `-d` (e.g. GNU date).
# Emit one flattened entry built from the globals subject, text, id and date.
# Layout: a dashed separator line, the subject as a "> " line, an empty
# line, the entry text, then "<id> (p. <date>)" followed by an empty line.
# The sender is deliberately not printed, and neither is url, because the
# URL is already part of text.
function output() {
    # separator + subject + empty line (each piece terminated by ORS)
    print "-----------------" ORS "> " subject ORS
    # body + id/date footer + empty line
    print text ORS id " (p. " date ")" ORS
}
# Pass the mail header (first line up to the first empty line) through,
# but replace its content description: existing Content-Type and
# Content-Transfer-Encoding fields are dropped, and a text/plain UTF-8 /
# 8bit pair is emitted just before the empty line that ends the header.
NR==1,/^$/ {
    if ($0 ~ /^$/) {
        print "Content-Type: text/plain; charset=\"utf-8\""
        print "Content-Transfer-Encoding: 8bit"
        print
        # The header is finished only here. Setting the flag any earlier
        # lets the (headerdone) rule run on the mail's own header, where
        # its Subject: handling calls getline and thereby swallows the
        # following header line before this rule gets a chance to print it.
        headerdone = 1
        next
    }
    # Drop the fields that are re-emitted above so they are not duplicated.
    if ($0 !~ /^(Content-Type|Content-Transfer-Encoding):/) print
}
# A MIME boundary line closes the current entry: once an entry id has
# been collected, print the entry and move on to the next input line.
id && $0 ~ /^--=========/ { output(); next }
# Quote s so that a POSIX shell reads it as one literal word: wrap it in
# single quotes and spell every embedded single quote as '"'"'.
function shquote(s) {
    gsub(/'/, "'\"'\"'", s)
    return "'" s "'"
}
# Collect the fields of one digest entry (subject, date, id, url, text)
# and print the entry once its body has been read up to the next boundary.
# Runs for every line while headerdone is set.
(headerdone) {
    if ($1 == "Content-Type:") {
        # NOTE(review): contenttype is not read anywhere in the visible
        # script; the "l9" label for iso-8859-1 is kept as found.
        if ($2 == "text/plain;" && $3 == "charset=\"utf-8\"") contenttype = "utf8"
        else if ($2 == "text/plain;" && $3 ~ /iso-8859-1/) contenttype = "l9"
        else contenttype = substr($0, 15)
    }
    if ($1 == "Subject:") {
        subject = substr($0, 10)
        # Append folded continuation lines (leading space or tab).
        # "(getline) > 0" stops on EOF *and* on a read error; a bare
        # "getline" is also true for the error value -1 and would spin.
        # When the loop ends on a non-continuation line, $0 already holds
        # that next line, so the checks below are applied to it.
        while ((getline) > 0 && $0 ~ /^[ \t]/) subject = subject $0
        # Decode the MIME-encoded header. The subject comes from the feed,
        # so it is shell-quoted: an apostrophe would otherwise break the
        # command line (or allow command injection).
        cmd = "reformime -c utf-8 -h " shquote(subject)
        cmd | getline subject
        close(cmd)
    }
    if ($1 == "Date:") { # example: Date: Fri, 24 Sep 2021 16:15:00 -0000
        date = substr($0, 7)
        cmd = "date -d" shquote(date) " +'%Y-%m-%d %H:%M'"
        cmd | getline date
        close(cmd)
    }
    # The sender is not extracted; output() does not print it.
    # X-RSS-ID is preferred for the footer because it sometimes has short urls.
    if ($1 == "X-RSS-ID:") {
        id = $2
    }
    if ($1 == "X-RSS-URL:") {
        url = $2 # only used to strip the "URL: " prefix below
        text = ""
        lastline = ""
        # Everything from here up to the next boundary is taken as the body.
        while ((getline) > 0 && $0 !~ /^--=========/) {
            # squeeze runs of empty lines to one (also skips leading ones)
            if (/^$/ && lastline == "") continue
            # skip images
            if (/^\[!\[\]\(.*\)$/) continue
            # Remove the "URL: " label in front of the entry URL. The URL
            # is compared literally; used as a regex, characters such as
            # "?" or "+" in it would keep the line from matching.
            if (index($0, "URL: " url) == 1) $0 = substr($0, 6)
            # Compare against "" rather than testing truthiness, so that a
            # first body line of "0" is not discarded.
            text = (text != "") ? text "\n" $0 : $0
            lastline = $0
        }
        if ($0 ~ /^--=========/) { output(); next }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment