Last active
February 25, 2024 13:36
-
-
Save moisseev/a0b032842225a7d0b17a to your computer and use it in GitHub Desktop.
Train Rspamd using Dovecot folders (* the script expunges folders *)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Learn Rspamd (rspamc learn_ham/learn_spam and fuzzy_add) from messages that
# were sorted into dedicated Dovecot training folders.  Processed messages are
# flagged \Deleted and the folders are expunged at the end of the run.

# --- Configuration -----------------------------------------------------------

# Dovecot user whose mailboxes are processed (passed to `doveadm -u`).
# NOTE(review): the value below looks like a redacted placeholder - confirm
# and set the real account before running.
USER="[email protected]"

# Training folders share a common name prefix; the suffix (ham/prob/spam)
# selects how messages found in the folder are learned.
FOLDER_PREFIX="train_"
FOLDER_HAM="${FOLDER_PREFIX}ham"
FOLDER_PROB="${FOLDER_PREFIX}prob"
FOLDER_SPAM="${FOLDER_PREFIX}spam"

# Absolute paths of the Dovecot and Rspamd administration tools.
DOVEADM="/usr/local/bin/doveadm"
RSPAMADM="/usr/local/bin/rspamadm"

# rspamc (--connect|-h) option.  Holds the option AND its value in one string,
# so it has to be expanded unquoted to split into two words.
RSPAMC_CONNECT="-h localhost:11334"

# JSON sent to rspamc as the "settings" header when listing symbols.
SETTINGS='{"groups_enabled":["fuzzy","statistics"],"symbols_enabled":["LOCAL_NO_LOG_STAT"]}'

#-------------------------------

# Run-mode switches; empty means "off".  They are set by the option parser.
MODE_BULK=
MODE_SCAN=
COLOR=

# Script name without its directory part (parameter expansion instead of an
# unquoted `basename $0`, which breaks on paths containing whitespace).
PROGNAME=${0##*/}
PROGDESCR="$PROGNAME - learn Rspamd using Dovecot folders"
# Print the program description and the option summary to stderr.
# Reads the globals PROGDESCR and PROGNAME.  Does not exit by itself; the
# caller decides on the exit status.
usage() {
    cat >&2 <<EOF
$PROGDESCR
usage: $PROGNAME [-b|-s|-c|-h]
-b bulk learning (do not scan, do not synchronize fuzzy after each learn)
-s scan messages, do not learn
-c colorize output (requires histring)
-h brief help
EOF
}
# Parse command-line flags into MODE_BULK / MODE_SCAN / COLOR.
# -h prints the help and exits 0; an unknown flag prints the help and exits 1.
while getopts bsch opt; do
    case "$opt" in
        b) MODE_BULK=1 ;;
        s) MODE_SCAN=1 ;;
        c) COLOR=1 ;;
        h)
            usage
            exit 0
            ;;
        # getopts stores a literal "?" in opt for an unknown flag.  It is
        # matched explicitly (escaped) instead of being compared through an
        # unquoted `[ $opt = "h" ]`, where the "?" would be glob-expanded
        # against single-character file names in the current directory.
        \?)
            usage
            exit 1
            ;;
    esac
done
# Write the text of the message selected by the global ID to stdout, using
# `doveadm fetch ... text`.
# Globals read: DOVEADM, USER, ID.
fn_fetch_text() {
    # ID holds a multi-word search query ("mailbox-guid <guid> uid <uid>"),
    # so it is intentionally left unquoted to split into separate arguments.
    "$DOVEADM" fetch -u "$USER" text $ID
}
# Filter stdin to stdout, dropping the boilerplate lines of rspamc learn /
# fuzzy_add output: the "Results for file" header, filename and scan_time
# fields, the hashes list (opening line, 128-hex-digit entries, closing "]")
# and empty lines.  Only whole-line matches are removed.
fn_filter_learn_output() {
    # `grep -E` replaces the obsolescent `egrep` alias; the pattern is unchanged.
    grep -Ev '^(Results for file: stdin \(0\.[[:digit:]]{3} seconds\)|filename = "stdin";|scan_time = 0\.[[:digit:]]{6};|hashes \[| {4}"[[:xdigit:]]{128}",|]|^$)$'
}
# Run `rspamadm control fuzzy_sync`, discarding its standard output
# (stderr is left untouched).
# Globals read: RSPAMADM.
fn_fuzzy_sync() {
    "$RSPAMADM" control fuzzy_sync >/dev/null
}
# Scan the message selected by the global ID with `rspamc symbols` and print
# the result without the summary lines (results header, metric, action, spam,
# score, urls, emails, empty lines), followed by one empty separator line.
# Globals read: RSPAMC_CONNECT, SETTINGS (plus those used by fn_fetch_text).
fn_list_symbols() {
    # RSPAMC_CONNECT is "option value" in one string and must split, so it
    # stays unquoted.  The settings header is quoted: the JSON contains
    # [...] which would otherwise be subject to pathname expansion.
    fn_fetch_text |
        rspamc $RSPAMC_CONNECT "--header=settings:$SETTINGS" symbols |
        grep -Ev '^Results for file: stdin|^\[Metric: default\]|^Action: |^Spam: |^Score: |^Urls: |^Emails: |^$'
    echo
}
# Same as a full symbol listing of the message selected by the global ID, but
# additionally drops the "Message-ID: " line.  Prints the remaining
# `rspamc symbols` output followed by one empty separator line.
# Globals read: RSPAMC_CONNECT, SETTINGS (plus those used by fn_fetch_text).
fn_list_symbols_short() {
    # RSPAMC_CONNECT is "option value" in one string and must split, so it
    # stays unquoted.  The settings header is quoted: the JSON contains
    # [...] which would otherwise be subject to pathname expansion.
    fn_fetch_text |
        rspamc $RSPAMC_CONNECT "--header=settings:$SETTINGS" symbols |
        grep -Ev '^Results for file: stdin|^\[Metric: default\]|^Action: |^Spam: |^Score: |^Urls: |^Emails: |^$|^Message-ID: '
    echo
}
# Copy stdin to stdout, colorizing learn results with `histring` when COLOR is
# set: "success" lines in green, the two expected/benign error messages in
# white, and any other line starting with "error" in histring's default color.
# When COLOR is empty, or histring is not installed, the input is passed
# through unchanged.
# Globals read: COLOR.
fn_highlight() {
    # An explicit if/else replaces `[ $COLOR ] && ( ... ) || cat`, where `cat`
    # also ran whenever the histring pipeline returned non-zero.
    if [ -n "$COLOR" ] && command -v histring >/dev/null 2>&1; then
        histring -c green -fE '^success ' |
            histring -c white -fE '^error = "all learn conditions denied learning (ham|spam) in default classifier"' |
            histring -c white -fE '^HTTP error: 404, No content to generate fuzzy for flag [0-9]+' |
            histring -fE '^error[ :]'
    else
        cat
    fi
}
# Copy stdin to stdout, colorizing category headers with `histring` when COLOR
# is set: lines ending in " ham" in green, lines ending in " spam" in
# histring's default color.  When COLOR is empty, or histring is not
# installed, the input is passed through unchanged.
# Globals read: COLOR.
fn_highlight_cathegory() {
    # An explicit if/else replaces `[ $COLOR ] && ( ... ) || cat`, where `cat`
    # also ran whenever the histring pipeline returned non-zero.
    if [ -n "$COLOR" ] && command -v histring >/dev/null 2>&1; then
        histring -c green -fE ' ham$' |
            histring -E ' spam$'
    else
        cat
    fi
}
# Scan-only pass: for each training folder print a "==> <folder>" header and
# an empty line, then list the Rspamd symbols of every undeleted message in
# it.  Nothing is learned and no flags are changed.
# Globals read: FOLDER_HAM, FOLDER_PROB, FOLDER_SPAM, DOVEADM, USER.
# Globals written: ID (inside the pipeline subshell, for fn_list_symbols).
fn_scan_folders() {
    for EACH_FOLDER in "$FOLDER_HAM" "$FOLDER_PROB" "$FOLDER_SPAM"; do
        # printf instead of the non-portable `echo -e`.
        printf '==> %s\n\n' "$EACH_FOLDER"
        "$DOVEADM" search -u "$USER" mailbox "$EACH_FOLDER" UNDELETED |
            # The second field is read into MSG_UID, not UID: UID is a
            # read-only variable when sh is provided by bash, which makes
            # `read GUID UID` fail and the loop body never run.
            while read -r GUID MSG_UID; do
                ID="mailbox-guid $GUID uid $MSG_UID"
                fn_list_symbols
            done
    done
}

# -s: scan the training folders and stop; the learning code is never reached.
if [ -n "$MODE_SCAN" ]; then
    fn_scan_folders
    exit 0
fi
# Learn every message listed on stdin (one "<mailbox-guid> <uid>" pair per
# line, as printed by `doveadm search`).
#
# For each message:
#   1. look up its mailbox and strip FOLDER_PREFIX to get the category;
#   2. map the category to a learn command and fuzzy flag/weight
#      ("prob" is learned as spam, but with its own fuzzy flag and weight);
#   3. unless MODE_BULK is set, list the symbols before learning;
#   4. run rspamc learn_<category> and rspamc fuzzy_add;
#   5. unless MODE_BULK is set, sync the fuzzy storage and list symbols again;
#   6. flag the message \Deleted.
#
# Returns 1 (after a message on stderr) as soon as a message with an unknown
# category is met; returns 0 otherwise.
# Globals read: DOVEADM, USER, FOLDER_PREFIX, RSPAMC_CONNECT, MODE_BULK.
# Globals written: ID, CATHEGORY, FUZZY_FLAG, FUZZY_WEIGHT.
fn_learn_messages() {
    # MSG_UID rather than UID: UID is a read-only variable when sh is provided
    # by bash, which makes `read GUID UID` fail and the loop body never run.
    while read -r GUID MSG_UID; do
        ID="mailbox-guid $GUID uid $MSG_UID"
        # ID is a multi-word query and is intentionally left unquoted.
        CATHEGORY=$("$DOVEADM" -f flow fetch -u "$USER" mailbox $ID |
            sed -En "s|^mailbox=${FOLDER_PREFIX}||p")
        # printf instead of the non-portable `echo -e`.
        printf '==> %s\n' "$CATHEGORY" | fn_highlight_cathegory

        case "$CATHEGORY" in
            spam)
                # FUZZY_DENIED
                FUZZY_FLAG=11
                FUZZY_WEIGHT=20
                ;;
            prob)
                CATHEGORY="spam"
                # FUZZY_PROB
                FUZZY_FLAG=12
                FUZZY_WEIGHT=6
                ;;
            ham)
                # FUZZY_WHITE
                FUZZY_FLAG=13
                FUZZY_WEIGHT=20
                ;;
            *)
                echo "Unknown cathegory: $CATHEGORY" >&2
                return 1
                ;;
        esac

        [ -n "$MODE_BULK" ] || fn_list_symbols

        # RSPAMC_CONNECT is "option value" in one string; it must split.
        printf '%-12s' "learn_$CATHEGORY:"
        fn_fetch_text |
            rspamc $RSPAMC_CONNECT "learn_$CATHEGORY" |
            fn_filter_learn_output | fn_highlight

        printf '%-12s' "fuzzy_add:"
        fn_fetch_text |
            rspamc $RSPAMC_CONNECT -w "$FUZZY_WEIGHT" -f "$FUZZY_FLAG" fuzzy_add |
            fn_filter_learn_output | fn_highlight
        echo

        [ -n "$MODE_BULK" ] || {
            fn_fuzzy_sync
            fn_list_symbols_short
        }

        "$DOVEADM" flags add -u "$USER" '\Deleted' $ID
    done
    return 0
}

# Learn all undeleted messages from the three training folders.  The loop runs
# in a pipeline subshell, so its failure status is checked here: an `exit`
# inside the loop would only leave the subshell and the script would carry on.
"$DOVEADM" search -u "$USER" \
    \( mailbox "$FOLDER_HAM" OR mailbox "$FOLDER_PROB" OR mailbox "$FOLDER_SPAM" \) \
    UNDELETED |
    fn_learn_messages || exit 1
# Expunge messages: everything in Trash, plus every message flagged \Deleted
# in the three training folders, Inbox and Junk.
# Globals read: DOVEADM, USER, FOLDER_HAM, FOLDER_PROB, FOLDER_SPAM.
fn_expunge_folders() {
    #"$DOVEADM" expunge -u "$USER" mailbox Trash ALL
    # Workaround for dovecot2-2.2.21
    # The sequence set is quoted so the shell cannot glob-expand "1:*"
    # against file names in the current directory.
    "$DOVEADM" expunge -u "$USER" mailbox Trash '1:*'

    "$DOVEADM" expunge -u "$USER" \
        \( \
        mailbox "$FOLDER_HAM" \
        OR mailbox "$FOLDER_PROB" \
        OR mailbox "$FOLDER_SPAM" \
        OR mailbox Inbox \
        OR mailbox Junk \
        \) \
        DELETED
}

# In bulk mode the fuzzy storage was not synchronized after each message,
# so do it once here.
if [ -n "$MODE_BULK" ]; then
    fn_fuzzy_sync
fi

fn_expunge_folders
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment