Last active
January 14, 2023 17:15
-
-
Save davmillar/3801e3ef5000b6bbddb4db2505d54bf4 to your computer and use it in GitHub Desktop.
Puzzle Subscription Email Handler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict;
use warnings;

use MIME::Parser;
use URI::Find;
use File::Fetch;
use HTML::LinkExtractor;
use Data::Dumper;
use File::Copy qw(move);
# Configure path for the output.
my $output_dir = '/home/dmillar/Dropbox/To-Solve/';

### Create a new parser object:
# The email arrives on STDIN; the parser is told to use /tmp/ as its output
# directory for decoded bodies.
my $parser = MIME::Parser->new;
$parser->output_dir("/tmp/");
my $entity = $parser->parse(\*STDIN);

# Every line of every text part read below, with CR/LF characters removed.
my @message;

# A single-part message has no sub-parts. Treat the message itself as the
# only part so that its body is not silently ignored.
my @top_parts = $entity->parts;
@top_parts = ($entity) unless @top_parts;

for my $top_part (@top_parts) {
    my $part = $top_part;

    # dig into the parts
    if ($part->mime_type eq 'multipart/alternative') {
        # Log which alternatives were offered. The method call has to sit
        # outside the string: "$_->mime_type" would only interpolate $_.
        print $_->mime_type, "\n" for $part->parts;

        # Prefer the plain-text alternative when there is one; otherwise
        # $part stays multipart and is skipped by the text check below.
        for my $alternative ($part->parts) {
            if ($alternative->mime_type eq 'text/plain') {
                $part = $alternative;
                last;
            }
        }
    }

    next unless $part->mime_type =~ m'text/.+';

    # open() yields a false value when the body cannot be read.
    my $fh = $part->open('r') or next;
    while (defined(my $line = $fh->getline)) {
        $line =~ s/[\r\n]//g;
        push(@message, $line);
    }
    $fh->close;
}

# Everything needed is now in @message, so remove the files the parser
# created in its output directory instead of leaving them behind in /tmp.
$parser->filer->purge;
# Make the html and plaintext all one giant string.
my $full_message = join "", @message;
my $LX = HTML::LinkExtractor->new();
$LX->parse(\$full_message);

# Links to online puzzle sites; collected here for use after this loop.
my @online_links;
my @found_links = grep { $_->{tag} eq 'a' } @{ $LX->links };
for my $found_link (@found_links) {
    my $href = $found_link->{'href'};

    # An <a> without an href has nothing to fetch or record.
    next unless defined $href;

    # Find Patreon attachment links ending in pdf, jpz, and docx.
    # Fetch gets them all as 1.(extension) so rename based on link text.
    if ($href =~ /pdf|jpz|docx/ && $href =~ /patreon/) {
        my $link_text = $found_link->{'_TEXT'};
        $link_text = '' unless defined $link_text;
        $link_text =~ s|<.+?>||g;
        $link_text =~ s|^(\s+)||g;
        $link_text =~ s|(\s+)$||g;

        # The link text comes straight from the email, so it must not be
        # able to steer the file outside $output_dir.
        $link_text =~ s{[/\0]}{_}g;

        # With no usable name ('', '.' or '..') move into the directory
        # itself, which keeps the name the download was saved under.
        my $target = $output_dir;
        $target .= $link_text unless $link_text =~ /\A\.{0,2}\z/;

        # new() returns false for a URI it cannot handle. A failed download
        # is reported and skipped so that the remaining links, and the
        # online-links list, are still processed.
        my $ff = File::Fetch->new(uri => $href);
        my $file_path = $ff ? $ff->fetch( to => '/tmp' ) : undef;
        if (!$file_path) {
            my $reason = $ff ? $ff->error : 'unsupported or malformed URI';
            warn "Failed to fetch $href -- $reason\n";
        }
        elsif (!move($file_path, $target)) {
            warn "Failed to move $file_path to $target -- $!\n";
        }
    }

    # Find all Penpa+, Logic Masters DE, CtC, and common short links used by those folks.
    if ($href =~ /swaroop|tinyurl\.com|logic\-masters\.de|app\.crackingthe|the\-cryptic\.web\.app|git\.io|puzz\.link/) {
        push(@online_links, $href);
    }
}
# Add all Penpa+, Logic Masters DE, CtC, and common short links used by those folks to html file.
# Bash script will de-dupe and sort it after.
# The file is opened in append mode even when there are no links to add, so
# it is created if it does not exist yet.
my $online_puzzles_path = $output_dir."Online Puzzles.html";
open(my $online_fh, ">>", $online_puzzles_path) or die "File couldn't be opened: $!";
for my $online_link (@online_links) {
    print {$online_fh} "<a href='$online_link'>$online_link</a><br/>\n";
}
# Buffered write errors only surface at close, so the result is checked.
close($online_fh) or die "couldn't close: $!";
# Find all non-patreon links that end in .pdf and fetch directly. (PuzzleSnacks)
# In list context a successful match returns its capture and a failed match
# returns an empty list, so this one map both filters the lines and extracts
# the link (at most one per line).
# NOTE(review): the pattern needs at least one character other than ) or "
# after "pdf", and the greedy .* can run past the end of a URL -- confirm
# against real PuzzleSnacks emails before tightening it.
my @message_links_clean = map { $_ =~ /(http.*pdf[^)"]+)/ } @message;
for my $pdf_link (@message_links_clean) {
    next if $pdf_link =~ /patreon/;

    # new() returns false for a URI it cannot handle.
    my $ff = File::Fetch->new(uri => $pdf_link);
    unless ($ff) {
        warn "Failed to fetch $pdf_link -- unsupported or malformed URI\n";
        next;
    }

    # Report a failed download and carry on, so that one bad link does not
    # stop the remaining links from being fetched.
    $ff->fetch( to => $output_dir )
        or warn "Failed to fetch $pdf_link -- " . $ff->error . "\n";
}
# Find Google Drive share links from Redhead64's Patreon posts, parse out the IDs, and download.
# Fetch calls the PDFs all 'uc' so rename them to the ID.
my @google_links = grep { $_ =~ /https:\/\/drive\.google\.com[^=]+/ } @message;
# At most one ID is taken per line; lines without a /d/<id>/view segment
# contribute nothing.
my @google_link_ids = map { $_ =~ /d\/([0-9A-Z\_\-]+)\/view/i } @google_links;
for my $link_id (@google_link_ids) {
    # The ID pattern only allows letters, digits, '_' and '-', so it is safe
    # to use as a file name.
    my $link_text = "$link_id.pdf";
    my $pdf_link = "https://drive.google.com/u/0/uc?id=$link_id&export=download";

    # new() returns false for a URI it cannot handle. A failed download is
    # reported and skipped so that the remaining IDs are still processed.
    my $ff = File::Fetch->new(uri => $pdf_link);
    my $file_path = $ff ? $ff->fetch( to => "/tmp" ) : undef;
    unless ($file_path) {
        my $reason = $ff ? $ff->error : 'unsupported or malformed URI';
        warn "Failed to save Google doc $link_id -- $reason\n";
        next;
    }

    move($file_path, $output_dir.$link_text)
        or warn "Failed to move $file_path to $output_dir$link_text -- $!\n";
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Feeds the message on stdin to email_parse.pl, appending the script's stdout
# and stderr to crud.log, then sorts and de-duplicates the collected links.

links_file="/home/dmillar/Dropbox/To-Solve/Online Puzzles.html"

# Run from the directory holding this script so the relative paths below
# resolve; stop if that directory cannot be entered.
cd "$(dirname "$0")" || exit 1

# perl inherits this script's stdin directly, so no `cat |` is needed.
perl email_parse.pl &>> crud.log

# Sort the file in place, dropping duplicate lines.
sort -u -o "$links_file" "$links_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This is my setup for parsing out contents from puzzle subscriptions so they all save to a To-Solve folder in my Dropbox for me to enjoy later. The sources I use are:
- PuzzleSnacks by Eric Berlin
- RedHead64's Patreon
- Cracking the Cryptic's Patreon
- Pavel Curtis's (soon to conclude) Ada's Aenigmas Patreon
- Nathan Curtis's Patreon
To use this, I set up the mail client Evolution and added the following filters:
1. If sender contains (puzzlesnacks email) then... (see below)
2. If sender contains "[email protected]" and regex match message body matches ".pdf|.jpz|.docx" then...
3. If sender contains "[email protected]" and subject contains Redhead64 then...
4. If sender contains "[email protected]" and regex match message body matches "swaroop|tinyurl.com|logic-masters.de|app.cracking|cracking-the-cryptic.web.app|git.io|puzz.link" then...
...then pipe to program evolution-friend.sh, set status 'read', and move to folder 'All Mail' (Gmail's archive).
This could probably be simplified or some of the effort could be redone using the Perl script, but it's working well enough for now. :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment