Skip to content

Instantly share code, notes, and snippets.

@azumakuniyuki
Created January 21, 2020 22:45
Show Gist options
  • Save azumakuniyuki/29c64497abda55d4624c342a03053b63 to your computer and use it in GitHub Desktop.
Save azumakuniyuki/29c64497abda55d4624c342a03053b63 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
# Try to improve p5-Sisimai/lib/Sisimai/Message.pm
use strict;
use warnings;
use feature ':5.10';
use Benchmark ':all';
use Test::More 'no_plan';
use Data::Dumper;
use Encode;
use Sisimai::MIME;
use Sisimai::Order;
use Sisimai::String;
use Sisimai::RFC5322;
use Sisimai::RFC3834;
my $Head = << 'EOH';
From MAILER-DAEMON Thu Apr 29 23:34:45 2008
Return-Path: <>
X-Original-To: [email protected]
Delivered-To: [email protected]
Received: from nyaan2.example.com (nyaan2.example.com [203.0.113.129])
by nekonyaan.example.com (Postfix) with ESMTP id 00000000EE
for <[email protected]>; Thu, 29 Apr 2008 23:34:45 -0800 (PST)
Received: by nyaan2.example.com (Postfix)
id 00000000AA; Thu, 29 Apr 2008 23:34:45 -0800 (PST)
Date: Thu, 29 Apr 2008 23:34:45 -0800 (PST)
From: [email protected] (Mail Delivery System)
Subject: Undelivered Mail Returned to Sender
To: [email protected]
MIME-Version: 1.0
X-Neko-Nyaan: 2
Content-Type: multipart/report; report-type=delivery-status;
boundary="FF00FEEEAA0B.1222022324/nyaan2.example.com"
Message-Id: <[email protected]>
EOH
my $TryOnFirst = [];
my $ExtHeaders = Sisimai::Order->headers;
my $SubjectTab = Sisimai::Order->by('subject');
my $RFC822Head = Sisimai::RFC5322->HEADERFIELDS;
my @RFC3834Set = @{ Sisimai::RFC3834->headerlist };
my @HeaderList = (qw|from to date subject content-type reply-to message-id
received content-transfer-encoding return-path x-mailer|);
my $IsMultiple = { 'received' => 1 };
sub emheaders {
my $heads = shift || return undef;
my $field = shift || [];
my $currheader = '';
my $allheaders = {};
my $structured = {};
my @headslices = split("\n", $$heads);
map { $allheaders->{ $_ } = 1 } (@HeaderList, @RFC3834Set, keys %$ExtHeaders);
map { $allheaders->{ lc $_ } = 1 } @$field if scalar @$field;
map { $structured->{ $_ } = undef } @HeaderList;
map { $structured->{ $_ } = [] } keys %$IsMultiple;
SPLIT_HEADERS: while( my $e = shift @headslices ) {
# Convert email headers to hash
if( $e =~ /\A[ \t]+(.+)\z/ ) {
# Continued (foled) header value from the previous line
next unless exists $allheaders->{ $currheader };
# Header line continued from the previous line
if( ref $structured->{ $currheader } eq 'ARRAY' ) {
# Concatenate a header which have multi-lines such as 'Received'
$structured->{ $currheader }->[-1] .= ' '.$1;
} else {
$structured->{ $currheader } .= ' '.$1;
}
} else {
# split the line into a header name and a header content
my($lhs, $rhs) = split(/:[ ]*/, $e, 2);
$currheader = lc $lhs;
next unless exists $allheaders->{ $currheader };
if( exists $IsMultiple->{ $currheader } ) {
# Such as 'Received' header, there are multiple headers in a single
# email message.
$rhs =~ y/\t/ /;
push @{ $structured->{ $currheader } }, $rhs;
} else {
# Other headers except "Received" and so on
if( $ExtHeaders->{ $currheader } ) {
# MTA specific header
for my $p ( @{ $ExtHeaders->{ $currheader } } ) {
next if grep { $p eq $_ } @$TryOnFirst;
push @$TryOnFirst, $p;
}
}
$structured->{ $currheader } = $rhs;
}
}
}
return $structured;
}
sub useregex1 {
my $heads = shift || return {};
my $field = shift || [];
my $table = {};
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
my $q = {$$heads =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms};
map { $table->{ lc $_ } = $q->{ $_ } } keys %$q;
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table;
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p;
$table->{'received'} = $p if scalar @$p;
return $table;
}
sub useregex2 {
my $heads = shift || return {};
my $field = shift || [];
my $table = {};
my $c = {};
if( scalar @$field ) {
my $a = sprintf("(%s)", join('|', @$field));
my $b = qr/$a/imx;
$c = {$$heads =~ /^$b:[ ]*(.*?)\n(?![\s\t])/gms};
}
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
my $q = {$$heads =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms};
map { $table->{ lc $_ } = $q->{ $_ } } (keys %$q, keys %$c);
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table;
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p;
$table->{'received'} = $p if scalar @$p;
return $table;
}
sub useregex3 {
my $heads = shift || return {};
my $field = shift || [];
my $table = {};
my $allheaders = {};
map { $allheaders->{ $_ } = 1 } (@HeaderList, @RFC3834Set, keys %$ExtHeaders);
map { $allheaders->{ lc $_ } = 1 } @$field if scalar @$field;
my $a = sprintf("(%s)", join('|', sort keys %$allheaders));
my $b = qr/$a/imx;
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
my $q = {$$heads =~ /^$b:[ ]*(.*?)\n(?![\s\t])/gms};
map { $table->{ lc $_ } = $q->{ $_ } } keys %$q;
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table;
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p;
$table->{'received'} = $p if scalar @$p;
return $table;
}
my $p = [
emheaders(\$Head, ['X-Neko-Nyaan']),
useregex1(\$Head, ['X-Neko-Nyaan']),
useregex2(\$Head, ['X-Neko-Nyaan']),
useregex3(\$Head, ['X-Neko-Nyaan']),
];
for my $e ( @$p ) {
isa_ok $e, 'HASH';
for my $ee ( qw|from subject date to return-path message-id| ) {
ok length $e->{ $ee };
}
isa_ok $e->{'received'}, 'ARRAY';
is scalar @{ $e->{'received'} }, 2;
like $e->{'received'}->[0], qr/ for /;
ok length $e->{'x-neko-nyaan'};
is $e->{'x-neko-nyaan'}, 2;
like $e->{'from'}, qr/MAILER-DAEMON/;
like $e->{'subject'}, qr/Undelivered/;
like $e->{'date'}, qr/Thu, 29 Apr/;
like $e->{'to'}, qr/example[.]com/;
like $e->{'return-path'}, qr/<>/;
like $e->{'message-id'}, qr/E236/;
}
printf("Running with Perl %s on %s\n%s\n", $^V, $^O, '-' x 80);
cmpthese(6e5, {
'emheaders' => sub { emheaders(\$Head, ['X-Neko-Nyaan']) },
'useregex1' => sub { useregex1(\$Head, ['X-Neko-Nyaan']) },
'useregex2' => sub { useregex2(\$Head, ['X-Neko-Nyaan']) },
'useregex3' => sub { useregex3(\$Head, ['X-Neko-Nyaan']) },
}
);
__END__
Running with Perl v5.30.0 on darwin
--------------------------------------------------------------------------------
Rate useregex3 emheaders useregex2 useregex1
useregex3 13780/s -- -41% -43% -52%
emheaders 23301/s 69% -- -3% -20%
useregex2 24058/s 75% 3% -- -17%
useregex1 29000/s 110% 24% 21% --
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment