Created
January 21, 2020 22:45
-
-
Save azumakuniyuki/29c64497abda55d4624c342a03053b63 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# Try to improve p5-Sisimai/lib/Sisimai/Message.pm | |
use strict; | |
use warnings; | |
use feature ':5.10'; | |
use Benchmark ':all'; | |
use Test::More 'no_plan'; | |
use Data::Dumper; | |
use Encode; | |
use Sisimai::MIME; | |
use Sisimai::Order; | |
use Sisimai::String; | |
use Sisimai::RFC5322; | |
use Sisimai::RFC3834; | |
my $Head = << 'EOH'; | |
From MAILER-DAEMON Thu Apr 29 23:34:45 2008 | |
Return-Path: <> | |
X-Original-To: [email protected] | |
Delivered-To: [email protected] | |
Received: from nyaan2.example.com (nyaan2.example.com [203.0.113.129]) | |
by nekonyaan.example.com (Postfix) with ESMTP id 00000000EE | |
for <[email protected]>; Thu, 29 Apr 2008 23:34:45 -0800 (PST) | |
Received: by nyaan2.example.com (Postfix) | |
id 00000000AA; Thu, 29 Apr 2008 23:34:45 -0800 (PST) | |
Date: Thu, 29 Apr 2008 23:34:45 -0800 (PST) | |
From: [email protected] (Mail Delivery System) | |
Subject: Undelivered Mail Returned to Sender | |
To: [email protected] | |
MIME-Version: 1.0 | |
X-Neko-Nyaan: 2 | |
Content-Type: multipart/report; report-type=delivery-status; | |
boundary="FF00FEEEAA0B.1222022324/nyaan2.example.com" | |
Message-Id: <[email protected]> | |
EOH | |
my $TryOnFirst = []; | |
my $ExtHeaders = Sisimai::Order->headers; | |
my $SubjectTab = Sisimai::Order->by('subject'); | |
my $RFC822Head = Sisimai::RFC5322->HEADERFIELDS; | |
my @RFC3834Set = @{ Sisimai::RFC3834->headerlist }; | |
my @HeaderList = (qw|from to date subject content-type reply-to message-id | |
received content-transfer-encoding return-path x-mailer|); | |
my $IsMultiple = { 'received' => 1 }; | |
sub emheaders { | |
my $heads = shift || return undef; | |
my $field = shift || []; | |
my $currheader = ''; | |
my $allheaders = {}; | |
my $structured = {}; | |
my @headslices = split("\n", $$heads); | |
map { $allheaders->{ $_ } = 1 } (@HeaderList, @RFC3834Set, keys %$ExtHeaders); | |
map { $allheaders->{ lc $_ } = 1 } @$field if scalar @$field; | |
map { $structured->{ $_ } = undef } @HeaderList; | |
map { $structured->{ $_ } = [] } keys %$IsMultiple; | |
SPLIT_HEADERS: while( my $e = shift @headslices ) { | |
# Convert email headers to hash | |
if( $e =~ /\A[ \t]+(.+)\z/ ) { | |
# Continued (foled) header value from the previous line | |
next unless exists $allheaders->{ $currheader }; | |
# Header line continued from the previous line | |
if( ref $structured->{ $currheader } eq 'ARRAY' ) { | |
# Concatenate a header which have multi-lines such as 'Received' | |
$structured->{ $currheader }->[-1] .= ' '.$1; | |
} else { | |
$structured->{ $currheader } .= ' '.$1; | |
} | |
} else { | |
# split the line into a header name and a header content | |
my($lhs, $rhs) = split(/:[ ]*/, $e, 2); | |
$currheader = lc $lhs; | |
next unless exists $allheaders->{ $currheader }; | |
if( exists $IsMultiple->{ $currheader } ) { | |
# Such as 'Received' header, there are multiple headers in a single | |
# email message. | |
$rhs =~ y/\t/ /; | |
push @{ $structured->{ $currheader } }, $rhs; | |
} else { | |
# Other headers except "Received" and so on | |
if( $ExtHeaders->{ $currheader } ) { | |
# MTA specific header | |
for my $p ( @{ $ExtHeaders->{ $currheader } } ) { | |
next if grep { $p eq $_ } @$TryOnFirst; | |
push @$TryOnFirst, $p; | |
} | |
} | |
$structured->{ $currheader } = $rhs; | |
} | |
} | |
} | |
return $structured; | |
} | |
sub useregex1 { | |
my $heads = shift || return {}; | |
my $field = shift || []; | |
my $table = {}; | |
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms]; | |
my $q = {$$heads =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms}; | |
map { $table->{ lc $_ } = $q->{ $_ } } keys %$q; | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table; | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p; | |
$table->{'received'} = $p if scalar @$p; | |
return $table; | |
} | |
sub useregex2 { | |
my $heads = shift || return {}; | |
my $field = shift || []; | |
my $table = {}; | |
my $c = {}; | |
if( scalar @$field ) { | |
my $a = sprintf("(%s)", join('|', @$field)); | |
my $b = qr/$a/imx; | |
$c = {$$heads =~ /^$b:[ ]*(.*?)\n(?![\s\t])/gms}; | |
} | |
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms]; | |
my $q = {$$heads =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms}; | |
map { $table->{ lc $_ } = $q->{ $_ } } (keys %$q, keys %$c); | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table; | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p; | |
$table->{'received'} = $p if scalar @$p; | |
return $table; | |
} | |
sub useregex3 { | |
my $heads = shift || return {}; | |
my $field = shift || []; | |
my $table = {}; | |
my $allheaders = {}; | |
map { $allheaders->{ $_ } = 1 } (@HeaderList, @RFC3834Set, keys %$ExtHeaders); | |
map { $allheaders->{ lc $_ } = 1 } @$field if scalar @$field; | |
my $a = sprintf("(%s)", join('|', sort keys %$allheaders)); | |
my $b = qr/$a/imx; | |
my $p = [$$heads =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms]; | |
my $q = {$$heads =~ /^$b:[ ]*(.*?)\n(?![\s\t])/gms}; | |
map { $table->{ lc $_ } = $q->{ $_ } } keys %$q; | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } values %$table; | |
map { $_ =~ s/\n\s+/ /; $_ =~ y/\t/ /s } @$p; | |
$table->{'received'} = $p if scalar @$p; | |
return $table; | |
} | |
my $p = [ | |
emheaders(\$Head, ['X-Neko-Nyaan']), | |
useregex1(\$Head, ['X-Neko-Nyaan']), | |
useregex2(\$Head, ['X-Neko-Nyaan']), | |
useregex3(\$Head, ['X-Neko-Nyaan']), | |
]; | |
for my $e ( @$p ) { | |
isa_ok $e, 'HASH'; | |
for my $ee ( qw|from subject date to return-path message-id| ) { | |
ok length $e->{ $ee }; | |
} | |
isa_ok $e->{'received'}, 'ARRAY'; | |
is scalar @{ $e->{'received'} }, 2; | |
like $e->{'received'}->[0], qr/ for /; | |
ok length $e->{'x-neko-nyaan'}; | |
is $e->{'x-neko-nyaan'}, 2; | |
like $e->{'from'}, qr/MAILER-DAEMON/; | |
like $e->{'subject'}, qr/Undelivered/; | |
like $e->{'date'}, qr/Thu, 29 Apr/; | |
like $e->{'to'}, qr/example[.]com/; | |
like $e->{'return-path'}, qr/<>/; | |
like $e->{'message-id'}, qr/E236/; | |
} | |
printf("Running with Perl %s on %s\n%s\n", $^V, $^O, '-' x 80); | |
cmpthese(6e5, { | |
'emheaders' => sub { emheaders(\$Head, ['X-Neko-Nyaan']) }, | |
'useregex1' => sub { useregex1(\$Head, ['X-Neko-Nyaan']) }, | |
'useregex2' => sub { useregex2(\$Head, ['X-Neko-Nyaan']) }, | |
'useregex3' => sub { useregex3(\$Head, ['X-Neko-Nyaan']) }, | |
} | |
); | |
__END__ | |
Running with Perl v5.30.0 on darwin | |
-------------------------------------------------------------------------------- | |
Rate useregex3 emheaders useregex2 useregex1 | |
useregex3 13780/s -- -41% -43% -52% | |
emheaders 23301/s 69% -- -3% -20% | |
useregex2 24058/s 75% 3% -- -17% | |
useregex1 29000/s 110% 24% 21% -- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment