Created
March 10, 2015 01:01
-
-
Save irisfofs/4194865b8a55af9c14aa to your computer and use it in GitHub Desktop.
A patch for pisg to improve its smiley parsing with a lot more smileys.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From aa0d9705564580ad6059e9e9e793a25764dd3a9f Mon Sep 17 00:00:00 2001 | |
From: Andrew <[email protected]> | |
Date: Mon, 9 Mar 2015 20:53:59 -0400 | |
Subject: [PATCH] Updated smiley parsing code | |
--- | |
pisg/modules/Pisg/Parser/Logfile.pm | 51 ++++++++++++++++++++++++++++++----- | |
1 file changed, 44 insertions(+), 7 deletions(-) | |
diff --git a/pisg/modules/Pisg/Parser/Logfile.pm b/pisg/modules/Pisg/Parser/Logfile.pm | |
index 63c756c..ad5d913 100644 | |
--- a/pisg/modules/Pisg/Parser/Logfile.pm | |
+++ b/pisg/modules/Pisg/Parser/Logfile.pm | |
@@ -393,6 +393,7 @@ sub _parse_file | |
if ($saying !~ /[a-z]/o && $saying =~ /[A-Z]/o) { | |
# Ignore single smileys on a line. eg. '<user> :P' | |
+ # TODO: Use the same smiley check as our smiley counter below | |
if ($saying !~ /^[8;:=][ ^-o]?[)pPD\}\]>]$/o) { | |
$stats->{allcaps}{$nick}++; | |
push @{ $lines->{allcaplines}{$nick} }, $line; | |
@@ -405,18 +406,54 @@ sub _parse_file | |
} | |
# Who smiles the most? | |
- my $e = '[8;:=%]'; # eyes | |
- my $n = '[-oc*^]'; # nose | |
- # smileys including asian-style (^^ ^_^' ^^; \o/) | |
- if ($saying =~ /(>?$e'?$n[\)pPD\}\]>]|[\(\{\[<]$n'?$e<?|[;:][\)pPD\}\]\>]|\([;:]|\^[_o-]*\^[';]|\\[o.]\/)/o) { | |
+ my $e = '[8;:=%Xx]'; # eyes | |
+ my $n = '[-oc*^v]'; # nose | |
+ my $nm_a = '[-_o.~3]'; # eastern nose / mouth middles | |
+ my $a_sw = '[\'";]'; # eastern face modifiers, like sweat drops | |
+ | |
+ # TODO: allow matching of lines with URLs and smileys in them | |
+ my $url_regex = qr/\w+:\/\//; | |
+ | |
+ my $wholeword_start = '(?<![\w?&])'; # don't allow in middle of word or as query strings | |
+ my $wholeword_end = '(?![\w])'; # don't match a face that is immediately followed by a word character: in "(xD)" this matches xD, not (x | |
+ | |
+ # [\)pPD\}\]>]|[\(\{\[<] | |
+ my $bi_hm = '*'; # happy mouths that work either way | |
+ my $hm = '[DPp\)\]\}>3'.$bi_hm.']'; # Directional happy mouths (LTR -->) | |
+ my $rhm = '[CcL\(\[\{<'.$bi_hm.']'; # Directional happy mouths (RTL <--) | |
+ my $he_a = '\^'; # happy eastern eyes | |
+ | |
+ # Happy faces, smileys including eastern-style (^^ ^_^' ^^; \o/) | |
+ # Assumed here that tears are happy tears. | |
+ if ($saying =~ qr/$wholeword_start(>?$e'?$n?$hm|$rhm$n?'?$e<?|($he_a)$nm_a*\g{-1}$a_sw?|\\[o.]\/)$wholeword_end/) { | |
$stats->{smiles}{$nick}++; | |
$stats->{smileys}{$1}++; | |
$stats->{smileynicks}{$1} = $nick; | |
} | |
- # asian frown: ;_; | |
- if ($saying =~ /($e'?$n[\(\[\\\/\{|]|[\)\]\\\/\}|]$n'?$e|[;:][\(\/]|[\)D]:|;_+;|T_+T|-[._]+-)/o and | |
- $saying !~ /\w+:\/\//o) { | |
+ # Ambiguous faces. Neither happy nor sad. | |
+ # There are no tears because tears + ambiguous = sad. | |
+ # Examples of ambiguous mouths: :I :O :o :0 :B :F :U :u :V :v | |
+ my $bi_am = 'FIOo0TUuVvn'; # ambiguous mouths | |
+ my $am = '[BF'.$bi_am.']'; # directional ambiguous mouths (LTR -->) | |
+ my $ram = '['.$bi_am.']'; # directional ambiguous mouths (RTL <--) | |
+ my $o_eyes = '[oO0]'; # for mismatched faces | |
+ my $ae_a = '[.oO09]'; # ambiguous eastern eyes. | |
+ # 'o' is a tricky one: its meaning changes with the mouth (o_o, o3o, o~o), but that is not handled here | |
+ | |
+ if ($saying =~ qr/$wholeword_start(>?$e$n?$am|$ram$n?$e<?|($ae_a)(?!\g{-1})$nm_a+\g{-1}$a_sw?|($o_eyes)(?!\g{-1})$nm_a+$o_eyes)$wholeword_end/) { | |
+ $stats->{smileys}{$1}++; | |
+ $stats->{smileynicks}{$1} = $nick; | |
+ } | |
+ | |
+ # Sad faces | |
+ # X x \ / L S s ( { [ < C c | |
+ my $bi_sm = 'XxSs\/|\\'; # sad mouths that work either way | |
+ my $sm = '[CcL\(\[\{<'.$bi_sm.']'; # directional sad mouths (LTR -->) | |
+ my $rsm = '[\)\]\}>3'.$bi_sm.']'; # directional sad mouths (RTL <--) | |
+ my $se_a = '[-;TQ><]'; # sad eastern eyes | |
+ # Frowny faces. >mouth< is hardcoded at end because it's weird | |
+ if ($saying =~ qr/$wholeword_start(>?$e'?$n?$sm|$rsm$n?'?$e<?|($se_a)(?!\g{-1})$nm_a+\g{-1}$a_sw?|>$nm_a+<)$wholeword_end/) { | |
$stats->{frowns}{$nick}++; | |
$stats->{smileys}{$1}++; | |
$stats->{smileynicks}{$1} = $nick; | |
-- | |
1.7.10.4 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment