Skip to content

Instantly share code, notes, and snippets.

@TheAthlete
Last active June 11, 2020 16:28
Show Gist options
  • Save TheAthlete/01e0d82f2b592a238d4eb80537d26944 to your computer and use it in GitHub Desktop.
Save TheAthlete/01e0d82f2b592a238d4eb80537d26944 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use feature 'say';
use utf8;
use open qw/:std :utf8/;
use FindBin qw/$Bin/;
use File::Slurper qw/read_lines/;
use DDP;
use List::Util qw/any/;
# TransliterateFilter
# URL prefixes that must survive transliteration unchanged, one per line of
# antimat-data/url.txt. Each line is trimmed of surrounding whitespace.
# Blank lines are dropped: an empty prefix would match every string in
# is_whitelist_url and silently whitelist everything.
my @whitelist_urls = grep { length } map { s/\A\s+|\s+\z//gr } read_lines "$Bin/antimat-data/url.txt";
# Reports whether a URL-like token is whitelisted.
#
# Takes one scalar (the token) and returns a true value when the token
# begins with any entry of @whitelist_urls, a false value otherwise.
# The comparison is a literal, case-sensitive prefix match.
sub is_whitelist_url($) {
    my ($candidate) = @_;
    for my $prefix (@whitelist_urls) {
        return !!1 if $candidate =~ /^\Q$prefix\E/;
    }
    return !!0;
}
# Pattern used to spot URL-like tokens inside free text.
#
# Reading the pieces in order:
#   - the match must start at a word boundary;
#   - capture group 1 is the whole token, group 2 the optional
#     "http://" or "https://" scheme;
#   - then a run of characters that are not a comma, whitespace,
#     parentheses or angle brackets;
#   - then a literal dot;
#   - then either a run of word characters, or one or more repetitions of
#     the second alternation (the bracket class sequence or a slash).
#
# NOTE(review): "[^,[[:punct:]]\s]" appears to parse as a negated class
# (comma, "[", punctuation) followed by a whitespace character and a literal
# "]", not as a single class excluding punctuation and whitespace. If the
# latter was intended the class needs rewriting - confirm before changing,
# since that would alter what gets matched.
my $url_re = qr{
\b
(
(https?://)?
[^,\s()<>]+
\.
(?:
(?:[\w\d]+)
|
(?:
[^,[[:punct:]]\s]
|
/
)+
)
)
}x;
# Converts Latin letters in the given string to Cyrillic ones and returns
# the converted copy. Tokens matched by $url_re that is_whitelist_url
# accepts are kept exactly as they were written.
sub transliterate_process($) {
    my ($input) = @_;

    # Park every whitelisted URL behind a letter-free marker (a fixed symbol
    # run plus the URL's index in @protected) so that the letter mappings
    # below cannot touch it. Other matches are put back unchanged.
    my @protected;
    $input =~ s/$url_re/
        is_whitelist_url($1)
            ? do { push @protected, $1; '!@#$%' . $#protected }
            : $1
    /exg;

    # Two-letter combinations go first, ahead of the per-character table.
    my %cyrillic_for = (
        yo => "ё", ch => "ч", sh => "ш", ya => "я",
        Yo => "Ё", Ch => "Ч", Sh => "Ш", Ya => "Я",
        ye => "ие", YE => "Є",
    );
    for my $latin (keys %cyrillic_for) {
        $input =~ s/$latin/$cyrillic_for{$latin}/g;
    }

    # Remaining single letters, mapped position by position.
    $input =~ tr/abvgdejziklmnoprstyfhcuABVGDEJZIKLMNOPRSTYFHCUwW/абвгдежзиклмнопрстуфхцуАБВГДЕЖЗИКЛМНОПРСТЮФХЦУвВ/;

    # Restore the parked URLs; a marker without a stored URL stays as is.
    if (@protected) {
        $input =~ s{!@#\$%(\d+)}{ $protected[$1] || $& }eg;
    }

    return $input;
}
# say "$_ : " . is_whitelist_url($_) for qw/mail.ru abcde super.man/;
# CensorFilter
# Masks the words listed in antimat-data/pattern.txt (one per line, trimmed)
# wherever they occur in the given string, and returns the masked copy.
#
# A listed word also matches "stretched" spellings: every letter may be
# repeated and whitespace may sit between letters. Matching ignores case.
# Each match is replaced by as many '*' as the listed word has characters,
# regardless of how long the matched text was.
sub censor_process($) {
    my $input = shift;

    # Blank lines are skipped: an empty pattern would make s/// reuse the
    # last successfully matched regex instead of matching nothing.
    my @censor_patterns = grep { length }
        map { s/\A\s+|\s+\z//gr }
        read_lines "$Bin/antimat-data/pattern.txt";

    for my $word (@censor_patterns) {
        # Whitespace inside a listed word is dropped here because \s* is
        # already allowed between any two letters of the pattern.
        my @letters = grep { /\S/ } split //, $word;

        # quotemeta keeps each character literal, so a metacharacter in the
        # data file can neither break nor widen the generated pattern.
        my $pattern = join '\s*', map { quotemeta($_) . '+' } @letters;

        my $mask = '*' x length $word;
        $input =~ s/$pattern/$mask/gi;
    }

    return $input;
}
# CommonFilter
# Masks literal strings in $input and returns the masked copy.
#
#   $filename - file holding the strings to mask, one per line; each line is
#               trimmed of surrounding whitespace
#   $input    - text to filter
#
# Every exact, case-sensitive occurrence of a listed string is replaced by
# the same number of '*' characters.
sub common_process($$) {
    my ($filename, $input) = @_;

    # Blank lines are skipped: an empty pattern would make s/// reuse the
    # last successfully matched regex instead of matching nothing.
    my @common_patterns = grep { length }
        map { s/\A\s+|\s+\z//gr }
        read_lines($filename);

    for my $literal (@common_patterns) {
        # The match is literal, so the matched text always has the length of
        # the listed string; the mask can be built once per entry.
        my $mask = '*' x length $literal;
        $input =~ s/\Q$literal\E/$mask/g;
    }

    return $input;
}
# Masks the strings listed in antimat-data/phone.txt inside the given text
# by handing both to common_process, and returns its result.
sub phone_process($) {
    my ($input) = @_;
    return common_process("$Bin/antimat-data/phone.txt", $input);
}
# Demo run: send each sample through transliteration, then the censor
# filter, then the phone filter, and print the result on its own line.
for my $sample (
    "abcde mail.ru def",
    "abcde ya.ru def",
    "Опа-опа, срослась pizda и жопа, ya.ru этого не может быть, между ними должен быть хххууйййййй. P.S. Позвони мне по номеру +79261111111 и лайкни меня на mail.ru.",
) {
    say phone_process(censor_process(transliterate_process($sample)));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment