Created
November 9, 2011 04:22
-
-
Save jberger/1350371 to your computer and use it in GitHub Desktop.
Perl Regex for Phil
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl

# Find "Capitalised Full Name (ABBR)" pairs in a piece of text and print the
# resulting abbreviation => full-name mapping with Data::Dumper.

use strict;
use warnings;

use Data::Dumper;

# Dump hash keys in sorted order so the printed mapping is stable between runs.
$Data::Dumper::Sortkeys = 1;

my $text = <<'END';
John has Acquired Immunodeficiency Synodrom (AIDS), while Steve has Human Immunodeficiency Synodrom (HIV).
END

print Dumper extract_abbreviations($text);

# extract_abbreviations($text)
#
# Scans $text for a run of one or more capitalised words immediately followed
# by a parenthesised word, e.g. "Foo Bar Baz (FBB)".
#
# Parameters:
#   $text - the string to scan; undef is treated as containing no pairs.
#
# Returns:
#   A hash reference mapping each abbreviation (the text inside the
#   parentheses) to the capitalised phrase that precedes it.
#
# Warns (via warn) when the same abbreviation is seen more than once; the
# later phrase replaces the earlier one.
sub extract_abbreviations {
    my ($text) = @_;

    my %abbr;
    return \%abbr unless defined $text;

    my $pair_re = qr/
        (               # capture 1: full name
            (?:         # one capitalised word, plus optional spacing
                \b          # word boundary
                \p{Lu}      # one unicode uppercase letter
                \w*         # any additional word characters
                \b          # word boundary
                \s*?        # minimal whitespace, so that the capture
                            # never ends with trailing whitespace
            )+          # one or more occurrences of the group
        )
        \s*             # any whitespace before the parenthesis
        \(              # open paren
        (\w+)           # capture 2: abbreviation (one or more word chars)
        \)              # close paren
    /x;

    # In scalar context m..g resumes after the previous match, so every
    # iteration yields exactly one (full name, abbreviation) pair.
    while ( $text =~ /$pair_re/g ) {
        my ( $full_name, $key ) = ( $1, $2 );

        if ( exists $abbr{$key} ) {
            warn "Overwriting abbreviation $key! Was: $abbr{$key}.\n";
        }
        $abbr{$key} = $full_name;
    }

    return \%abbr;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment