jberger · November 9, 2011 04:22
diff --git a/regex.pl b/regex.pl
 #!/usr/bin/env perl

 # Finds "Full Name (ABBR)" pairs in a piece of text and dumps them as a
 # hash keyed by abbreviation.

 use strict;
 use warnings;

 use Data::Dumper;

 # Sample input. A quoted string is used instead of a heredoc so the value
 # does not depend on a terminator line sitting at column 0.
 my $text =
     'John has Acquired Immunodeficiency Synodrom (AIDS), '
   . 'while Steve has Human Immunodeficiency Synodrom (HIV).'
   . "\n";

 # extract_abbreviations($input)
 #
 # Scans $input for a run of capitalised words followed by a parenthesised
 # word, e.g. "Some Long Name (SLN)".
 #
 # Returns a hash reference mapping each abbreviation to the full name that
 # preceded it. If an abbreviation occurs more than once a warning is
 # emitted and the last occurrence wins.
 sub extract_abbreviations {
     my ($input) = @_;

     my $pair_re = qr/
         (                   # capture 1: full name
             (?:             # one capitalised word:
                 \b          #   word boundary
                 \p{Lu}      #   one Unicode uppercase letter
                 \w*         #   rest of the word
                 \b          #   word boundary
                 \s*?        #   minimal whitespace before the next word
             )+              # one or more such words
         )
         \s*                 # whitespace before the parenthesis
         \(                  # literal open paren
         (\w+)               # capture 2: abbreviation
         \)                  # literal close paren
     /x;

     my %abbr;

     # Iterating match by match means every iteration has both captures,
     # so no separate "is the list even?" check is needed.
     while ( $input =~ /$pair_re/g ) {
         my ( $value, $key ) = ( $1, $2 );

         if ( exists $abbr{$key} ) {
             warn "Overwriting abbreviation $key! Was: $abbr{$key}.\n";
         }

         $abbr{$key} = $value;
     }

     return \%abbr;
 }

 # Sort keys so the dump is the same from run to run.
 local $Data::Dumper::Sortkeys = 1;

 print Dumper extract_abbreviations($text);
	#!/usr/bin/env perl

	# Finds "Full Name (ABBR)" pairs in a piece of text and dumps them as a
	# hash keyed by abbreviation.

	use strict;
	use warnings;

	use Data::Dumper;

	# Sample input. A quoted string is used instead of a heredoc so the value
	# does not depend on a terminator line sitting at column 0.
	my $text =
	    'John has Acquired Immunodeficiency Synodrom (AIDS), '
	  . 'while Steve has Human Immunodeficiency Synodrom (HIV).'
	  . "\n";

	# extract_abbreviations($input)
	#
	# Scans $input for a run of capitalised words followed by a parenthesised
	# word, e.g. "Some Long Name (SLN)".
	#
	# Returns a hash reference mapping each abbreviation to the full name that
	# preceded it. If an abbreviation occurs more than once a warning is
	# emitted and the last occurrence wins.
	sub extract_abbreviations {
	    my ($input) = @_;

	    my $pair_re = qr/
	        (                   # capture 1: full name
	            (?:             # one capitalised word:
	                \b          #   word boundary
	                \p{Lu}      #   one Unicode uppercase letter
	                \w*         #   rest of the word
	                \b          #   word boundary
	                \s*?        #   minimal whitespace before the next word
	            )+              # one or more such words
	        )
	        \s*                 # whitespace before the parenthesis
	        \(                  # literal open paren
	        (\w+)               # capture 2: abbreviation
	        \)                  # literal close paren
	    /x;

	    my %abbr;

	    # Iterating match by match means every iteration has both captures,
	    # so no separate "is the list even?" check is needed.
	    while ( $input =~ /$pair_re/g ) {
	        my ( $value, $key ) = ( $1, $2 );

	        if ( exists $abbr{$key} ) {
	            warn "Overwriting abbreviation $key! Was: $abbr{$key}.\n";
	        }

	        $abbr{$key} = $value;
	    }

	    return \%abbr;
	}

	# Sort keys so the dump is the same from run to run.
	local $Data::Dumper::Sortkeys = 1;

	print Dumper extract_abbreviations($text);
No results found