zed9h · July 17, 2009 04:55
diff --git a/doublets.pl b/doublets.pl
 #!/usr/bin/perl
 use strict;
 use warnings;
 my $VERSION=2;

 # TODO append results to file (doublet-w1-w2.txt:$0 @ARGV\n...)

 printf "Lewis Carroll's Doublet Automated v%d\n", $VERSION;
 print "$0 @ARGV\n";

 use Getopt::Std;
 my %weight = (
 	other=>1,
 	verb =>5,
 	plural=>2,
 #	gender=>3, # TODO avoid gender switch, mark someway on dictionary load (_g => other_gender)
 #	stats=>1,  # TODO build statistics from sample.txt (machado de assis texts)
 );
 getopt('VOPGS', \%weight);
 my ($start, $end) = @ARGV;

 die "word length does not match" unless length $start == length $end;

 # DICTIONARY

 my %word = ();

 my $accent = do "accent.data";
 # { 'á'=>'a','ä'=>'a','à'=>'a','â'=>'a','ã'=>'a', 'å'=>'a',
 sub norm($)
 {
 	local $_ = shift;
 	s{[\x7f-\xff]}{exists $accent->{$&} ? $accent->{$&} : $&}ge;
 	lc $_
 }

 if($weight{other}) {
 	open WORD, "wordlist.txt" or die "$!";
 	# abdicaç:ão,ões
 	# abdome:,s
 	# Uruaçu
 	# únic:a,as,o,os
 	while(<WORD>) {
 		chomp;
 		next if /^[A-Z]/;
 		@_ = split /:/;
 		if(@_ == 2) {
 			foreach (split /,/, $_[1]) {
 				$_ = $_[0].$_;
 				my $__ = norm $_;
 				if(/s$/) {
 					next if not $weight{plural};
 					$word{$__}->{_p}++;
 				}
 				$word{$__}->{$_}++;
 			}
 		} else {
 			$word{norm($_)}->{$_}++;
 		}
 	}
 	close WORD;
 }

 if($weight{verb}) {
 	open VERB, "verblist.txt" or die "$!";
 	# confluía:confluías:confluía:confluíamos:confluíeis:confluíam
 	# confluir:confluindo:confluído
 	# ser:sendo:sido
 	# aceito
 	while(<VERB>) {
 		chomp;
 		foreach (split /:/) {
 			$_ = norm($_);
 			$word{$_}->{_v}++;
 			$word{$_}->{$_}++;
 		}
 	}
 	close VERB;
 }

 printf "Dictionary loaded with %d words.\n", scalar keys %word;

 foreach ($start, $end) {
 	die "$_ not on dictionary" unless exists $word{norm($_)};
 }

 # GRAPH

 use Graph;
 my $g = Graph::Undirected->new;

 my $n = 0;
 foreach my $w1 (sort keys %word) {
 	next unless length $start == length $w1;
 	for(my $i=0; $i < length $w1; $i++) {
 		my @w2 = (substr($w1, 0, $i), substr($w1,$i,1), substr($w1, $i+1));
 		foreach my $c ('a'..'z') {
 			next if $c eq $w2[1];
 			my $w2 = $w2[0].$c.$w2[2];
 			next unless exists $word{$w2};
 			my $h = 
 				exists $word{$w1}->{_v} || exists $word{$w2}->{_v} ? $weight{verb} : 
 				exists $word{$w1}->{_p} || exists $word{$w2}->{_p} ? $weight{plural} : 
 #				!$w2[2] && ( 
 #					($c eq 'a' && $w2[1] eq 'o') || 
 #					($c eq 'o' && $w2[1] eq 'a') ) ? $weight{gender} :
 				$weight{other};
 			$g->add_weighted_edge($w1,$w2,$h);
 		}
 	}
 	printf "\r%d words linked", ++$n;
 }
 print ".\n";

 my @path = $g->SP_Dijkstra(norm($start), norm($end));
 print "Normal solution: ", join(",", @path), "\n";
 print "Dictionary Solutions:\n";

 foreach my $w (@path) {
 	print join(" | ", sort grep {!/^_/} keys %{$word{$w}}), "\n";
 }
	#!/usr/bin/perl
	use strict;
	use warnings;
	my $VERSION=2;

	# TODO append results to file (doublet-w1-w2.txt:$0 @ARGV\n...)

	printf "Lewis Carroll's Doublet Automated v%d\n", $VERSION;
	print "$0 @ARGV\n";

	use Getopt::Std;
	my %weight = (
	other=>1,
	verb =>5,
	plural=>2,
	# gender=>3, # TODO avoid gender switch, mark someway on dictionary load (_g => other_gender)
	# stats=>1, # TODO build statistics from sample.txt (machado de assis texts)
	);
	getopt('VOPGS', \%weight);
	my ($start, $end) = @ARGV;

	die "word length does not match" unless length $start == length $end;

	# DICTIONARY

	my %word = ();

	my $accent = do "accent.data";
	# { 'á'=>'a','ä'=>'a','à'=>'a','â'=>'a','ã'=>'a', 'å'=>'a',
	sub norm($)
	{
	local $_ = shift;
	s{[\x7f-\xff]}{exists $accent->{$&} ? $accent->{$&} : $&}ge;
	lc $_
	}

	if($weight{other}) {
	open WORD, "wordlist.txt" or die "$!";
	# abdicaç:ão,ões
	# abdome:,s
	# Uruaçu
	# únic:a,as,o,os
	while(<WORD>) {
	chomp;
	next if /^[A-Z]/;
	@_ = split /:/;
	if(@_ == 2) {
	foreach (split /,/, $_[1]) {
	$_ = $_[0].$_;
	my $__ = norm $_;
	if(/s$/) {
	next if not $weight{plural};
	$word{$__}->{_p}++;
	}
	$word{$__}->{$_}++;
	}
	} else {
	$word{norm($_)}->{$_}++;
	}
	}
	close WORD;
	}

	if($weight{verb}) {
	open VERB, "verblist.txt" or die "$!";
	# confluía:confluías:confluía:confluíamos:confluíeis:confluíam
	# confluir:confluindo:confluído
	# ser:sendo:sido
	# aceito
	while(<VERB>) {
	chomp;
	foreach (split /:/) {
	$_ = norm($_);
	$word{$_}->{_v}++;
	$word{$_}->{$_}++;
	}
	}
	close VERB;
	}

	printf "Dictionary loaded with %d words.\n", scalar keys %word;

	foreach ($start, $end) {
	die "$_ not on dictionary" unless exists $word{norm($_)};
	}

	# GRAPH

	use Graph;
	my $g = Graph::Undirected->new;

	my $n = 0;
	foreach my $w1 (sort keys %word) {
	next unless length $start == length $w1;
	for(my $i=0; $i < length $w1; $i++) {
	my @w2 = (substr($w1, 0, $i), substr($w1,$i,1), substr($w1, $i+1));
	foreach my $c ('a'..'z') {
	next if $c eq $w2[1];
	my $w2 = $w2[0].$c.$w2[2];
	next unless exists $word{$w2};
	my $h =
	exists $word{$w1}->{_v} \|\| exists $word{$w2}->{_v} ? $weight{verb} :
	exists $word{$w1}->{_p} \|\| exists $word{$w2}->{_p} ? $weight{plural} :
	# !$w2[2] && (
	# ($c eq 'a' && $w2[1] eq 'o') \|\|
	# ($c eq 'o' && $w2[1] eq 'a') ) ? $weight{gender} :
	$weight{other};
	$g->add_weighted_edge($w1,$w2,$h);
	}
	}
	printf "\r%d words linked", ++$n;
	}
	print ".\n";

	my @path = $g->SP_Dijkstra(norm($start), norm($end));
	print "Normal solution: ", join(",", @path), "\n";
	print "Dictionary Solutions:\n";

	foreach my $w (@path) {
	print join(" \| ", sort grep {!/^_/} keys %{$word{$w}}), "\n";
	}