Created
December 2, 2013 16:25
-
-
Save pts/7752110 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!perl -w | |
# let's learn pod! | |
=head1 NAME | |
ladder - Finds a word letter between two words | |
=head1 SYNOPSIS | |
B<ladder> I<word1> I<word2> [I<dictfile>] | |
=head1 DESCRIPTION | |
This is a solution for Expert Perl Quiz of the Week 22 | |
(L<http://perl.plover.com/qotw/e/022>). | |
It builds a word ladder between two words given as arguments. | |
The program needs a I<sorted> plain text dictionary file to build the word ladder. | |
The name of this dictionary can be given as an optional third argument. | |
There is a default dictionary, of which the path can be set in the source. | |
=head1 SWITCHES | |
=over | |
=item B<-l> | |
Do not use capitalized words from the dictionary. | |
If this switch is not used, case differences are ignored. | |
=item B<-u> | |
Do not do the first-letter optimization. | |
The optimization should make the program run faster in most cases, | |
but when it makes it slower, you can disable it with this switch. | |
The optimization relies on that the dictionary is sorted (case-insensitively), | |
so when you use an unsorted dictionary, you should use this switch. | |
=item B<-w> | |
Spit out various debugging information, mostly useful for developping. | |
=back | |
=head1 IMPLEMENTATION | |
This program uses a very simple algorithm: a two-way breadth-first search. | |
Thus, we first read the dictionary. | |
Then we take the graph of words with the edges being those pair of words that | |
differ in one letter only. | |
Then we do a breadth-first search from both endpoints simultanously. | |
If we find a word that is reachable from both endpoints, we return success. | |
If the search from either endpoint ends, we return a failure. | |
I first wanted to implement a more complicated (and hopefully faster) | |
search algorithm, but when I implemented this one, | |
it turned out that the search itself is much faster than reading the dictionary, | |
so I rather optimized I/O. | |
Thus, the program now tries to read only those parts of the dictionary that start | |
with the same letter as one of the given words, and try to find a word ladder | |
among those. | |
Furthermore, there is yet one more level of optimization concerning the first | |
two letters of the words. | |
=head1 EXAMPLE | |
You run: | |
perl ladder find lose | |
You get: | |
find | |
fine | |
fise | |
Lise | |
lose | |
=head1 AUTHOR | |
L<ambrus@...> | |
=cut | |
# settings | |
our $default_dictname = "/home/ambrus/a/qotw/e22/words-b"; | |
# 0. Declarations | |
use warnings; use strict; | |
use IO::Handle; | |
# 1. Parse argv | |
our($dictname, $noproper, $word1o, $word2o, $debug, $unoptimize, $unoptimize2); | |
{ | |
use Getopt::Std; | |
getopts "luUw", \my %switch; | |
($noproper, $debug, $unoptimize, $unoptimize2) = @switch{qw(l w u U)}; | |
2<=@ARGV && @ARGV<=3 or die "Usage: ladder -d dictfile word1 word2"; | |
($word1o, $word2o, $dictname) = @ARGV; | |
defined($dictname) or $dictname = $default_dictname; | |
} | |
# 2. Locale-dependent functions | |
{ | |
use locale; | |
my $re = qr/\A[[:alpha:]]+\z/; | |
$noproper and $re = qr/\A[[:lower:]]+\z/; | |
sub checkword { | |
my($w) = @_; | |
$w=~$re ? 1 : 0; | |
} | |
# You might want to treat some letters the same (especially in French). | |
# In that case, change the following function accordingly. The function must | |
# not change the lenght of the word or else the program will break. | |
sub normalize { | |
lc $_[0]; | |
} | |
} | |
# 3. Do some simple checking on the words | |
our($length, $word1, $word2); | |
sub addword; | |
do { | |
checkword($word1o) && checkword($word2o) or | |
die "one of the given words contain illegal characters"; | |
length($word1o)==length($word2o) or | |
die "it is impossible to build a word ladder, as the two words are of different length\n"; | |
$length = length($word1o); | |
$word1 = normalize($word1o); | |
addword $word1, $word1o; | |
$word2 = normalize($word2o); | |
addword $word2, $word2o; | |
$word1 eq $word2 and do { | |
print $word1, "\n"; | |
exit 0; | |
}; | |
}; | |
# 4. Read and process the dictionary | |
# this should be really simple, but the code somehow became a mess. | |
our %word; # this hash contains the original forms of a word | |
our $dict; # a filehandle | |
sub addword; | |
do { | |
open $dict, "<", $dictname or die qq{cannot open dictionary file "$dictname": $!}; | |
$debug and warn qq{dictionary file "$dictname" open}; | |
}; | |
sub addword { # $w should be eq normalize($o) | |
my($w, $o) = @_; | |
defined($word{$w}) or | |
$word{$w} = $o; | |
} | |
sub readdict { | |
no locale; | |
my($until) = @_; | |
my($w, $l); | |
$! = 0; | |
while (defined($l = <$dict>)) { | |
chomp $l; | |
length($l)==$length or next; | |
checkword $l or next; | |
$w = normalize($l); | |
$w lt $until or last; | |
addword $w, $l; | |
} | |
error $dict and die qq{error readig dictionary file "$dictname": $!}; # see "http://www.perlmonks.com/?node_id=386996" | |
$debug and warn "there are ", (0+keys(%word)), " words in the hash now"; | |
} | |
# 5. Optimization: not to read the whole word list unless neccessary | |
our $phase; | |
sub seekpart; sub makepatts; sub dosearch; sub seekpart; | |
sub readpart { | |
my($prefix) = @_; | |
defined($prefix) or $prefix = ""; | |
seekpart $prefix; | |
readdict $prefix . "\xff\xff\xff"; # yes, this is a kludge, but I hope no word will have a triple \xff in it. | |
} | |
sub seekpart { | |
my($prefix) = @_; | |
my($l, $w); | |
my($a, $b) = (0, -s $dict); | |
defined($b) or die qq{error fstating the dictionary file "$dictname": $!}; | |
until ($b-$a<=2048) { | |
my $c = int(($a + $b)/2); | |
seek $dict, $c, 0 or | |
die qq{error seeking the dictionary file "$dictname" to $c during bsearch: $!}; | |
$! = 0; | |
$l = <$dict>; # get aligned to a newline | |
chomp($l = <$dict>) until | |
checkword $l or !defined($l); | |
error $dict and | |
die qq{error reading the dictionary file "$dictname" during bsearch: $!}; | |
$w = defined($l) ? normalize($l) : ("\xff" x 6); | |
($w gt $prefix) ? $b : $a = $c; | |
} | |
$debug and warn qq{bsearch done ("$prefix" $a)}; | |
seek $dict, $a, 0 or | |
die qq{error seeking the dictionary file "dictname" to $a, during bsearch, final: $!}; | |
$a>0 and do { # IMPORTANT! | |
<$dict>; | |
error $dict and | |
die qq{error reading the dictionary file "$dictname" during bsearch, final align: $!} | |
}; | |
} | |
our %readpart; | |
sub condreadpart { | |
my($l) = @_; | |
unless ($readpart{$l}) { | |
$debug and warn qq{processing prefix "$l" from the dictionary}; | |
readpart $l; | |
$readpart{$l} = 1; | |
} | |
} | |
ALL: { | |
if (!$unoptimize) { | |
if (!$unoptimize2) { | |
$phase = 2; | |
condreadpart substr($word1, 0, 2); | |
condreadpart substr($word2, 0, 2); | |
condreadpart substr($word1, 0, 1) . substr($word2, 1, 1); | |
condreadpart substr($word2, 0, 1) . substr($word1, 1, 1); | |
makepatts; | |
dosearch; | |
} | |
$phase = 1; | |
condreadpart substr($word1, 0, 1); | |
condreadpart substr($word2, 0, 1); | |
makepatts; | |
dosearch; | |
} | |
$phase = 0; | |
$debug and warn "processing the whole dictionary"; | |
readpart ""; | |
makepatts; | |
close $dict or die qq{error closing dictionary file "$dictname": $!}; | |
dosearch; | |
} | |
# 6. Create helper table so that we can find adjacent words faster | |
our %patt; | |
sub makepatts { | |
%patt = (); | |
for my $w (keys %word) { | |
for my $n (0..length($w)-1) { | |
my $p = substr($w, 0, $n) . "." . substr($w, $n+1); | |
push @{$patt{$p}}, $w; | |
} | |
} | |
$debug and warn "there are ", (0+keys(%word)), " patterns in the hash now"; | |
} | |
# 7. Do the actual search | |
our @qu; # the queue needed for the bsearch | |
our %seen; # what nodes were seen in the bsearch and from where | |
sub visit; sub visit1; sub success; sub neighbours; sub failure; | |
sub cleansearch { | |
%seen = (); | |
} | |
sub dosearch { | |
PHASE: { | |
cleansearch; | |
@qu = ( | |
[$word1, 1, undef], | |
1, | |
[$word2, 2, undef], | |
2, | |
); | |
while (my $e = shift @qu) { | |
if (ref $e) { | |
visit @$e; | |
} else { | |
push @qu, $e; | |
!ref($qu[0]) and failure ($e); | |
$debug and warn "$e\n"; | |
} | |
} | |
die "internal error: the markers have somehow disappeared"; | |
} | |
} | |
sub visit { | |
my($w, $dir, $from) = @_; | |
my $s = $seen{$w}[0]; | |
$s && $s!=$dir and | |
success $w, $dir, $from; | |
$s and return; | |
$seen{$w} = [$dir, $from]; | |
neighbours $w, sub | |
{ push @qu, [$_[0], $dir, $w]; }; | |
} | |
# 8. Find the neighbours of a given word | |
sub neighbours { | |
my($w, $cb) = @_; | |
for my $n (0..length($w)-1) { | |
my $p = substr($w, 0, $n) . "." . substr($w, $n+1); | |
@{$patt{$p}}<=1 and next; | |
for my $x (@{$patt{$p}}) { | |
&$cb($x) unless $x eq $w; | |
} | |
} | |
} | |
# 9. Output the result, either word ladder or failure | |
sub failure { | |
$debug and warn "phase $phase failed, word$_[0] was more isolated,"; | |
$phase or do { | |
die "no word ladder found\n"; | |
}; | |
no warnings "exiting"; | |
next PHASE; # :-) | |
} | |
sub printladder1; sub printladder2; | |
sub success { | |
$debug and warn "about to print solution"; | |
my($pn, $dir, $po) = @_; | |
my($p1, $p2) = $dir==1 ? ($po, $pn) : ($pn, $po); | |
printladder1 $p1; | |
printladder2 $p2; | |
no warnings "exiting"; | |
last ALL; | |
} | |
sub printladder1 { | |
my $r = $word{$_[0]}; | |
my $n = $seen{$_[0]}[1]; | |
$n and printladder1 $n; | |
print $r, "\n"; | |
} | |
sub printladder2 { | |
my $r = $word{$_[0]}; | |
my $n = $seen{$_[0]}[1]; | |
print $r, "\n"; | |
$n and printladder2 $n; | |
} | |
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment