Last active
August 29, 2015 14:17
-
-
Save e-monson/7d4b15a9a62399ee0ffb to your computer and use it in GitHub Desktop.
This generates silly text based on a given text file. Based on http://www.cs.princeton.edu/courses/archive/spr05/cos126/assignments/markov.html Usage: ./sillytext.pl <k-gram length> <iterations> <your text file>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
use strict;
use warnings;
#use criticism 'brutal';
use File::Open qw(fopen);
use Data::Munge;

# Command-line arguments, in order:
#   1. k-gram length
#   2. number of iterations
#   3. path of the text file whose k-gram statistics are imitated
my $usage = "Usage: $0 <k-gram length> <iterations> <your text file>\n";
die $usage if @ARGV < 3;

my $k    = shift;    # k-gram length
my $n    = shift;    # number of iterations
my $file = shift;

# Both sizes feed substr and the range operator later on, so reject anything
# that is not a plain decimal integer instead of running on with warnings.
die "k-gram length must be a positive integer\n$usage"
    if $k !~ /\A[1-9][0-9]*\z/;
die "iterations must be a non-negative integer\n$usage"
    if $n !~ /\A[0-9]+\z/;

# Read the whole input into a single string: fopen (File::Open) opens the
# file and slurp (Data::Munge) reads everything from the resulting handle.
my $long_string = slurp fopen $file;
# Build the transition table. For every position in the text where a
# complete k-gram starts AND a complete k-gram starts one character later,
# record the later k-gram as a possible successor of the earlier one.
# Positions near the end of the text, where substr would return fewer than
# k characters, are skipped so that no truncated k-grams enter the table.
my %markov_chain;    # k-gram => array ref of the k-grams observed after it
for my $i (0 .. length($long_string) - $k - 1) {
    my $curr = substr $long_string, $i,     $k;
    my $next = substr $long_string, $i + 1, $k;

    # push requires a real array: pushing onto a bare reference was an
    # experimental feature that Perl 5.24 removed. Dereferencing here
    # autovivifies the array the first time a k-gram is seen.
    push @{ $markov_chain{$curr} }, $next;
}
# Start from the first k-gram of the original document and print it.
# Then, on each iteration, draw a random k-gram from the current k-gram's
# entry in %markov_chain, make it the current k-gram for the next
# iteration, and print its last character.
# Return one element of the argument list, picked at random.
# With an empty argument list the result is undef.
sub rand_elem {
    my @candidates = @_;
    my $pick = int( rand( scalar @candidates ) );
    return $candidates[$pick];
}
# Emit the generated text: the opening k-gram of the input, followed by one
# character per iteration.
my $seed = substr $long_string, 0, $k;
print $seed;

# Exactly $n iterations (the range 0 .. $n would run one extra time).
for my $iteration (1 .. $n) {
    # Plain hash fetch, so an unknown k-gram is not autovivified.
    my $successors = $markov_chain{$seed};

    # Dead end: this k-gram was never followed by another one in the input
    # (for instance, it only occurs at the very end of the text).
    last if !$successors || !@{$successors};

    $seed = rand_elem @{$successors};

    # A successor shorter than k is a truncated k-gram from the tail of the
    # text; it adds no new character, so stop rather than repeat output.
    last if length($seed) < $k;

    print substr $seed, -1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment