Created
January 25, 2018 01:35
-
-
Save ggorlen/dc6a396268c89764d05bf95da9ade5f4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 * Source viewer: a request that carries a `source` query parameter gets
 * this script's own syntax-highlighted source instead of the page, and
 * execution stops before any markup is sent.
 */
if (isset($_GET['source'])) {
    $self = $_SERVER['SCRIPT_FILENAME'];
    highlight_file($self);
    exit;
}
?>
<!DOCTYPE html> | |
<html lang="en"> | |
<head>
<!-- Declare the encoding explicitly so the browser does not have to guess
     how to decode this page or how to encode the submitted form text. -->
<meta charset="utf-8">
<title>Markov generator</title>
<style>
/* Monospace look for the page text and the form inputs. */
body, input {
    font-family: monospace, 'lucida console', 'courier new';
    font-size: 13px;
}
/* Fixed-width, horizontally centred column used for both the form and the output. */
.main {
    margin-top: 10px;
    margin-left: auto;
    margin-right: auto;
    width: 500px;
    padding: 10px;
}
form table td {
    padding: 5px;
}
/* The source box fills the column width. */
textarea {
    width: 100%;
    height: 150px;
}
</style>
</head>
<body> | |
<h2 style='text-align: center;'>Markov generator</h2> | |
<div class='main'> | |
<form name='markovinput' method='POST' action='markov.php' onreset='clearText();'> | |
<table style="width: 100%;"> | |
<tr> | |
<td> | |
Source:<br> | |
<textarea autofocus required id='inputtextarea' name='text'><?php
/*
 * Pre-fill the source box.
 *
 * - If the form was submitted, echo the submitted text back.
 * - Otherwise load the bundled sample corpus (pg5200.txt), normalise its
 *   whitespace and strip the leading/trailing boilerplate words.
 *
 * Everything printed here lands inside a <textarea>, so it is HTML-escaped:
 * unescaped input containing "</textarea>" would break out of the element
 * and inject markup into the page.
 */
(function () {
    $escape = function ($value) {
        return htmlspecialchars($value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    if (isset($_POST['text'])) {
        // A field posted as text[]=... arrives as an array; there is
        // nothing sensible to echo back in that case.
        if (is_string($_POST['text'])) {
            print $escape($_POST['text']);
        }
        return;
    }

    // load file
    $corpusPath = 'pg5200.txt';
    $raw = is_readable($corpusPath) ? file_get_contents($corpusPath) : false;
    if ($raw === false) {
        die('File error');
    }

    // split on whitespace
    $words = preg_split('/\s+/', $raw);

    // disregard headers and footers: skip the first 126 words and the
    // last 2974 words of the file
    $words = array_slice($words, 126, -2974);

    print $escape(implode(' ', $words));
})();
?></textarea>
</td> | |
</tr> | |
</table> | |
<table> | |
<tr> | |
<td> | |
Order:<br> | |
<input size=4 name='order' value=2> | |
</td> | |
<td> | |
Output length in words:<br> | |
<input size=8 name='size' value=2000> | |
</td> | |
</tr> | |
<tr> | |
<td> | |
<input type='submit'> <input type='reset'> | |
</td> | |
</tr> | |
<tr> | |
<td> | |
<a href='../'>Return home</a> | |
</td> | |
</tr> | |
</table> | |
</form> | |
<hr> | |
</div> | |
<div class='main'> | |
<?php
/* Markov generator
 *
 * resources:
 * http://stackoverflow.com/questions/4081662/explain-markov-chain-algorithm-in-laymans-terms
 * http://vivin.net/projects/sulekha/
 * https://blog.codinghorror.com/markov-and-you/
 * http://greenteapress.com/thinkpython/html/thinkpython014.html
 *
 */

/**
 * Map every run of $order consecutive words (joined with single spaces)
 * to the list of words that follow that run in $words.
 *
 * A word that follows the same prefix several times appears several times
 * in the list, so a uniform random pick from the list is weighted by
 * frequency.
 *
 * @param array $words list of words; assumed to contain no spaces
 * @param int   $order number of words per prefix, >= 0
 * @return array prefix string => list of suffix words
 * @throws InvalidArgumentException when $order is negative
 */
function markovBuildMap(array $words, int $order): array
{
    if ($order < 0) {
        throw new InvalidArgumentException('Order must not be negative.');
    }

    $words = array_values($words);
    $map = [];
    $limit = count($words) - $order;
    for ($i = 0; $i < $limit; $i++) {
        $prefix = implode(' ', array_slice($words, $i, $order));
        $map[$prefix][] = $words[$i + $order];
    }
    return $map;
}

/**
 * Lazily produce up to $size words by walking the prefix map of $words.
 *
 * Starts from a random prefix. At each step a random suffix of the current
 * prefix is yielded, then the prefix window slides one word forward. When
 * the window reaches a prefix that has no recorded suffix, the walk
 * restarts from another random prefix.
 *
 * Yields nothing when $words is too short to contain a single
 * prefix/suffix pair.
 *
 * @param array $words list of words; assumed to contain no spaces
 * @param int   $order number of words per prefix, >= 0
 * @param int   $size  number of words to produce
 * @return Generator yields the generated words one at a time
 */
function markovGenerate(array $words, int $order, int $size): Generator
{
    $map = markovBuildMap($words, $order);
    if ($map === []) {
        return;
    }

    // PHP turns numeric-looking string keys ("42") into integer keys, so
    // keys are cast back to strings before being used as prefixes.
    $keys = array_keys($map);
    $prefix = (string) $keys[array_rand($keys)];

    for ($i = 0; $i < $size; $i++) {
        // dead end: this prefix was never followed by anything
        if (!array_key_exists($prefix, $map)) {
            $prefix = (string) $keys[array_rand($keys)];
        }

        // pick a random suffix that follows this prefix
        $suffixes = $map[$prefix];
        $suffix = $suffixes[array_rand($suffixes)];
        yield $suffix;

        // Slide the window: append the suffix, drop the first word.
        // Working on the word list (not on strings) avoids the stray
        // leading space that string concatenation produces when the
        // remaining prefix is empty, i.e. for order 1.
        $window = $prefix === '' ? [] : explode(' ', $prefix);
        $window[] = $suffix;
        $prefix = implode(' ', array_slice($window, 1));
    }
}

// Handle a form submission: validate the input and print the generated text.
(function () {
    // get user input
    if (!isset($_POST['order'], $_POST['text'], $_POST['size'])) {
        return;
    }
    $text = $_POST['text'];
    if (!is_string($text)) {
        return;
    }

    // filter_var() yields false for anything that is not a whole number,
    // including values posted as arrays
    $order = filter_var($_POST['order'], FILTER_VALIDATE_INT);
    $size = filter_var($_POST['size'], FILTER_VALIDATE_INT);
    if ($order === false || $order < 0 || $size === false || $size < 0) {
        print 'Order and output length must be non-negative whole numbers.';
        return;
    }

    // split on whitespace; leading/trailing whitespace yields no empty words
    $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);

    // validate input
    if ($order >= count($words)) {
        print "Order must be smaller than the number
               of words in the source text.";
        return;
    }

    // The words come straight from the request, so escape them for HTML.
    foreach (markovGenerate($words, $order, $size) as $word) {
        print htmlspecialchars($word, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ' ';
    }
})();
?>
</div> | |
<script>
// Referenced by the form's onreset attribute: empties the source textarea.
function clearText() {
    document.getElementById('inputtextarea').innerHTML = '';
}
<?php
// Restore the submitted "size" and "order" values in the form after a POST.
// The values come from the request, so they are emitted as JSON string
// literals; the HEX flags keep "<", ">", "&" and quotes out of the output
// so a value cannot close the <script> element or the string.
foreach (['size', 'order'] as $field) {
    if (!isset($_POST[$field]) || !is_string($_POST[$field])) {
        continue;
    }
    $encoded = json_encode(
        $_POST[$field],
        JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT
    );
    // json_encode() returns false for input it cannot encode (for example
    // invalid UTF-8); skip the assignment rather than emit broken script.
    if ($encoded === false) {
        continue;
    }
    print "document.forms['markovinput'].elements['" . $field . "'].value = " .
        $encoded . ";\n";
}
?>
</script>
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment