Skip to content

Instantly share code, notes, and snippets.

@ja-k-e
Last active August 29, 2015 14:21
Show Gist options
  • Select an option

  • Save ja-k-e/f80c1e34571ac3b5cf90 to your computer and use it in GitHub Desktop.

Select an option

Save ja-k-e/f80c1e34571ac3b5cf90 to your computer and use it in GitHub Desktop.
Random English "Word" Generation
<main>
<div class="word-main">
<h1 id="word"></h1>
<h2><span id="length"></span></h2>
<ul id="charlist"></ul>
<button id="new_word">New</button>
</div>
<div class="description">
<h3>Random "Word" Generation</h3>
<p>First, a random word length is generated based on the frequency with which any word length from 1-19 occurs in the English language. Letters are then generated based on the frequency with which they appear in the English language.</p>
<p>There are NO relative factors in the calculation (a one letter word needing to be a vowel, it being impossible for the same letter to appear 3 or more times in a row, or letters appearing more frequently next to other letters). This is simply to demonstrate the probability of English words being formed randomly based on their presence in the English language.</p>
<p>Beneath the word is its character count and the frequency of that word length in the English language. Beneath that is the breakdown for each letter with the frequency of that letter's occurrence in the English language.</p>
<h4>If you manage to randomly generate an actual word, be sure to throw it in the comments as it is a fairly rare occurrence!</h4>
</div>
</main>
<aside>
<ul id="history"></ul>
</aside>

Random English "Word" Generation

I was curious about the probability of real words being randomly generated based on word length frequency and character frequency in the English language...so I made this. It validates the existence of randomly generated words in a scrabble dictionary.

It's just a rough concept for now, but my hope is that it will stay relatively "unintelligent" and deal with the probability of randomly forming order instead of becoming something I try to guide into making words consistently.

A Pen by Jake Albaugh on CodePen.

License.

#
# randomly forming an english "word"
#
# loading dictionary
# The dictionary lookup list: one lowercase entry per dictionary word.
# It stays an array because the word check uses $.inArray(word, dict).
dict = []
# Do a jQuery Ajax request for the text dictionary
# dictionary taken from http://zyzzyva.net/lexicons/WWF.txt
$.get("https://s3-us-west-2.amazonaws.com/s.cdpn.io/111863/dictionary.txt", (txt) ->
  # One word per line. Trim each line so CRLF line endings do not leave a
  # trailing "\r" on every entry, lowercase it because generated words are
  # built only from the lowercase letters a-z, and skip blank lines (such as
  # the one produced by a trailing newline).
  for line in txt.split("\n")
    entry = line.trim().toLowerCase()
    dict.push entry if entry
  # dictionary loaded
  # go!
  writeWord(randomWord())
  return
).fail ->
  # The dictionary could not be fetched: still render a first word so the page
  # is not left blank. With an empty dict no word can be flagged as real.
  writeWord(randomWord())
  return
# random letter based on english language frequency taken from https://gist.github.com/rorsach/4054073
# Each table value is the letter's cumulative frequency on a 0-100000 scale, so
# a letter's own share is its value minus the previous letter's value.
randomAtoZ = do ->
  cumulative = {
    a: 8167, b: 9659, c: 12441, d: 16694,
    e: 29396, f: 31624, g: 33639, h: 39733,
    i: 46699, j: 46852, k: 47624, l: 51649,
    m: 54055, n: 60804, o: 68311, p: 70240,
    q: 70335, r: 76322, s: 82649, t: 91705,
    u: 94463, v: 95441, w: 97801, x: 97951,
    y: 99925, z: 100000
  }
  # Returns {char, charfreq, chance}: the chosen letter, its cumulative table
  # value, and its own frequency formatted as a percentage string.
  ->
    roll = Math.random() * 100000
    lower = 0
    # Walk the table in order; the first bucket whose upper bound exceeds the
    # roll is the chosen letter.
    for letter, upper of cumulative
      if roll < upper
        return {char: letter, charfreq: upper, chance: (upper - lower) / 1000 + '%'}
      lower = upper
    return
# Cumulative word-length frequency table on a 0-100000 scale: wordfreq[i] is the
# upper bound of the bucket for words of length i + 1. It starts empty and is
# filled once, on the first call to randomWordLength.
wordfreq = []
# Picks a random word length from 1-19, weighted by how often each length occurs.
# Returns {length, chance}: `length` is the chosen length as a string (it was an
# object key before, so its type is kept for callers) and `chance` is that
# length's frequency formatted as a percentage string, e.g. '14%'.
randomWordLength = () ->
  # Build the cumulative table only once. Rebuilding it on every call appended
  # 19 duplicate entries to wordfreq each time, so the array grew without bound.
  if wordfreq.length is 0
    # word length frequency in the english language from 1-19 characters
    # taken from http://www.ravi.io/language-word-lengths
    percentages = [0.1, 0.6, 2.6, 5.2, 8.5, 12.2, 14.0, 14.0, 12.6, 10.1, 7.5, 5.2, 3.2, 2.0, 1.0, 0.6, 0.3, 0.2, 0.1]
    total = 0
    for percent in percentages
      total += (percent / 100) * 100000
      wordfreq.push total
  # getting random number
  roll = Math.random() * 100000
  last = wordfreq.length - 1
  prev = 0
  for lengthfreq, index in wordfreq
    # The last bucket also catches a roll at or above the final cumulative
    # value, which floating-point summation can leave a hair under 100000.
    # Without this the loop would fall through and return undefined.
    if roll < lengthfreq or index is last
      # Math.round strips floating-point noise from the bucket width.
      return {length: String(index + 1), chance: Math.round(lengthfreq - prev) / 1000 + '%'}
    prev = lengthfreq
  return
# building the word
# Returns {length_i, chance, word, isword, char_map}: the drawn length and its
# frequency, the generated word, whether it was found in dict, and the
# per-letter records returned by randomAtoZ.
randomWord = () ->
  # draw the word length and its frequency
  drawn = randomWordLength()
  length_i = drawn.length
  chance = drawn.chance
  # one random letter record per position, in order
  char_map = (randomAtoZ() for position in [1..length_i])
  # the word is the letters of those records joined together
  word = (record.char for record in char_map).join ''
  # check if it is a word
  isword = $.inArray(word, dict) > -1
  if isword
    alert '"' + word + '" is a word in the dictionary. Congrats.'
  return {length_i, chance, word, isword, char_map}
# history: every word written so far, newest first
word_history = []
# write word data to dom
# Renders the word, its length line and per-letter list, flags the body with
# the word state, then prepends the word to the history list and redraws it.
writeWord = (generated_word) ->
  # css class for a word state
  stateClass = (flag) -> if flag is true then 'isword' else 'isnotword'
  body = document.getElementsByTagName('body')[0]
  body.className = ''
  document.getElementById('word').innerHTML = generated_word.word
  body.className = stateClass generated_word.isword
  document.getElementById('length').innerHTML = generated_word.length_i + ' (' + generated_word.chance + ')'
  # one list item per letter: the letter and its frequency
  letterItems = for entry in generated_word.char_map
    '<li><span class="char">' + entry.char + '</span> <span class="charchance">' + entry.chance + '</span></li>'
  document.getElementById('charlist').innerHTML = letterItems.join ''
  # record the word, then rebuild the whole history list
  word_history.unshift {word: generated_word.word, isword: generated_word.isword}
  historyItems = for past in word_history
    '<li class="' + stateClass(past.isword) + '">' + past.word + '</li>'
  document.getElementById('history').innerHTML = historyItems.join ''
# button click: every press of the "New" button generates and renders a word
newWordButton = document.getElementById 'new_word'
newWordButton.onclick = -> writeWord randomWord()
// Web font. Loaded over HTTPS: an http:// stylesheet is blocked as mixed
// content when the page itself is served over HTTPS.
@import url(https://fonts.googleapis.com/css?family=Roboto+Condensed:300,700);
// Palette.
$black: #3f3f3f;                   // default text colour, aside and button background
$primary: #1ABC9C;                 // accent for the .isword state
$secondary: complement($primary);  // accent for the .isnotword state
$primary-in: #FFF;                 // text colour on the aside and on buttons
// Lightened tints used as the striped letter-cell backgrounds.
$light: lighten($primary,50%);
$lighter: lighten($primary,55%);
$light2: lighten($secondary,50%);
$lighter2: lighten($secondary,55%);
// Colour treatment shared by both word states: headings, the button and the
// letter cells take the accent colour, and the cells are striped with two
// tints ($odd-tint on odd cells, $base-tint on the rest).
@mixin word-state($accent, $odd-tint, $base-tint) {
  h1, h2 {
    color: $accent;
  }
  button {
    background-color: $accent;
    &:hover, &:active {
      background-color: lighten($accent,5%);
    }
  }
  #charlist {
    li {
      color: $accent;
      background-color: $base-tint;
      &:nth-child(odd) {
        background-color: $odd-tint;
      }
    }
  }
}
// Base typography, plus the state class the script sets on <body>.
body {
  font-family: 'Roboto Condensed', sans-serif;
  color: $black;
  // the generated word was found in the dictionary
  &.isword {
    @include word-state($primary, $light, $lighter);
  }
  // the generated word was not found
  &.isnotword {
    @include word-state($secondary, $light2, $lighter2);
  }
}
// Full-width page column with horizontal gutters.
main {
  box-sizing: border-box;
  width: 100%;
  padding: 0 1em;
  margin: 0 auto;
  // Explanatory copy, centred beneath the generator.
  .description {
    box-sizing: border-box;
    width: 60%;
    min-width: 420px;
    margin: 4em auto 0;
    font-size: 0.875em;
    // NOTE(review): no position is set on this rule, so bottom presumably has
    // no effect here - confirm before removing.
    bottom: 0;
  }
}
// History drawer pinned to the right edge. At rest it is faded and pushed
// mostly off-screen; hovering slides it in and makes it opaque.
aside {
  width: 20%;
  position: fixed;
  top: 0;
  right: -15%;
  bottom: 0;
  opacity: 0.2;
  transition:
    right 400ms ease-in-out,
    opacity 400ms ease-in-out;
  cursor: pointer;
  background-color: $black;
  color: $primary-in;
  box-sizing: border-box;
  overflow: scroll;
  // Nested rules are kept below every plain declaration: Sass deprecates
  // declarations that follow a nested rule (mixed declarations).
  &:hover {
    right: 0;
    opacity: 1;
  }
  // List of previously generated words, coloured by word state.
  #history {
    width: 100%;
    list-style: none;
    margin: 0;
    padding: 0;
    li {
      padding: 4px 12px;
      &.isword {
        background-color: $primary;
      }
      &.isnotword {
        background-color: $secondary;
      }
    }
  }
}
// Flat button. This neutral colouring is overridden by the body.isword and
// body.isnotword rules, which recolour the button per word state.
button {
  -webkit-appearance: none;
  border: none;
  padding: 0.5em 1em;
  font-family: 'Roboto Condensed', sans-serif;
  font-size: 1.4em;
  font-weight: 300;
  color: $primary-in;
  background-color: $black;
  &:hover, &:active {
    background-color: lighten($black,5%);
  }
}
// Generator area: word, length line, letter list and button.
.word-main {
  margin-top: 2em;
  text-align: center;
}
// Shared reset for the word (h1) and its length line (h2).
h1, h2 {
  text-align: center;
  margin: 0;
}
// The generated word.
h1 {
  font-size: 4em;
  font-weight: 700;
}
// Word length and its frequency.
h2 {
  font-weight: 300;
  font-size: 2em;
}
// Description heading.
h3 {
  margin: 0;
  font-weight: 700;
  font-size: 1.4em;
}
// Call-out line at the end of the description.
h4 {
  font-weight: 700;
}
// Description paragraphs.
p {
  line-height: 1.4;
  margin: 0.5em 0 0;
}
// Edge length of one letter cell.
$char-di: 60px;
// Letter breakdown: a table row with one fading-in cell per letter.
#charlist {
  display: table;
  width: 50%;
  min-width: $char-di * 5;
  max-width: $char-di * 19;
  height: $char-di * 2;
  margin: 2em auto;
  padding: 0;
  line-height: 1.4;
  li {
    display: table-cell;
    vertical-align: middle;
    box-sizing: border-box;
    width: $char-di;
    height: $char-di;
    padding: 0.25em 0.5em;
    font-size: 0.875em;
    color: $primary;
    background-color: $lighter;
    // Cells start invisible; the letter animation fades each one in and
    // `forwards` keeps it visible afterwards.
    opacity: 0;
    animation: letter 200ms forwards;
    &:nth-child(odd) {
      background-color: $light;
    }
    // Stagger the fade-in: each cell starts 50ms after the previous one,
    // for up to 19 cells.
    @for $slot from 1 through 19 {
      &:nth-child(#{$slot}) {
        animation-delay: $slot * 50ms;
      }
    }
    span { display: block; }
    .char { font-weight: 700; }
    .charchance { font-weight: 300; }
  }
}
// Fade-in used by the letter cells.
@keyframes letter {
  from { opacity: 0 }
  to { opacity: 1 }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment