Created
November 20, 2012 14:30
-
-
Save anastasop/4118242 to your computer and use it in GitHub Desktop.
A toy program that generates sentences in English where the first word is one letter long, the second word is two letters long, and so on.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
An Amazing sentence in English from | |
https://plus.google.com/photos/117176908342196183611/albums/5812811361700087857/5812811362489240738 | |
> I do not know where family doctors acquired illegibly
> perplexing handwriting; nevertheless, extraordinary
> pharmaceutical intellectuality counterbalancing
> indecipherability transcendentalizes intercommunication's
> incomprehensibleness
The first word is one letter long, the second word is two letters,
the third word three letters long and it goes on like this
until the twentieth word.
This is a Java program that tries to generate such sentences.
It reads a large text file and builds a graph where the nodes
are words and an edge goes from one word to another iff i) the second word
has exactly one letter more and ii) there is a place in the text file
where the second word directly follows the first word.
After constructing the graph it traverses it to generate sentences.
This version generates all possible sentences, which is a large amount
of data. Maybe I should add a bit of randomness to it to make it more
practical.
I tried it with the Odyssey and the Bible from Project Gutenberg.
The best I could get was 9/10-word sentences, readable but pretty meaningless:
Odyssey: I to and till night sprang towards Penelope therefore | |
Bible: O ye his seed shall thrust another brethren therefore understand | |
Enjoy | |
Spyros http://twitter.com/anastasop | |
*/ | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.UnsupportedEncodingException; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
class Word { | |
String word; | |
Set<Word> followers = new HashSet<Word>(); | |
Word(String s) { | |
word = s; | |
} | |
void addFollower(Word w) { | |
followers.add(w); | |
} | |
int length() { | |
return word.length(); | |
} | |
@Override | |
public int hashCode() { | |
return word.hashCode(); | |
} | |
@Override | |
public boolean equals(Object obj) { | |
return word.equals(obj); | |
} | |
} | |
public class IncreasingBlabla { | |
public static void main(String[] args) { | |
if (args.length != 2) { | |
System.err.println("usage: blabla <min sentence len> <file>"); | |
System.exit(2); | |
} | |
int minLength = Integer.valueOf(args[0]); | |
String text = ""; | |
try { | |
File f = new File(args[1]); | |
byte[] content = new byte[(int)f.length()]; | |
InputStream ist = new FileInputStream(f); | |
ist.read(content); // lazy, but it's OK for local files | |
ist.close(); | |
text = new String(content, "UTF-8"); | |
} catch (Exception e) { | |
System.err.println("error: " + e.getMessage()); | |
System.exit(2); | |
} | |
Map<String, Word> words = new HashMap<String, Word>(); | |
Word start = new Word(""); | |
words.put("", start); | |
Word prevWord = start; | |
String[] textTokens = text.split("\\s"); | |
for (String textToken: textTokens) { | |
String[] wordTokens = textToken.split("\\p{Punct}"); | |
for (String wordToken: wordTokens) { | |
Word currWord = words.get(wordToken); | |
if (currWord == null) { | |
currWord = new Word(wordToken); | |
words.put(wordToken, currWord); | |
} | |
if (currWord.length() - prevWord.length() == 1) { | |
prevWord.addFollower(currWord); | |
} | |
prevWord = currWord; | |
} | |
} | |
for (Word startWord: words.values()) { | |
if (startWord.length() == 1) { | |
traverseGraph(startWord, minLength, new ArrayList<Word>()); | |
} | |
} | |
} | |
static void traverseGraph(Word w, int minLength, List<Word> currSentence) { | |
currSentence.add(w); | |
if (w.followers.size() == 0) { | |
if (currSentence.size() >= minLength) { | |
for (Word cw: currSentence) { | |
System.out.print(cw.word); | |
System.out.print(" "); | |
} | |
System.out.println(""); | |
} | |
} else { | |
for (Word nextWord: w.followers) { | |
traverseGraph(nextWord, minLength, currSentence); | |
} | |
} | |
currSentence.remove(currSentence.size() - 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment