Created
May 28, 2012 21:52
-
-
Save xoebus/2821338 to your computer and use it in GitHub Desktop.
*Very* Hack-y Markov Chain Sentence Generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Usage: markov.rb [tweet|irc] [FILE]
# Trains on the FILE based on the formatting given in the first argument.
#
# tweet - parses the CSV output of the `t` Twitter gem
# irc - parses the standard irssi log
#
# First-order Markov chain over a sequence of words.
#
# Internally keeps a table of the form
#   { word => { following_word => times_seen } }
# built from every adjacent pair in the training sequence.
class MarkovChain
  # words - ordered collection of words to train on. Each adjacent pair
  #         (words[i], words[i + 1]) is recorded as one transition; the final
  #         word therefore only becomes a key if it also occurs earlier.
  def initialize(words)
    @words = {}
    words.each_cons(2) { |current_word, next_word| add(current_word, next_word) }
  end

  # Records one observation of next_word directly following word.
  def add(word, next_word)
    @words[word] ||= Hash.new(0)
    @words[word][next_word] += 1
  end

  # Picks a follower of word at random, weighted by how often each follower
  # was observed. Returns "" when word has no recorded followers.
  def get(word)
    followers = @words[word]
    return "" unless followers

    # Draw a number in 1..total and walk the cumulative counts until the
    # running total reaches it; a follower seen k times owns k of the slots.
    total = followers.values.inject(0, :+)
    threshold = rand(total) + 1
    running = 0
    chosen, _count = followers.find do |_candidate, count|
      running += count
      running >= threshold
    end
    chosen
  end

  # True when word has at least one recorded follower.
  def word_present?(word)
    @words.key?(word)
  end

  # Builds a sentence by starting at start_word and repeatedly following a
  # random transition.
  #
  # start_word - word to begin with; nil or "" yields an empty sentence
  #              (random_word returns nil for a chain with no transitions).
  # length     - maximum number of words; values <= 0 yield "".
  #
  # Returns the words joined by single spaces, followed by one trailing space
  # (e.g. "a b c "). Generation stops early when a word has no followers.
  def generate_sentence(start_word, length = 4)
    return "" if start_word.nil? || start_word.empty?

    chosen = []
    word = start_word
    # `<` rather than `==` so a negative length cannot loop forever.
    while chosen.size < length
      chosen << word
      word = get(word)
      break if word.empty?
    end
    chosen.map { |picked| "#{picked} " }.join
  end

  # Returns a random word that has at least one follower, or nil when the
  # chain has no transitions at all.
  def random_word
    @words.keys.sample
  end
end
# Driver: read the training file named on the command line, extract its words
# according to the requested format, train a chain and print one sentence.
abort "Usage: markov.rb [tweet|irc] [FILE]" if ARGV.size < 2

words = []
file = File.read(ARGV[1])

if ARGV[0] == "irc"
  # IRC: drop the first three whitespace-separated tokens of every line
  # (presumably timestamp/nick columns of the irssi log -- confirm against a
  # real log) and keep the rest as words.
  file.each_line do |line|
    # The slice is nil for lines with fewer than three tokens (e.g. blank
    # lines); treat those as contributing no words instead of crashing.
    words.concat(line.split(" ")[3..-1] || [])
  end
else
  # Tweets: any first argument other than "irc" is handled as tweet CSV.
  file.each_line do |line|
    # A line of length 1 is just the newline, i.e. blank.
    next if line.size == 1

    # Skip the first three comma-separated columns; re-join the remainder with
    # "," so commas inside the message text are kept.
    msg = line.split(",")[3..-1]
    next if msg.nil?

    words.concat(msg.join(",").chomp.split)
  end
  # NOTE(review): the very last word is discarded here; the reason is not
  # visible in this file -- kept as in the original.
  words.pop
end

markov = MarkovChain.new(words)
start_word = markov.random_word
# random_word is nil when fewer than two words were collected.
abort "markov.rb: not enough words in #{ARGV[1]} to build a chain" if start_word.nil?

sentence = markov.generate_sentence(start_word, 12)
puts sentence.capitalize
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment