Created
May 11, 2015 08:41
-
-
Save sunny/b54ff088db470c67e006 to your computer and use it in GitHub Desktop.
Markov chain generator
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Markov Chain generator from a text file.
#
# Example:
#   source = Markov::FileSource.new('jackson.txt')
#   chain = Markov::Chain.new(source)
#
#   10.times do
#     puts chain.generate_uniq
#   end
#
module Markov
  # Supplies training phrases read from a plain-text file that holds one
  # sentence per line.
  class FileSource
    # Characters removed from every line before it is split into words.
    IGNORED_CHARACTERS = '"()*'.freeze

    # Create a source by giving a path to a text file with one line per
    # sentence. The file is not read until #phrases is first called.
    def initialize(path)
      @path = path
    end

    # Array of unique phrases from the source file. The result is memoized.
    #
    # Each line has the IGNORED_CHARACTERS removed and every run of
    # whitespace (spaces, tabs, the trailing newline) collapsed to a single
    # space, so a phrase always equals its own words joined by ' '. Chain
    # relies on that when it compares generated phrases against the source.
    # Lines with fewer than two words are dropped.
    def phrases
      @phrases ||= File.readlines(@path)
                       .map { |line| line.delete(IGNORED_CHARACTERS).split }
                       .reject { |words| words.size < 2 }
                       .map { |words| words.join(' ') }
                       .uniq
    end

    # Array of words that are possible starting points: the first word of
    # every phrase, in phrase order (duplicates are kept). Memoized.
    def first_words
      @first_words ||= phrases.map { |phrase| phrase.split.first }
    end
  end

  # Builds word-to-next-word links from a source and walks them at random to
  # produce new phrases. The source must respond to #phrases and #first_words.
  class Chain
    # Raised by #generate_uniq when no phrase outside the source was found
    # within the allowed number of attempts.
    class GenerationError < StandardError; end

    # Default number of attempts #generate_uniq makes before giving up.
    DEFAULT_MAX_ATTEMPTS = 1_000

    def initialize(source)
      @source = source
    end

    # Hash of words where the key is a word and the value is an array of words
    # that can follow. A nil entry means the word can end a phrase. Followers
    # are listed once per occurrence, so frequent followers are more likely to
    # be sampled. The result is memoized.
    #
    # Example source:
    #   The world is the best place I think
    #   I think this is the place to be
    #   I should be the best in the world
    #
    # Example links:
    #   {
    #     "The"    => ["world"],
    #     "world"  => ["is", nil],
    #     "is"     => ["the", "the"],
    #     "the"    => ["best", "place", "best", "world"],
    #     "best"   => ["place", "in"],
    #     "place"  => ["I", "to"],
    #     "I"      => ["think", "think", "should"],
    #     "think"  => [nil, "this"],
    #     "this"   => ["is"],
    #     "to"     => ["be"],
    #     "be"     => [nil, "the"],
    #     "should" => ["be"],
    #     "in"     => ["the"],
    #   }
    def links
      @links ||= @source.phrases.each_with_object({}) do |phrase, table|
        # The trailing nil pairs the last word with the end-of-phrase marker.
        (phrase.split + [nil]).each_cons(2) do |word, next_word|
          (table[word] ||= []) << next_word
        end
      end
    end

    # Create a random phrase by starting from a random first word and
    # following random links until the end-of-phrase marker (nil) is drawn.
    # Returns an empty string when the source has no first words.
    #
    # Example:
    #   chain.generate # => "I should be the world"
    #   chain.generate # => "The world is the place to be"
    #   chain.generate # => "I think this is the place to be"
    def generate
      phrase = []
      word = @source.first_words.sample
      while word
        phrase << word
        word = links[word].sample
      end
      phrase.join(' ')
    end

    # Generate a random phrase that is not included in the original source.
    #
    # max_attempts - how many phrases to try before giving up
    #                (default: DEFAULT_MAX_ATTEMPTS).
    #
    # Returns the first generated phrase that is absent from the source.
    # Raises GenerationError when every attempt reproduced a source phrase,
    # which happens for instance when the source contains a single phrase.
    def generate_uniq(max_attempts: DEFAULT_MAX_ATTEMPTS)
      # A bounded loop rather than recursion: a source that can only
      # reproduce its own phrases would otherwise recurse until the stack
      # overflows.
      max_attempts.times do
        phrase = generate
        return phrase unless @source.phrases.include?(phrase)
      end

      raise GenerationError,
            "no phrase outside the source found after #{max_attempts} attempts"
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment