Created
May 29, 2011 00:56
-
-
Save muffinista/997369 to your computer and use it in GitHub Desktop.
A Horoscope Randomizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# original source for markov code: http://blog.segment7.net/articles/2006/02/25/markov-chain
#
require 'rubygems' | |
require 'optparse' | |
require 'sqlite3' | |
# Open (or create) the SQLite database that receives the generated passages.
@db = SQLite3::Database.new("horoscopes.db")
# Ask the sqlite3 gem to return rows as hashes keyed by column name.
@db.results_as_hash = true
@db.execute("CREATE TABLE IF NOT EXISTS horoscopes (id INTEGER, data TEXT)")
# Every run regenerates the whole table, so clear out the previous batch first.
@db.execute("DELETE FROM horoscopes")
# Decide whether a word may open a generated passage.
#
# @param x [String] a whitespace-delimited word from the corpus
# @return [Boolean] true when the first character is left unchanged by
#   String#capitalize (an uppercase letter, but also any caseless character
#   such as a digit or punctuation mark) or is a double quote
def starter?(x)
  first = x[0, 1]
  first == first.capitalize || first == '"'
end
# Pick a random seed phrase to begin a new passage.
#
# The arrays held in $caps can be the very same objects that serve as keys in
# $phrases, and callers consume the seed destructively (shift/push). A copy is
# returned so that neither $caps nor the hash keys are corrupted.
#
# @return [Array<String>, nil] a fresh copy of a random entry of $caps, or nil
#   when $caps is empty
def get_starter
  seed = $caps[rand($caps.length)]
  seed.dup if seed
end
# Windows-1252 punctuation bytes mapped to plain-ASCII stand-ins.
CP1252_REPLACEMENTS = {
  0x82 => ',',
  0x84 => ',,',
  0x85 => '...',
  0x88 => '^',
  0x89 => 'o/oo',
  0x8b => '<',
  0x8c => 'OE',
  0x91 => "'",
  0x92 => "'",
  0x93 => '"',
  0x94 => '"',
  0x95 => '*',
  0x96 => '-',
  0x97 => '--',
  0x98 => '~',
  0x99 => 'TM',
  0x9b => '>',
  0x9c => 'oe'
}.freeze

# Matches any single byte listed in CP1252_REPLACEMENTS. The /n flag makes the
# pattern binary; without it Ruby 1.9+ rejects bare "\x82"-style escapes in a
# UTF-8 source file as invalid multibyte escapes.
CP1252_PATTERN = /[\x82\x84\x85\x88\x89\x8b\x8c\x91-\x99\x9b\x9c]/n

# Replace Windows-1252 "smart" punctuation bytes with ASCII equivalents and
# strip parentheses, modifying the string in place.
#
# @param s [String] text to clean; mutated
# @return [String] the same string object, for convenience
def fix_stupid_quotes!(s)
  declared = s.encoding
  # In valid UTF-8 the bytes 0x80-0x9f only occur inside multibyte characters,
  # so there are no stray CP1252 bytes to repair and rewriting them would
  # corrupt the text.
  # NOTE(review): input that mixes real UTF-8 with stray CP1252 bytes is still
  # rewritten byte-wise, as the original code did.
  clean_utf8 = declared == Encoding::UTF_8 && s.valid_encoding?

  # Work byte-wise so that invalid sequences cannot raise encoding errors.
  s.force_encoding(Encoding::BINARY)
  s.gsub!(CP1252_PATTERN) { |byte| CP1252_REPLACEMENTS[byte.getbyte(0)] } unless clean_utf8
  # Parentheses are ASCII, so removing them byte-wise is safe in any
  # ASCII-compatible encoding.
  s.delete!('()')
  s.force_encoding(declared)
end
# Default settings; each can be overridden from the command line.
args = {
  # amount of state (order-k): number of words in each phrase of the chain
  :phrase_length => 3,
  # upper bound on the words emitted for a single passage
  :max_words => 1000,
  # number of passages to attempt
  :max_output => 15000,
  # maximum characters per passage
  :char_limit => 140,
  # marshalled chain to load instead of reading a corpus ("" = none)
  :cache_file => "",
  # where to save a freshly built chain ("" = do not save)
  :write_file => ""
}

opts = OptionParser.new
# Every option takes a mandatory value; the numeric ones are validated by
# OptionParser so that a typo raises instead of silently becoming 0.
opts.on('-l', '--length MATCH', Integer) { |m| args[:phrase_length] = m }
# --write and --words both used to claim -w, so one silently shadowed the
# other. -w is kept for --words and --write gets its own -W.
opts.on('-W', '--write MATCH') { |m| args[:write_file] = m }
opts.on('-w', '--words MATCH', Integer) { |m| args[:max_words] = m }
opts.on('-o', '--output MATCH', Integer) { |m| args[:max_output] = m }
opts.on('-c', '--chars MATCH', Integer) { |m| args[:char_limit] = m }
opts.on('-f', '--file MATCH') { |m| args[:cache_file] = m }
#opts.on('-f', '--chars [MATCH]') { |m| args[:char_limit] = m.to_i }
#opts.on('-v', '--verbose') { @options.verbose = true }
# parse! removes the recognised switches from ARGV and leaves the rest in place.
opts.parse!(ARGV)

raise ArgumentError, 'phrase length too short' if args[:phrase_length] < 1
# Populate $phrases (phrase => list of possible next words) and $caps (phrases
# allowed to start a passage), either from a cache file or from the corpus.
if !args[:cache_file].to_s.empty?
  # NOTE(review): Marshal.load can instantiate arbitrary objects. Only point
  # --file at cache files produced by this script's own --write option.
  data = File.open(args[:cache_file], "rb") { |f| Marshal.load(f) }
  $phrases = data[:phrases]
  $caps = data[:caps]
else
  # Read the corpus from the files named on the command line, or from stdin.
  words = []
  ARGF.each_line do |line|
    fix_stupid_quotes!(line)
    words.concat(line.scan(/\S+/))
  end

  # Drop anything that looks like a URL or an e-mail address.
  junk_markers = ["http://", ".com", ".org", ".net", "www", "@", ".htm"]
  words.reject! { |word| junk_markers.any? { |marker| word.include?(marker) } }

  # phrase => next-word possibilities
  $phrases = {}

  # Track capitalized words -- we'll always start off with one
  $caps = []

  # build up a list of phrases and their next-word possibilities (k-grams)
  words.each_index do |index|
    phrase = words[index, args[:phrase_length]] # current phrase
    # Near the end of the corpus the next word is nil; the generator treats
    # that nil as its end-of-text marker.
    ($phrases[phrase] ||= []) << words[index + args[:phrase_length]]
    $caps << phrase if starter?(phrase.first)
  end

  unless args[:write_file].to_s.empty?
    File.open(args[:write_file], "wb") do |file|
      # Stream the dump straight into the file; puts would tack a newline
      # onto the binary data.
      Marshal.dump({ :phrases => $phrases, :caps => $caps }, file)
    end
  end
end
# Generate up to :max_output passages and store the ones that are long enough.
args[:max_output].times do |i|
  starter = get_starter
  break if starter.nil? # nothing to start from: the chain is empty

  # Work on a copy: the phrase is consumed with shift/push below, and the
  # array handed out may be shared with $caps and the keys of $phrases.
  phrase = starter.dup
  output = []

  # number of sentences before the next paragraph break
  graph_break = rand(3) + 3
  current_sentence = 0

  args[:max_words].times do
    # grab all possibilities for our state
    options = $phrases[phrase]

    tmpword = phrase.shift
    break unless tmpword

    # Length of the passage if this word were appended, including the space
    # that join(' ') will put in front of it.
    projected = output.join(' ').size + tmpword.size
    projected += 1 unless output.empty?
    break if projected > args[:char_limit].to_i

    if tmpword.end_with?('.', '?', '!')
      current_sentence += 1
      if current_sentence > graph_break
        graph_break = rand(3) + 3
        current_sentence = 0
        # Build a new string: the word object is shared with the chain, so
        # appending in place would permanently alter the corpus.
        tmpword = "#{tmpword}\n\n"
      end
    end

    # add the first word to our output and discard
    output << tmpword

    # select at random and add it to our phrase
    phrase.push(options[rand(options.length)]) unless options.nil?

    # the last phrase of the input text maps to a nil next word, so exit
    # cleanly once nothing but nils is left.
    break if phrase.compact.empty? # all out of words
  end

  # Paragraph breaks are flattened to spaces for storage.
  result = output.join(' ').strip.gsub("\n", " ")

  # don't bother if we didn't get much data back
  if result.size > 80
    # if you want to output html snippets
    # aFile = File.new("output/dump#{i}.html", "w")
    # aFile.write(result.gsub(/^(.*)$/, '<p>\1</p>'))
    # aFile.close

    # if you want to store in db
    # Bind values are passed as one array; the sqlite3 gem deprecated the
    # trailing-varargs form.
    @db.execute("INSERT INTO horoscopes (id, data) VALUES(?, ?)", [i, result])
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Good vibes in the air will help make today's Full Moon pleasant. You feel genuinely sympathetic and sensitive to the needs of partners and close friends. Gosh, golly, gee!
Today, the only Full Moon in your sign all year takes place. Fortunately for you, other planetary aspects nicely smooth everything over. (Looking good!)
This is an interesting day for love affairs, romance and sweet flirtations. Similarly, the arts and sports also are pleasantly blessed. Enjoy good times with children.
Even though there is some tension with today's Full Moon, it's still a very pleasant day with family members. People are gentle and kind to each other.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment