Created
January 9, 2014 17:32
-
-
Save Pablo-Merino/8338312 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Naive extractive summarizer: for every paragraph of the text it keeps the
# sentence whose words overlap the most with all other sentences of the text.
class Summarizer
  # @param text [String] text to summarize; paragraphs are separated by "\n\n"
  def initialize(text)
    @text = text
  end

  # Picks the best-ranked sentence of each paragraph.
  #
  # Paragraphs that yield no sentence (fewer than two sentences, or no
  # sentence with a positive score) are skipped instead of contributing
  # blank lines.
  #
  # @return [String] the selected sentences joined by "\n"
  def summarize
    ranks = get_ranks
    split_paragraphs
      .map { |paragraph| best_sentence(paragraph, ranks) }
      .reject(&:empty?)
      .join("\n")
  end

  private

  # @return [Array<String>] the text split on blank lines ("\n\n")
  def split_paragraphs
    @text.split("\n\n")
  end

  # Splits content into sentences; a newline is treated like a sentence
  # boundary and sentences are delimited by ". ".
  #
  # @param content [String] defaults to the whole text
  # @return [Array<String>]
  def split_sentences(content = @text)
    content.gsub("\n", '. ').split('. ')
  end

  # Similarity of two sentences: number of distinct shared words divided by
  # the average word count of both sentences.
  #
  # @param sentence1 [String]
  # @param sentence2 [String]
  # @return [Numeric] 0 when both sentences contain no words
  def intersect(sentence1, sentence2)
    words1 = sentence1.split(' ')
    words2 = sentence2.split(' ')
    total = words1.length + words2.length
    return 0 if total.zero?

    # Divide by 2.0 (not 2): integer division truncated the average and
    # turned it into 0 for a combined length of 1, which produced NaN.
    (words1 & words2).length / (total / 2.0)
  end

  # Normalizes a sentence into the key used by the rank table by removing
  # every run of non-word characters (whitespace and punctuation included).
  #
  # @param sentence [String]
  # @return [String]
  def sentence_key(sentence)
    sentence.gsub(/\W+/, '')
  end

  # Scores every sentence of the whole text by summing its similarity with
  # each other sentence (a sentence is never compared with itself).
  #
  # Sentences that normalize to the same key share one entry; the score of
  # the later sentence overwrites the earlier one.
  #
  # @return [Hash{String => Numeric}] normalized sentence => score
  def get_ranks
    sentences = split_sentences
    sentences.each_with_index.each_with_object({}) do |(sentence, i), ranks|
      score = 0
      sentences.each_with_index do |other, j|
        score += intersect(sentence, other) unless i == j
      end
      ranks[sentence_key(sentence)] = score
    end
  end

  # Finds the sentence of a paragraph with the highest score.
  #
  # @param paragraph [String]
  # @param sentences_hash [Hash{String => Numeric}] table built by get_ranks
  # @return [String] the first sentence with the strictly highest positive
  #   score, or "" when the paragraph has fewer than two sentences or no
  #   sentence scores above zero
  def best_sentence(paragraph, sentences_hash)
    sentences = split_sentences(paragraph)
    return '' if sentences.length < 2

    best = ''
    max_value = 0
    sentences.each do |sentence|
      # A key missing from the table counts as score 0 instead of raising
      # NoMethodError on nil.
      score = sentences_hash.fetch(sentence_key(sentence), 0)
      next unless score > max_value

      max_value = score
      best = sentence
    end
    best
  end
end
# USAGE
# Summarizes a two-paragraph sample text and prints the result.
text = "Twitter's first-party Android app has been steadily getting better over the years, but it has generally maintained the same look and feel, with four navigation tabs located at the top of your timeline. That's going to change drastically in the near future however, as a completely redesigned version of the app is now available to those that have signed up for beta testing.\n\nThe app is listed as version 5.0, and completely ditches the old tabbed layout for a sliding panel menu. It's still possible to swipe between columns for Home, Notifications, Messages, and your profile, but instead of only four columns, the app now has seven. The three new columns are dedicated to Activity, Trending, and finding new people to follow on the service."
# The class defined in this file is Summarizer; `Summarize` raised NameError.
puts Summarizer.new(text).summarize
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment