Skip to content

Instantly share code, notes, and snippets.

@arn-e
Created December 2, 2012 06:50
Show Gist options
  • Save arn-e/4187393 to your computer and use it in GitHub Desktop.
Save arn-e/4187393 to your computer and use it in GitHub Desktop.
text_analysis_sandbox
require 'uri'
require 'net/http'
require 'net/https'
require 'json'
require 'tf_idf'
# Fetches the closed issues of the pengwynn/octokit repository from the
# GitHub API and returns the decoded JSON payload.
#
# NOTE: despite its name, this method neither requests nor returns an access
# token; the name is kept so existing callers keep working.
#
# @return [Object] whatever JSON.parse yields for the response body
def get_access_token
  issues_url = "https://api.github.com/repos/pengwynn/octokit/issues?state=closed"
  request, http = set_connection_parameters(issues_url, 443)
  JSON.parse(http.request(request).body)
end
# Builds a GET request and a matching Net::HTTP client for +url+.
# Nothing is sent here; the caller issues the request via http.request(request).
#
# @param url [String] absolute URL to fetch
# @param port [Integer] TCP port to use; it replaces any port present in
#   +url+. Passing 443 switches the client to TLS.
# @return [Array] two-element array: [Net::HTTP::Get, Net::HTTP]
def set_connection_parameters(url, port = 80)
  uri = URI.parse(url)
  uri.port = port
  http = Net::HTTP.new(uri.host, uri.port)
  if port == 443
    http.use_ssl = true
    # Verify the server certificate. VERIFY_NONE would accept any certificate
    # and leave the connection open to man-in-the-middle attacks.
    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
  end
  [Net::HTTP::Get.new(uri.request_uri), http]
end
# Writes one "<index> ::: <title> ::: <body>" record per entry of +data+ to
# temp_data_file.txt in the current directory, replacing any previous content.
#
# Each record is newline-terminated so that the index of one record cannot
# fuse with the last word of the previous body, and so that line-based
# readers see separate records. A body that itself contains newlines will
# still span several lines.
#
# @param data [Enumerable] entries responding to ["title"] and ["body"]
# @return [nil]
def write_file(data)
  # Block form guarantees the handle is closed even if an entry raises.
  File.open("temp_data_file.txt", "w") do |file|
    data.each_with_index do |entry, idx|
      file.puts("#{idx} ::: #{entry["title"]} ::: #{entry["body"]}")
    end
  end
  nil
end
# Reads temp_data_file.txt from the current directory.
#
# The lines are loaded eagerly and the file is closed before returning, so no
# open handle is left behind. The result supports the same line-by-line
# each / each_with_index iteration as an open File, with each line keeping
# its trailing newline.
#
# @return [Array<String>] the lines of the file
# @raise [Errno::ENOENT] if the file does not exist
def read_file
  File.readlines("temp_data_file.txt")
end
# Counts every whitespace-separated token in the data returned by read_file,
# ignoring the ":::" field separator, and prints each [word, count] pair in
# ascending order of count.
#
# @return [Array<Array>] the [word, count] pairs, least frequent first
def most_frequent_words
  counts = Hash.new(0)
  read_file.each do |line|
    line.split(' ').each do |token|
      counts[token] += 1 unless token == ":::"
    end
  end
  ranked = counts.sort_by { |_word, count| count }
  ranked.each { |pair| p pair }
end
# Treats every line returned by read_file as one document (its
# whitespace-separated tokens minus the ":::" separator), scores the
# documents with TfIdf, keeps the highest score seen for each word and
# prints the [word, score] pairs in ascending order of score.
#
# Only scores greater than 0 are recorded, because the running maximum
# starts at 0 for every word.
#
# NOTE(review): TfIdf#tf_idf is assumed to return one word => score hash per
# document, based on how the result is iterated here; confirm against the gem.
#
# @return [Array<Array>] the [word, score] pairs, lowest score first
def word_importance
  documents = read_file.map do |line|
    line.split(' ').reject { |token| token == ":::" }
  end

  best = Hash.new(0)
  TfIdf.new(documents).tf_idf.each do |doc_scores|
    doc_scores.each do |word, score|
      best[word] = score if score > best[word]
    end
  end

  ranked = best.sort_by { |_word, score| score }
  ranked.each { |pair| p pair }
end
# Script entry point: runs the word-importance report when the file is executed.
word_importance
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment