Skip to content

Instantly share code, notes, and snippets.

@rtanglao
Created March 15, 2010 06:20
Show Gist options
  • Save rtanglao/332578 to your computer and use it in GitHub Desktop.
Save rtanglao/332578 to your computer and use it in GitHub Desktop.
gettags.rb
#!/usr/bin/env ruby
require 'json'
require 'net/http'
require 'pp'
require 'Time'
require 'tempfile'
# Tag names that are ignored when tallying tags: a tag whose name exactly
# equals one of these strings is neither counted nor printed.
# The list is frozen so it cannot be modified at runtime, and each word
# appears only once (the comparison is exact and case-sensitive, so
# repeated entries had no effect).
STOP_WORDS = [
  "thunderbird", "email", "e-mail", "mail", "thunderbird3", "tbird", "tbird3", "tb", "emails", "mails",
  "e-mails", "tb3", "tb2", "support", "help", "error", "please", "new", "ok", "message", "messages",
  "thanks", "got", "page", "two", "etc", "e.g.", "i.e", "fix", "computer", "seems", "right", "like",
  "fine", "also", "first", "worked", "something", "trying", "even", "much", "every", "client",
  "different", "may", "since", "default", "problem", "many", "hi", "mozilla", "bug", "feature", "already",
  "unable", "using", "use", "one", "anyone", "however", "anything", "wrong", "now", "think", "found",
  "see", "still", "want", "might", "answer", "going", "question", "else", "used", "user", "appears",
  "line", "problems", "questions", "works", "thank", "really", "great", "good", "well",
  "everything", "mac", "lot", "nothing", "correct", "firefox", "people", "just", "get",
  "set", "apple", "thunderbird 3", "bugzilla"
].freeze
# Returns a string of +numberOfRandomchars+ random upper-case letters.
# Each character is code point 65 ("A") plus a random offset in 0..25,
# i.e. one of "A".."Z". Used to make output file names distinct.
def randomFileNameSuffix(numberOfRandomchars)
  numberOfRandomchars.times.inject("") do |suffix, _|
    # Appending an Integer to a String appends the character with that code point.
    suffix << (65 + rand(26))
  end
end
# Performs an HTTP GET against api.getsatisfaction.com (port 80) and parses
# the response body as JSON.
#
# url - request path relative to the host root, without a leading "/".
#
# Returns the parsed JSON value when the response status is "200".
# For any other status, prints "Error:<code>" to STDERR and returns "".
# Raises JSON::ParserError when a 200 response body is not valid JSON.
def getResponse(url)
  http = Net::HTTP.new("api.getsatisfaction.com", 80)
  # Net::HTTP#get returns only the response object on Ruby 1.9+; the old
  # `resp, data = http.get(...)` form leaves data nil, so read the body
  # from the response itself.
  resp = http.get("/" + url)
  if resp.code != "200"
    printf(STDERR, "Error:%d\n", resp.code)
    return ""
  end
  JSON.parse(resp.body)
end
# Script body.
#
# Usage: six arguments, a start date and a stop date (year month day each).
# Pages through the Thunderbird topic list, fetches the tags of every topic
# whose last activity falls between the start day and the stop day (UTC),
# prints each topic's non-stop-word tags on one line of STDOUT (spaces inside
# a tag replaced by "~"), and finally writes a "tag, count" CSV sorted by
# ascending count. Progress and debug dumps go to STDERR.
if ARGV.length < 6
  puts "usage: #{$0} yyyy mm dd yyyy mmm dd"
  exit
end

# Inclusive lower bound: midnight UTC at the start of the first day.
metrics_start = Time.utc(ARGV[0], ARGV[1], ARGV[2], 0, 0)
# Exclusive upper bound: one second past 23:59:59 UTC of the stop day.
metrics_stop = Time.utc(ARGV[3], ARGV[4], ARGV[5], 23, 59, 59) + 1

topic_page = 0
end_program = false
# tag name => number of topics in the window carrying that tag
spreadsheet = Hash.new(0)

loop do
  topic_page += 1
  topic_url = "products/mozilla_thunderbird/topics.json?sort=recently_active&page=#{topic_page}&limit=30"
  printf(STDERR, "%s\n", topic_url)
  begin
    topics = getResponse(topic_url)
  rescue JSON::ParserError
    # A page that cannot be parsed is skipped; continue with the next page.
    printf(STDERR, "Parser error in topic:%s\n", topic_url)
    next
  end

  # getResponse returns "" on a non-200 status; nothing can be read from that.
  unless topics.is_a?(Hash)
    printf(STDERR, "No usable response for %s, stopping\n", topic_url)
    break
  end

  page_topics = topics["data"] || []
  # An empty page means there are no further topics; without this check the
  # loop would keep requesting pages forever.
  break if page_topics.empty?

  page_topics.each do |topic|
    last_active_at = Time.parse(topic["last_active_at"]).utc
    printf(STDERR, "TOPIC last_active_at:%s\n", last_active_at)
    # The request asks for sort=recently_active, so the first topic older
    # than the start date is treated as the end of the window.
    if last_active_at < metrics_start
      printf(STDERR, "ending program\n")
      end_program = true
      break
    end
    # Topics active after the stop day are outside the requested window.
    next if last_active_at >= metrics_stop

    printf(STDERR, "START*** of topic\n")
    PP::pp(topic, $stderr)
    printf(STDERR, "\nEND*** of topic\n")
    get_tags_str = "topics/" + topic["slug"] + "/tags.json"
    PP::pp(get_tags_str, $stderr)
    begin
      tags = getResponse(get_tags_str)
    rescue JSON::ParserError
      printf(STDERR, "Parser error in tags:%s\n", get_tags_str)
      next
    end
    # Skip the topic when the tag request did not yield a JSON object.
    next unless tags.is_a?(Hash)

    num_tags = tags["total"].to_i
    printf(STDERR, "#of tags:%d\n", num_tags)
    next if num_tags == 0

    (tags["data"] || []).each do |tag|
      printf(STDERR, "START*** of tag\n")
      PP::pp(tag, $stderr)
      printf(STDERR, "\nEND*** of tag\n")
      tag_name = tag["name"]
      next if STOP_WORDS.include?(tag_name)

      spreadsheet[tag_name] += 1
      printf(STDOUT, "%s ", tag_name.gsub(/ /, '~'))
    end
    # One output line per topic: terminate it after all tags were printed.
    printf(STDOUT, "\n")
  end

  break if end_program
end

csv_name = "gettags." + ARGV[0] + ARGV[1] + ARGV[2] + ARGV[3] + ARGV[4] + ARGV[5] + "." +
           randomFileNameSuffix(4) + ".csv"
# Block form closes the file even if writing a row raises.
File.open(csv_name, "w") do |csv_file|
  spreadsheet.sort_by { |_tag, count| count }.each do |tag, count|
    csv_file.puts "#{tag}, #{count}"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment