rtanglao · March 15, 2010 06:20
diff --git a/gettags.rb b/gettags.rb
 #!/usr/bin/env ruby
 require 'json'
 require 'net/http'
 require 'pp'
 require 'Time'
 require 'tempfile'

 # Tag names considered too generic to be worth reporting. The main loop
 # compares each tag name against this list with exact string equality and
 # does not count a tag that matches.
 # Every entry appears exactly once, and the list is frozen because nothing
 # in the script modifies it.
 STOP_WORDS = [
   "thunderbird", "email", "e-mail", "mail", "thunderbird3", "tbird", "tbird3", "tb", "emails", "mails",
   "e-mails", "tb3", "tb2", "support", "help", "error", "please", "new", "ok", "message", "messages",
   "thanks", "got", "page", "two", "etc", "e.g.", "i.e", "fix", "computer", "seems", "right", "like",
   "fine", "also", "first", "worked", "something", "trying", "even", "much", "every", "client",
   "different", "may", "since", "default", "problem", "many", "hi", "mozilla", "bug", "feature", "already",
   "unable", "using", "use", "one", "anyone", "however", "anything", "wrong", "now", "think", "found",
   "see", "still", "want", "might", "answer", "going", "question", "else", "used", "user", "appears",
   "line", "problems", "questions", "works", "thank", "really", "great", "good", "well",
   "everything", "mac", "lot", "nothing", "correct", "firefox", "people", "just", "get",
   "set", "apple", "thunderbird 3", "bugzilla"
 ].freeze

 # Returns a string made of numberOfRandomchars characters, each chosen with
 # rand from the code points 65..90 (the ASCII letters "A".."Z").
 def randomFileNameSuffix (numberOfRandomchars)
   numberOfRandomchars.times.each_with_object("") do |_, suffix|
     # Appending an Integer to a String appends that code point.
     suffix << (65 + rand(26))
   end
 end

 # Performs an HTTP GET against api.getsatisfaction.com on port 80 and parses
 # the response body as JSON.
 #
 # url - request path WITHOUT the leading "/" (one is prepended here).
 #
 # Returns the parsed JSON value when the status code is "200". For any other
 # status the code is written to STDERR and "" is returned.
 # Raises JSON::ParserError when the body of a 200 response is not valid JSON.
 def getResponse(url)
   http = Net::HTTP.new("api.getsatisfaction.com", 80)
   resp = http.get("/" + url)

   if resp.code != "200"
     printf(STDERR,"Error:%d\n", resp.code)
     return ""
   end

   # Take the payload from the response object itself. Net::HTTP#get returns
   # only the response, so destructuring a second "data" value from it yields
   # nil and JSON.parse would then fail on every request.
   JSON.parse(resp.body)
 end

 # Command-line driver. Pages through the Thunderbird topic list (requested
 # with sort=recently_active), fetches the tags of every topic, counts each
 # tag that is not listed in STOP_WORDS, echoes the counted tags to STDOUT
 # (one line per topic, spaces inside a tag shown as "~") and finally writes
 # the totals to a CSV file in the current directory.
 #
 # Arguments: start year, month, day followed by end year, month, day.
 #
 # NOTE(review): Time.parse below needs the "time" library; the require list
 # at the top of the file spells it 'Time' -- confirm that loads on
 # case-sensitive filesystems.
 if ARGV.length < 6
   puts "usage: #{$0} yyyy mm dd yyyy mmm dd"
   exit
 end

 # Paging stops at the first topic whose last activity precedes this instant.
 metrics_start = Time.utc(ARGV[0], ARGV[1], ARGV[2], 0, 0)
 # NOTE(review): the end date is parsed (which validates the arguments) but no
 # topic is ever compared against it; it otherwise only appears in the output
 # file name. Confirm whether an upper bound on the date range was intended.
 metrics_stop = Time.utc(ARGV[3], ARGV[4], ARGV[5], 23, 59) + 1
 topic_page = 0
 end_program = false
 spreadsheet = Hash.new(0) # tag name => number of topics carrying that tag

 loop do
   topic_page += 1
   topic_url = "products/mozilla_thunderbird/topics.json?sort=recently_active&page=#{topic_page}&limit=30"
   # Log the URL itself; it is passed as an argument (not as the format
   # string) so characters in the query can never be read as directives.
   printf(STDERR, "%s\n", topic_url)

   begin
     topics = getResponse(topic_url)
   rescue JSON::ParserError
     printf(STDERR, "Parser error in topic:%s\n", topic_url)
     next # skip the unreadable page and try the following one
   end

   # getResponse returns "" for a non-200 status. Stop paging in that case
   # instead of failing on a String, so the tags gathered so far are still
   # written out below.
   unless topics.is_a?(Hash)
     printf(STDERR, "No usable response for:%s\n", topic_url)
     break
   end

   page_topics = topics["data"]
   # Presumably a page past the last one has no entries (TODO confirm against
   # the API); without this check the loop never ends when no topic is older
   # than the start date.
   break if page_topics.nil? || page_topics.empty?

   page_topics.each do |topic|
     last_active_at = Time.parse(topic["last_active_at"]).utc
     printf(STDERR, "TOPIC last_active_at:%s\n", last_active_at)

     if last_active_at < metrics_start
       printf(STDERR, "ending program\n")
       end_program = true
       break
     end

     printf(STDERR, "START*** of topic\n")
     PP::pp(topic, $stderr)
     printf(STDERR, "\nEND*** of topic\n")

     get_tags_str = "topics/" + topic["slug"] + "/tags.json"
     PP::pp(get_tags_str, $stderr)

     begin
       tags = getResponse(get_tags_str)
     rescue JSON::ParserError
       printf(STDERR, "Parser error in tags:%s\n", get_tags_str)
       next # leave this topic out, keep processing the rest of the page
     end
     next unless tags.is_a?(Hash) # "" means the request did not return 200

     num_tags = tags["total"].to_i
     printf(STDERR, "#of tags:%d\n", num_tags)
     next if num_tags == 0

     Array(tags["data"]).each do |tag|
       printf(STDERR, "START*** of tag\n")
       PP::pp(tag, $stderr)
       printf(STDERR, "\nEND*** of tag\n")

       tag_name = tag["name"]
       next if STOP_WORDS.include?(tag_name)

       spreadsheet[tag_name] += 1
       printf(STDOUT, "%s ", tag_name.gsub(/ /, '~'))
     end
     # End the output line once per topic, after all of its tags were echoed.
     printf(STDOUT, "\n")
   end

   break if end_program
 end

 csv_name = "gettags." + ARGV[0..5].join + "." + randomFileNameSuffix(4) + ".csv"

 # Rows are written in ascending order of count; the block form closes the
 # file even when a write fails.
 File.open(csv_name, "w") do |csv_file|
   spreadsheet.sort_by { |_tag_name, count| count }.each do |tag_name, count|
     csv_file.puts "#{tag_name}, #{count}"
   end
 end
	#!/usr/bin/env ruby
	require 'json'
	require 'net/http'
	require 'pp'
	require 'Time'
	require 'tempfile'

	# Tag names considered too generic to be worth reporting. The main loop
	# compares each tag name against this list with exact string equality and
	# does not count a tag that matches.
	# Every entry appears exactly once, and the list is frozen because nothing
	# in the script modifies it.
	STOP_WORDS = [
	  "thunderbird", "email", "e-mail", "mail", "thunderbird3", "tbird", "tbird3", "tb", "emails", "mails",
	  "e-mails", "tb3", "tb2", "support", "help", "error", "please", "new", "ok", "message", "messages",
	  "thanks", "got", "page", "two", "etc", "e.g.", "i.e", "fix", "computer", "seems", "right", "like",
	  "fine", "also", "first", "worked", "something", "trying", "even", "much", "every", "client",
	  "different", "may", "since", "default", "problem", "many", "hi", "mozilla", "bug", "feature", "already",
	  "unable", "using", "use", "one", "anyone", "however", "anything", "wrong", "now", "think", "found",
	  "see", "still", "want", "might", "answer", "going", "question", "else", "used", "user", "appears",
	  "line", "problems", "questions", "works", "thank", "really", "great", "good", "well",
	  "everything", "mac", "lot", "nothing", "correct", "firefox", "people", "just", "get",
	  "set", "apple", "thunderbird 3", "bugzilla"
	].freeze

	# Returns a string made of numberOfRandomchars characters, each chosen with
	# rand from the code points 65..90 (the ASCII letters "A".."Z").
	def randomFileNameSuffix (numberOfRandomchars)
	  numberOfRandomchars.times.each_with_object("") do |_, suffix|
	    # Appending an Integer to a String appends that code point.
	    suffix << (65 + rand(26))
	  end
	end

	# Performs an HTTP GET against api.getsatisfaction.com on port 80 and parses
	# the response body as JSON.
	#
	# url - request path WITHOUT the leading "/" (one is prepended here).
	#
	# Returns the parsed JSON value when the status code is "200". For any other
	# status the code is written to STDERR and "" is returned.
	# Raises JSON::ParserError when the body of a 200 response is not valid JSON.
	def getResponse(url)
	  http = Net::HTTP.new("api.getsatisfaction.com", 80)
	  resp = http.get("/" + url)

	  if resp.code != "200"
	    printf(STDERR,"Error:%d\n", resp.code)
	    return ""
	  end

	  # Take the payload from the response object itself. Net::HTTP#get returns
	  # only the response, so destructuring a second "data" value from it yields
	  # nil and JSON.parse would then fail on every request.
	  JSON.parse(resp.body)
	end

	# Command-line driver. Pages through the Thunderbird topic list (requested
	# with sort=recently_active), fetches the tags of every topic, counts each
	# tag that is not listed in STOP_WORDS, echoes the counted tags to STDOUT
	# (one line per topic, spaces inside a tag shown as "~") and finally writes
	# the totals to a CSV file in the current directory.
	#
	# Arguments: start year, month, day followed by end year, month, day.
	#
	# NOTE(review): Time.parse below needs the "time" library; the require list
	# above spells it 'Time' -- confirm that loads on case-sensitive filesystems.
	if ARGV.length < 6
	  puts "usage: #{$0} yyyy mm dd yyyy mmm dd"
	  exit
	end

	# Paging stops at the first topic whose last activity precedes this instant.
	metrics_start = Time.utc(ARGV[0], ARGV[1], ARGV[2], 0, 0)
	# NOTE(review): the end date is parsed (which validates the arguments) but no
	# topic is ever compared against it; it otherwise only appears in the output
	# file name. Confirm whether an upper bound on the date range was intended.
	metrics_stop = Time.utc(ARGV[3], ARGV[4], ARGV[5], 23, 59) + 1
	topic_page = 0
	end_program = false
	spreadsheet = Hash.new(0) # tag name => number of topics carrying that tag

	loop do
	  topic_page += 1
	  topic_url = "products/mozilla_thunderbird/topics.json?sort=recently_active&page=#{topic_page}&limit=30"
	  # Log the URL itself; it is passed as an argument (not as the format
	  # string) so characters in the query can never be read as directives.
	  printf(STDERR, "%s\n", topic_url)

	  begin
	    topics = getResponse(topic_url)
	  rescue JSON::ParserError
	    printf(STDERR, "Parser error in topic:%s\n", topic_url)
	    next # skip the unreadable page and try the following one
	  end

	  # getResponse returns "" for a non-200 status. Stop paging in that case
	  # instead of failing on a String, so the tags gathered so far are still
	  # written out below.
	  unless topics.is_a?(Hash)
	    printf(STDERR, "No usable response for:%s\n", topic_url)
	    break
	  end

	  page_topics = topics["data"]
	  # Presumably a page past the last one has no entries (TODO confirm against
	  # the API); without this check the loop never ends when no topic is older
	  # than the start date.
	  break if page_topics.nil? || page_topics.empty?

	  # Block parameters use plain pipes; backslash-escaped pipes are not Ruby.
	  page_topics.each do |topic|
	    last_active_at = Time.parse(topic["last_active_at"]).utc
	    printf(STDERR, "TOPIC last_active_at:%s\n", last_active_at)

	    if last_active_at < metrics_start
	      printf(STDERR, "ending program\n")
	      end_program = true
	      break
	    end

	    printf(STDERR, "START*** of topic\n")
	    PP::pp(topic, $stderr)
	    printf(STDERR, "\nEND*** of topic\n")

	    get_tags_str = "topics/" + topic["slug"] + "/tags.json"
	    PP::pp(get_tags_str, $stderr)

	    begin
	      tags = getResponse(get_tags_str)
	    rescue JSON::ParserError
	      printf(STDERR, "Parser error in tags:%s\n", get_tags_str)
	      next # leave this topic out, keep processing the rest of the page
	    end
	    next unless tags.is_a?(Hash) # "" means the request did not return 200

	    num_tags = tags["total"].to_i
	    printf(STDERR, "#of tags:%d\n", num_tags)
	    next if num_tags == 0

	    Array(tags["data"]).each do |tag|
	      printf(STDERR, "START*** of tag\n")
	      PP::pp(tag, $stderr)
	      printf(STDERR, "\nEND*** of tag\n")

	      tag_name = tag["name"]
	      next if STOP_WORDS.include?(tag_name)

	      spreadsheet[tag_name] += 1
	      printf(STDOUT, "%s ", tag_name.gsub(/ /, '~'))
	    end
	    # End the output line once per topic, after all of its tags were echoed.
	    printf(STDOUT, "\n")
	  end

	  break if end_program
	end

	csv_name = "gettags." + ARGV[0..5].join + "." + randomFileNameSuffix(4) + ".csv"

	# Rows are written in ascending order of count; the block form closes the
	# file even when a write fails.
	File.open(csv_name, "w") do |csv_file|
	  spreadsheet.sort_by { |_tag_name, count| count }.each do |tag_name, count|
	    csv_file.puts "#{tag_name}, #{count}"
	  end
	end
No results found