Created
September 13, 2011 16:56
-
-
Save pdxmph/1214303 to your computer and use it in GitHub Desktop.
Clean up twitter handles
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Reports how the Twitter handles stored in each site's staging database
# would be cleaned up.
#
# For every entry under "sites" in ~/etc/smt_config.yml the script connects to
# the MySQL database "stage_<db>", reads field_twitter_value from each row of
# content_type_profile, prints the "@handle" form the value would be converted
# to, and finally prints a per-format tally for that site.
#
# NOTE(review): the converted value is only printed; nothing in this script
# writes it back to the database. Confirm that a dry run is what is intended.
require "rubygems"
require "active_record"
require "uri"
require "yaml"

# Model over the content_type_profile table. Defined once, outside the site
# loop, instead of being reopened on every iteration.
class Profile < ActiveRecord::Base
  self.table_name = "content_type_profile"
end

# Classifies a raw Twitter field value and derives its "@handle" form.
#
# @param raw [String] the value as stored in field_twitter_value
# @return [Array] two elements: the tally label (String) and the converted
#   handle (String), or nil for the handle when the value already is "@handle"
# @raise [URI::InvalidURIError] when a URL-format value cannot be parsed
def classify_twitter(raw)
  lowered = raw.downcase
  case lowered
  when %r{\A(?:www\.)?twitter\.com}
    # Bare "twitter.com/name" or "www.twitter.com/name" without a scheme.
    ["non-URI URL", "@" + lowered.sub(%r{\A(?:www\.)?twitter\.com/}, "")]
  when %r{\Ahttps?:.+twitter\.com/#!}
    # Old "#!" URLs: drop everything up to and including "#!/".
    ["hashbang", "@" + lowered.sub(%r{\A.+?#!/?}, "")]
  when /\A@\w+/
    ["ok", nil]
  when %r{\Ahttp.+?twitter\.com/\w+\z}
    ["URL", "@" + URI.parse(raw).path.sub(%r{\A/}, "").downcase]
  else
    ["non-@ handle", "@#{lowered}"]
  end
end

# YAML.load_file opens and closes the file itself, so no handle is leaked.
config = YAML.load_file(File.expand_path("~/etc/smt_config.yml"))

config["sites"].each do |site_name, site|
  puts "*** #{site_name}"

  ActiveRecord::Base.establish_connection(
    :adapter  => "mysql",
    :host     => "127.0.0.1",
    :port     => 3334,
    :username => "",
    :password => "",
    :database => "stage_#{site['db']}"
  )

  tally = Hash.new(0)

  Profile.all.each do |profile|
    twitter = profile.field_twitter_value
    # Skip rows with no handle at all; a blank string would otherwise be
    # reported as converting to a bare "@".
    next if twitter.nil? || twitter.strip.empty?

    begin
      label, new_twitter = classify_twitter(twitter)
      if new_twitter
        puts "#{twitter} is in #{label} format. Converting to => #{new_twitter}"
      else
        puts "#{twitter} is in @handle format"
      end
      tally[label] += 1
    rescue StandardError => ex
      # Best effort: report the problem value and carry on with the next row.
      puts ex.message
    end
  end

  puts "\n\n\n*** Tally for #{site_name}"
  tally.each do |format, count|
    puts "#{format}: #{count}"
  end
  puts "\n\n-----------------------------------\n\n"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment