Created
July 20, 2009 11:14
-
-
Save timfel/150260 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Just a simple script to grab AddressBook cards from the social network StudiVZ (which still lacks an API for doing this).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'studi_crawler' | |
# Prompts for the password belonging to +username+ with terminal echo
# switched off and returns what was typed, without the line terminator.
#
# username - String shown in the prompt.
#
# Returns the entered password; an empty String when stdin is at EOF.
def getpassword(username)
  print "Password for #{username}: "
  system "stty -echo"
  begin
    # Read from stdin explicitly: Kernel#gets would read files named in ARGV.
    line = $stdin.gets
  ensure
    # Re-enable echo even if the read is interrupted, so the terminal is not
    # left silent.
    system "stty echo"
    puts
  end
  # gets returns nil at EOF; to_s turns that into "" instead of raising.
  line.to_s.chomp
end
# Prompts for the login name on stdout and reads one line from stdin.
#
# Returns the entered name without the line terminator; an empty String
# when stdin is at EOF.
def getuser
  print "Username: "
  # Read from stdin explicitly: Kernel#gets would read files named in ARGV.
  # gets returns nil at EOF; to_s turns that into "" instead of raising.
  user = $stdin.gets.to_s.chomp
  puts
  user
end
# Ask for the credentials, log in, and export every friend's card under the
# base file name "studi".
login_name = getuser
secret = getpassword(login_name)
crawler = StudiCrawler.new(login_name, secret)
crawler.export_friends("studi")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby1.9 | |
require 'mechanize' | |
require 'vcard' | |
# Logs into StudiVZ with a Mechanize agent, walks the friends list and keeps
# one VCard per friend (name and profile URI, plus birthday and profile image
# once they have been looked up).
#
# @page always holds the page the agent fetched last; the page-level helpers
# (get_friends, get_birthday, extract_image_uri, ...) read from it.
class StudiCrawler
  # Address of the login form used unless another login_url is assigned.
  LOGIN_URL = 'https://secure.studivz.net/Login'

  attr_writer :login_url

  # Returns the login address. Falls back to LOGIN_URL so that an instance
  # has a usable address without any prior assignment.
  def login_url
    @login_url || LOGIN_URL
  end

  # Opens the login page and signs in.
  #
  # mail - value for the login form's email field.
  # pw   - value for the login form's password field.
  def initialize(mail, pw)
    @agent = new_agent
    @page = @agent.get(login_url)
    login(mail, pw)
  end

  # Fills in the first form of the current page and submits it. The page
  # returned by the submit becomes the current page.
  def login(mail, pw)
    form = @page.forms.first
    form.email = mail
    form.password = pw
    @page = form.click_button
  end

  # Goes to a page and returns it.
  #
  # newPage - a full URL, a URL relative to the current page, or the name of
  #           a friend (String or Symbol), in which case that friend's
  #           profile URI is used. URI objects are accepted as well.
  def goto(newPage)
    target = newPage
    # Only names can be looked up; URI objects do not respond to to_sym.
    if newPage.respond_to?(:to_sym)
      card = friends[newPage.to_sym]
      target = card.uri unless card.nil?
    end
    visit(target)
  end

  # Lazy accessor: crawls the friends list on first use.
  # Returns a Hash mapping friend name (Symbol) to VCard.
  def friends
    crawl_friends if @friends.nil?
    @friends
  end

  # Returns the card stored for the friend called +name+, or nil.
  def friend(name)
    friends[name.to_sym]
  end

  # Accessing details: image(name) and birthday(name). When the card has no
  # value yet, the friend's profile is opened (unless it already is the
  # current page), the matching get_DETAIL method is run, and the previously
  # current page is restored.
  [:image, :birthday].each do |item|
    define_method(item) do |name|
      card = friend(name)
      if card.send(item).nil?
        resume_at = @page
        # Plain suffix comparison: the URI may contain regexp metacharacters.
        visit card.uri unless @page.uri.to_s.end_with?(card.uri.to_s)
        send("get_#{item}", card)
        @page = resume_at
      end
      card.send(item)
    end
  end

  # Adds a VCard (uri and name set) for every friend link found on the
  # current page to @friends and returns the whole Hash.
  #
  # A link counts as a friend when it sits after the 'Alle Freunde' link, its
  # URI contains /Profile/, and the link two positions later has a text
  # containing 'Freunde'.
  # NOTE(review): this relies on the list markup; verify against the live page.
  #+TODO : refactor this out!
  def get_friends
    @friends ||= {}
    links = @page.links
    marker = links.index(@page.link_with(:text => 'Alle Freunde'))
    # No friends list on this page: nothing to add.
    return @friends if marker.nil?

    # Stop two links before the end so links[idx + 2] always exists.
    (marker + 1).upto(links.size - 3) do |idx|
      link = links[idx]
      next unless links[idx + 2].text.include?('Freunde') && link.uri.to_s =~ /\/Profile\//

      card = VCard.new
      card.uri = link.uri
      card.name = link.text
      @friends[link.text.to_sym] = card
    end
    @friends
  end

  # Sets friend.birthday from the first link on the current page whose text
  # looks like DD.MM.YYYY; leaves it untouched when there is none.
  def get_birthday(friend)
    date_link = @page.link_with(:text => /^[0-9][0-9]\.[0-9][0-9]\.[1-9][0-9][0-9][0-9]$/u)
    friend.birthday = date_link.text unless date_link.nil?
  end

  # Returns the 'src' attribute of the first element on the current page
  # matching the selector +id+, or nil when nothing matches.
  def extract_image_uri(id)
    found = @page.search(id)
    return nil if found.nil?

    node = found.to_a.compact.first
    node.nil? ? nil : node.attributes['src']
  end

  # Stores the source of the element matching "#profileImage" in friend.image.
  def get_image(friend)
    friend.image = extract_image_uri("#profileImage")
  end

  # Returns the text of the last link on the current page whose URI ends in
  # /p/2, or nil when there is no such link (only one page of results).
  def get_next_symbol
    candidates = @page.links.select do |link|
      begin
        link.uri.to_s =~ /\/p\/2$/u
      rescue URI::InvalidURIError
        # Links with an unparsable href cannot be pagination links.
        false
      end
    end
    candidates.empty? ? nil : candidates.last.text
  end

  # Collects the friends on the current list page and looks up birthday and
  # image for each friend that was not known before, printing one dot per
  # profile. The list page is the current page again afterwards.
  # Returns the Hash of all friends.
  def fill_details
    already_known = (@friends || {}).keys
    get_friends
    # Only visit profiles added by this page; earlier pages were handled
    # when they were crawled.
    (@friends.keys - already_known).each do |name|
      list_page = @page
      visit @friends[name].uri
      birthday name
      image name
      print "."
      @page = list_page
    end
    @friends
  end

  # Opens the 'Meine Freunde' list, processes it and every following page,
  # then restores the page that was current before.
  def crawl_friends
    origin = @page
    # make sure we're on the friends list
    visit @page.link_with(:text => 'Meine Freunde').uri
    next_page_sym = get_next_symbol
    fill_details
    unless next_page_sym.nil?
      while (next_link = @page.link_with(:text => next_page_sym))
        puts "Next page!"
        visit next_link.uri
        fill_details
      end
    end
    @page = origin
  end

  # Returns the source of the element matching "#PhotoContainer" on the
  # current page, or nil.
  def fill_img_uri
    extract_image_uri("#PhotoContainer")
  end

  # Steps through an album starting at link.as_uri, following the
  # "nächstes Foto >>" link until it is missing, then restores the page that
  # was current before.
  # NOTE(review): the value of fill_img_uri is discarded on every step, so
  # nothing is collected yet.
  def crawl_album(link)
    origin = @page
    visit link.as_uri
    fill_img_uri
    while (next_photo = @page.link_with(:text => "nächstes Foto >>"))
      puts "Next Picture!"
      visit next_photo.uri
      fill_img_uri
    end
    @page = origin
  end

  # Calls export(filename) on the card of every friend.
  def export_friends(filename)
    friends.each_value do |card|
      card.export(filename)
    end
  end

  private

  # Fetches +uri+ (resolved against the current page unless it starts with
  # "http"), makes the result the current page and returns it. Unlike goto it
  # never consults the friends list, so it is safe to use while crawling.
  def visit(uri)
    uri = @page.uri + uri unless uri.to_s =~ /^http/u
    @page = @agent.get(uri)
  end

  # Builds the HTTP agent. Uses the top-level Mechanize class when it is
  # defined and the older WWW::Mechanize name otherwise.
  def new_agent
    defined?(::Mechanize) ? ::Mechanize.new : ::WWW::Mechanize.new
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'ostruct' | |
require 'base64' | |
require 'curb' | |
# A very simple vCard writer. Details are added directly through the
# OpenStruct interface (card.name = ..., card.birthday = ...). On export,
# every field listed in MAPPING that has been set to a non-nil value is
# formatted into a line intended to follow vCard version 3.0; all other
# fields are ignored.
class VCard < OpenStruct
  # Field name => formatter turning the field's value into vCard text.
  MAPPING = {
    # "First Rest Of Name" becomes N:Rest Of Name;First;;; plus the FN line.
    :name => lambda { |full_name|
      given, *family = full_name.split
      "N:#{family.join(' ')};#{given};;;\nFN:#{full_name}\n"
    },
    :phone => lambda { |number|
      "TEL:#{number}\n"
    },
    # DD.MM.YYYY becomes YYYYMMDD.
    :birthday => lambda { |date|
      "BDAY;value=date:#{date.split('.').reverse.join}\n"
    },
    # Downloads the image and embeds it Base64-encoded. Every line break of
    # the encoded data is followed by a space so the lines after the first
    # are indented by one space.
    :image => lambda { |url|
      raw = Curl::Easy.perform(url.to_s).body_str
      folded = Base64.encode64(raw).gsub(/\s+/, "\n ")
      "PHOTO;BASE64:\n " + folded.gsub(/ $/, "")
    }
  }.freeze

  # Appends this card to "<filename>.vcf".
  #
  # filename - path without the ".vcf" extension; the argument itself is not
  #            modified.
  def export(filename)
    path = filename + ".vcf"
    File.open(path, 'a') do |f|
      f << "BEGIN:VCARD\nVERSION:3.0\n"
      MAPPING.each do |field, formatter|
        next unless respond_to?(field)

        value = send(field)
        # A field may have been assigned nil (e.g. no image was found);
        # there is nothing to write for it.
        next if value.nil?

        f << formatter.call(value)
      end
      f << "END:VCARD\n"
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment