Created
June 17, 2013 15:01
-
-
Save surrealdetective/5797539 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rubygems' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'sqlite3' | |
# Extracts the student's display name from a profile page.
#
# The text is stripped of surrounding whitespace, matching what getbiography
# does with its own text.
#
# @param webpage [#search] parsed profile document (a Nokogiri document in this script)
# @return [String] text found under the `h4.ib_main_header` heading
def getname(webpage)
  webpage.search('h4.ib_main_header').children.inner_text.strip
end
# Extracts the biography text from a profile page.
#
# Only the first child node of the matched `div.services p` paragraphs is used.
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the stripped biography text, or nil when the page has
#   no matching node (previously this raised NoMethodError on nil)
def getbiography(webpage)
  first_node = webpage.search("div.services p").children.first
  return unless first_node

  first_node.inner_text.strip
end
# Returns the URL of the student's personal site.
#
# The personal link is taken to be the LAST anchor inside `.social-icons`.
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the anchor's href, or nil when there is no such anchor
#   or it has no href (previously either case raised NoMethodError on nil)
def getpersonal(webpage)
  link = webpage.search(".social-icons a").last
  link && link['href']
end
# Returns the URL of the student's Twitter profile.
#
# The Twitter link is taken to be the FIRST anchor inside `.social-icons`.
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the anchor's href, or nil when there is no such anchor
#   or it has no href (previously either case raised NoMethodError on nil)
def gettwitter(webpage)
  link = webpage.search('.social-icons a').first
  link && link['href']
end
################## | |
# Returns the URL of the student's LinkedIn profile.
#
# Looks for the first anchor under `div.page-title` whose href contains "linkedin".
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the href, or nil when the page has no LinkedIn link
#   (previously a missing link raised NoMethodError on nil)
def getlinkedin(webpage)
  link = webpage.search("div.page-title a[href*='linkedin']").first
  link && link['href']
end
################## | |
# Returns the URL of the student's GitHub profile.
#
# Looks for the first anchor under `div.page-title` whose href contains "github".
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the href, or nil when the page has no GitHub link
#   (previously a missing link raised NoMethodError on nil)
def getgithub(webpage)
  link = webpage.search("div.page-title a[href*='github']").first
  link && link['href']
end
# Returns the URL of the student's Code School profile.
#
# Looks for the first anchor under `div.coder-cred` whose href contains "codeschool".
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the href, or nil when the page has no Code School link
#   (previously a missing link raised NoMethodError on nil)
def getcodeschool(webpage)
  link = webpage.search("div.coder-cred a[href*='codeschool']").first
  link && link['href']
end
# Returns the URL of the student's Coderwall profile.
#
# Looks for the first anchor under `div.coder-cred` whose href contains "coderwall".
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the href, or nil when the page has no Coderwall link
#   (previously a missing link raised NoMethodError on nil)
def getcoderwall(webpage)
  link = webpage.search("div.coder-cred a[href*='coderwall']").first
  link && link['href']
end
# Returns the URL of the student's Treehouse profile.
#
# Looks for the first anchor under `div.coder-cred` whose href contains "treehouse".
#
# @param webpage [#search] parsed profile document
# @return [String, nil] the href, or nil when the page has no Treehouse link
#   (previously a missing link raised NoMethodError on nil)
def gettreehouse(webpage)
  link = webpage.search("div.coder-cred a[href*='treehouse']").first
  link && link['href']
end
# Returns the education history listed on a profile page.
#
# The text of every `.services ul li` item is taken together and then passed
# through #inspect, so the result is a quoted, escaped String literal.
# NOTE(review): the #inspect looks like leftover debugging -- confirm before
# storing this value anywhere.
#
# @param webpage [#search] parsed profile document
# @return [String] inspected text of the matched list items
def geteducation(webpage)
  list_items = webpage.search('.services ul li')
  list_items.inner_text.inspect
end
# Returns the work-experience text from a profile page.
#
# Reads the children of the `h4` headings inside `#ok-text-column-4`.
# TODO: profile pages are not all laid out the same way; adapt the selector
# when other formats need to be supported.
#
# @param webpage [#search] parsed profile document
# @return [String] text of the matched headings' children
def getworkexperience(webpage)
  headings = webpage.search("#ok-text-column-4 h4")
  headings.children.inner_text
end
# Returns the URLs of the student's favorite links.
#
# All anchors under `#equalize.services-wrap` are considered, and the first
# four are skipped (presumably they are not favorites -- TODO confirm against
# the page layout). Anchors without an href are ignored.
#
# @param webpage [#search] parsed profile document
# @return [Array<String>] hrefs from the fifth anchor onward; an empty array
#   when the page has four or fewer anchors (previously nil for fewer than four)
def getfavorites(webpage)
  anchors = webpage.search("#equalize.services-wrap a")
  anchors.drop(4).map { |link| link['href'] }.compact
end
# Builds the absolute URL of every student profile linked from the index page.
#
# Each href found under `.section-blog a` is appended to the site root, so the
# hrefs are expected to be relative paths. Anchors without an href are skipped
# (previously they raised NoMethodError on nil).
#
# @param indexpage [#search] parsed index document
# @return [Array<String>] absolute profile URLs, in document order
def grabs_link(indexpage)
  hrefs = indexpage.search('.section-blog a').map { |anchor| anchor['href'] }.compact
  hrefs.map { |href| "http://students.flatironschool.com/#{href}" }
end
# Fetch and parse the student index page, then print every profile URL on it.
# URI#open (added by open-uri) is used instead of Kernel#open because
# Kernel#open stopped accepting URLs in Ruby 3.0; the block form also closes
# the downloaded stream once Nokogiri has parsed it.
indexdoc = URI.parse('http://students.flatironschool.com/').open { |io| Nokogiri::HTML(io) }
puts grabs_link(indexdoc).inspect

# Fetch and parse one known profile page; it serves as the sample document
# for the rest of the script.
holderdoc = URI.parse('http://students.flatironschool.com/students/stevenbrooks.html').open { |io| Nokogiri::HTML(io) }
# Collects the scraped fields of one student profile into a single hash.
#
# Every value is coerced with #to_s, so the hash only ever holds Strings.
# The keys are listed in the same order as the columns of the students table.
# Education, work experience and favorites are not collected here.
#
# @param holderdoc [#search] parsed profile document
# @return [Hash{Symbol => String}] one entry per scraped field
def makes_student_hash(holderdoc)
  {
    getname: getname(holderdoc).to_s,
    getbiography: getbiography(holderdoc).to_s,
    gettwitter: gettwitter(holderdoc).to_s,
    getlinkedin: getlinkedin(holderdoc).to_s,
    getgithub: getgithub(holderdoc).to_s,
    getpersonal: getpersonal(holderdoc).to_s,
    getcodeschool: getcodeschool(holderdoc).to_s,
    getcoderwall: getcoderwall(holderdoc).to_s,
    gettreehouse: gettreehouse(holderdoc).to_s
  }
end
# puts student_hash.inspect | |
# Builds plain-text INSERT statements for a list of student hashes.
#
# Ids are assigned from the position in the list, starting at 1. Values are
# emitted as SQL string literals with embedded single quotes doubled (nil
# becomes NULL), so the text is valid SQL; previously values were pasted in
# unquoted. One statement is produced per student and the statements are
# joined with ";\n"; previously only the last student's statement survived.
#
# Column names come straight from the hash keys and are NOT escaped, so the
# keys must be trusted. For real inserts prefer a prepared statement with
# bound parameters over this string.
#
# @param array [Array<Hash>] student hashes mapping column name to value
# @return [String] the INSERT statement(s); "" for an empty list
def db_insert(array)
  quote = lambda do |value|
    next "NULL" if value.nil?

    escaped = value.to_s.gsub("'", "''")
    "'#{escaped}'"
  end

  statements = array.each_with_index.map do |student, index|
    columns = student.keys.join(",")
    values = [index + 1, *student.values.map(&quote)].join(", ")
    "INSERT INTO students (id, #{columns}) VALUES (#{values})"
  end

  statements.join(";\n")
end
#Database stuff begins | |
# Scrape the sample profile and persist it to students.db.
# student_hash is a one-element list of student hashes (despite its name).
student_hash = [makes_student_hash(holderdoc)]
insert = db_insert(student_hash)
# puts student_hash.inspect
# puts insert.inspect

# Columns of the students table after id, in insert order. The prepared
# statement and its bound values are both derived from this one list so a
# misspelled key cannot silently store an empty value.
student_columns = [:getname, :getbiography, :gettwitter, :getlinkedin, :getgithub,
                   :getpersonal, :getcodeschool, :getcoderwall, :gettreehouse]

db = nil
ins = nil
begin
  db = SQLite3::Database.open "students.db"

  # The students table is rebuilt from scratch on every run.
  db.execute "DROP TABLE IF EXISTS students"
  db.execute <<-SQL
    CREATE TABLE IF NOT EXISTS students(id INTEGER PRIMARY KEY,
      getname TEXT, getbiography TEXT, gettwitter TEXT,
      getlinkedin TEXT, getgithub TEXT, getpersonal TEXT, getcodeschool TEXT,
      getcoderwall TEXT, gettreehouse TEXT)
  SQL
  db.execute <<-SQL
    CREATE TABLE IF NOT EXISTS schoolinfo(student_id INTEGER,
      geteducation TEXT)
  SQL
  db.execute <<-SQL
    CREATE TABLE IF NOT EXISTS workinfo(student_id INTEGER,
      getworkexperience TEXT)
  SQL
  db.execute <<-SQL
    CREATE TABLE IF NOT EXISTS favorites(student_id INTEGER,
      getfavorites TEXT)
  SQL

  placeholders = (['?'] * (student_columns.size + 1)).join(', ')
  ins = db.prepare("insert into students (id, #{student_columns.join(', ')}) values (#{placeholders})")

  # Ids are assigned from the position in the list, starting at 1.
  student_hash.each_with_index do |student, index|
    ins.execute(index + 1, *student_columns.map { |column| student[column].to_s })
  end
ensure
  # Finalize the statement before closing the database handle.
  ins.close if ins
  db.close if db
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment