Last active
November 4, 2015 19:43
-
-
Save msepcot/e403e0866beff0863f49 to your computer and use it in GitHub Desktop.
Script to add GreatSchools scores to Craigslist listings.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'active_support/all' | |
require 'gmail' | |
require 'great_schools' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'sqlite3' | |
def db | |
@db ||= begin | |
db = SQLite3::Database.open('craigslist.db') | |
db.execute_batch <<-SQL.squish | |
CREATE TABLE IF NOT EXISTS houses( | |
id INTEGER, title TEXT, date TEXT, href TEXT, price INTEGER, sqft INTEGER, preview TEXT, | |
location TEXT, latitude REAL, longitude REAL, images INTEGER, checked INTEGER, rank REAL | |
); | |
CREATE TABLE IF NOT EXISTS schools( | |
id INTEGER, name TEXT, href TEXT, type TEXT, grade_range TEXT, | |
latitude REAL, longitude REAL, district_id INTEGER, score TEXT | |
); | |
CREATE TABLE IF NOT EXISTS neighborhood_schools(house_id INTEGER, school_id INTEGER); | |
SQL | |
db | |
end | |
end | |
def create_home(id, data = {}) | |
unless row = db.get_first_row('SELECT * FROM houses WHERE id = ?', id) | |
db.execute( | |
'INSERT INTO houses(id, title, date, href, price, preview, sqft, location, latitude, longitude, images, checked) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', | |
id, data[:title], data[:date], data[:href], data[:price], data[:preview], data[:sqft], data[:location], data[:latitude], data[:longitude], data[:images], 0 | |
) | |
end | |
end | |
def create_school(id, data = {}) | |
unless row = db.get_first_row('SELECT * FROM schools WHERE id = ?', id) | |
db.execute( | |
'INSERT INTO schools(id, name, href, type, grade_range, latitude, longitude, district_id) VALUES(?, ?, ?, ?, ?, ?, ?, ?)', | |
id, data[:name], data[:href], data[:type], data[:grade_range], data[:latitude], data[:longitude], data[:district_id] | |
) | |
end | |
end | |
def create_mapping(house_id, school_id) | |
db.execute('INSERT INTO neighborhood_schools(house_id, school_id) VALUES(?, ?)', house_id, school_id) | |
end | |
# = Find Homes For Rent | |
yesterday = Date.today - 1 | |
debug = [] | |
[ 'http://sfbay.craigslist.org/search/apa?s=000&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000', | |
'http://sfbay.craigslist.org/search/apa?s=100&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000', | |
'http://sfbay.craigslist.org/search/apa?s=200&bedrooms=2&housing_type=4&housing_type=6&housing_type=9&maxAsk=3100&minAsk=2000&minSqft=1000' | |
].each do |uri| | |
Nokogiri::HTML(open(uri)).css('p.row').each do |node| | |
# Filter to San Francisco, Peninsula, and the South Bay; Must be able to map the location | |
href = "http://sfbay.craigslist.org#{node.at_css('.pl a')['href']}" | |
next unless %w(sfc pen).include?(href[28..30]) # %w(sfc pen sby) | |
location = Nokogiri::HTML(open(href)).at_css('#map') | |
next unless location && location['data-latitude'].present? | |
# only looking for listings going back to yesterday (3 pages max) | |
date = Date.parse(node.at_css('time')['datetime']) | |
break if date.eql?(yesterday - 1) | |
preview = node.at_css('a.i')['data-ids'].try(:split, ',').try(:first).try(:[], 2..-1) | |
data = { | |
title: node.at_css('.pl a').text.strip, | |
date: date.to_s, | |
href: href, | |
price: node.at_css('.price').try(:text).try(:[], /\$(\d+)/, 1), | |
sqft: node.at_css('.l2').text[/- (\d+).+ -/, 1], | |
location: (node.at_css('.l2 .pnr small').try(:text) || '').strip[1..-2].try(:strip), | |
latitude: location['data-latitude'], | |
longitude: location['data-longitude'], | |
images: (!!(node.at_css('.l2 .pnr .px').text =~ /img|pic/) ? 1 : 0), | |
preview: (preview ? "http://images.craigslist.org/#{preview}_300x300.jpg" : nil), | |
} | |
debug << data | |
begin | |
create_home(node['data-pid'].to_i, data) | |
rescue => e | |
puts e.inspect, data.inspect | |
end | |
end | |
end | |
# = Find Schools Around The Homes | |
GreatSchools::API.key = 'APIKEY' | |
db.execute('SELECT id, latitude, longitude FROM houses WHERE checked = 0').each do |id, latitude, longitude| | |
GreatSchools::School.nearby('CA', latitude: latitude, longitude: longitude, limit: 10).each do |school| | |
data = { | |
name: school.name, | |
href: school.overview_link, | |
type: school.type, | |
grade_range: school.grade_range, | |
latitude: school.latitude, | |
longitude: school.longitude, | |
district_id: school.district_id | |
} | |
create_school(school.id.to_i, data) | |
create_mapping(id, school.id) | |
db.execute('UPDATE houses SET checked = 1 WHERE id = ?', id) | |
end | |
end | |
db.execute('SELECT id FROM schools WHERE type = "public" AND score IS NULL').flatten.each do |id| | |
score = GreatSchools::Score.for_school('CA', id) | |
db.execute('UPDATE schools SET score = ? WHERE id = ?', score.rank.score.to_i, id) | |
end | |
db.execute('SELECT id FROM houses WHERE date >= ?', yesterday.to_s).flatten.each do |id| | |
scores = db.execute('SELECT schools.score FROM schools INNER JOIN neighborhood_schools n ON n.school_id = schools.id WHERE n.house_id = ? AND schools.type = "public"', id).flatten.map(&:to_i) | |
db.execute('UPDATE houses SET rank = ? WHERE id = ?', (scores.reduce(:+).to_f / scores.size), id) | |
end | |
# = Build HTML Email | |
homes = db.execute('SELECT * FROM houses WHERE date >= ? ORDER BY houses.rank DESC', yesterday.to_s) | |
html = "<html>\n<head>\n <title>Craigslist - GreatSchools</title>\n</head>\n<body>\n <h3>#{homes.size} Home#{'s' unless homes.size == 1 } Matching Criteria: < $3100, House/Duplex/Townhouse, 2+ BR, 1000+ sqft in the Peninsula</h3>\n" | |
homes.each do |house| | |
html << " <h4>$#{house[4]}" | |
html << "/#{house[5]}sqft" if house[5] | |
html << " - <a href=\"#{house[3]}\">#{house[1]}</a> (#{house[7]})</h4>\n" | |
html << " <img src=\"#{house[6]}\" height=\"200\" alt=\"\" /><br />\n" if house[6] | |
html << " <ul>\n" | |
db.execute('SELECT schools.* FROM schools INNER JOIN neighborhood_schools n ON n.school_id = schools.id WHERE n.house_id = ?', house[0]).each do |school| | |
html << " <li>(#{school[8] || school[3]}) <a href=\"#{school[2]}\">#{school[1]}</a></li>\n" | |
end | |
html << " </ul>\n" | |
end and nil | |
html << "</body>\n</html>" | |
# = Send Email | |
Gmail.new('EMAIL', 'PASSWORD') do |gmail| | |
gmail.deliver do | |
to 'EMAIL' | |
subject "Craigslist Summary for #{yesterday.to_s}" | |
html_part do | |
content_type 'text/html; charset=UTF-8' | |
body(html) | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment