Created
September 10, 2013 15:49
-
-
Save jdjkelly/6511411 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pure helpers and lookup tables for the :scrape_blog_to Rake task.
# Nothing in this module performs network or database I/O.
module BlogToScraper
  BASE_URL = "http://www.blogto.com"

  # Paths of the blogTO "best of" lists to fetch. Lists whose heading has no
  # entry in NOUNS_BY_CATEGORY are still fetched but then skipped.
  LIST_PATHS = %w[
    /toronto/the_best_antique_stores_in_toronto/
    /toronto/the_best_art_supply_stores_in_toronto/
    /toronto/the_best_new_art_galleries_in_toronto_2008/
    /toronto/the_best_new_art_galleries_in_toronto_2009/
    /toronto/best_place_to_buy_a_suit_in_toronto/
    /toronto/the_best_baby_stores_in_toronto/
    /toronto/the_best_backyard_patios_in_toronto/
    /toronto/the_best_barber_shops_in_toronto/
    /toronto/the_best_bespoke_tailors_in_toronto/
    /toronto/the_best_bike_repair_shops_in_toronto/
    /toronto/the_best_bike_stores_in_toronto/
    /toronto/the_best_blues_bars_in_toronto/
    /toronto/the_best_bookstores_in_toronto/
    /toronto/the_best_boxing_gym_in_toronto/
    /toronto/the_best_bridal_stores_in_toronto/
    /toronto/the_best_bridesmaid_dresses_in_toronto/
    /toronto/the_best_card_paper_and_stationery_shops_in_toronto/
    /toronto/the_best_catering_companies_in_toronto/
    /toronto/the_best_comic_shops_in_toronto/
    /toronto/the_best_consignment_stores_in_toronto/
    /toronto/the_best_contemporary_art_galleries_for_emerging_artists_in_toronto/
    /toronto/the_best_contemporary_art_galleries_in_toronto/
    /toronto/the_best_crossfit_gyms_in_toronto/
    /toronto/the_best_custom_t-shirts_in_toronto/
    /toronto/the_best_new_clothing_stores_in_toronto_2008/
    /toronto/the_best_diy_spots_in_toronto/
    /toronto/the_best_dog_parks_in_toronto/
    /toronto/the_best_dry_cleaners_in_toronto/
    /toronto/the_best_new_design_stores_in_toronto_2009/
    /toronto/the_best_new_design_stores_in_toronto_2010/
    /toronto/the_best_eyeglasses_in_toronto/
    /toronto/the_best_fabric_stores_in_toronto/
    /toronto/the_best_fashion_designers_in_toronto/
    /toronto/the_best_film_festivals_in_toronto/
    /toronto/the_best_fitness_clubs_in_toronto/
    /toronto/the_best_florists_in_toronto/
    /toronto/the_best_furniture_stores_in_toronto/
    /toronto/the_best_new_fashion_stores_in_toronto_2009/
    /toronto/the_best_green_retailers_in_toronto/
    /toronto/the_best_green_services_in_toronto/
    /toronto/the_best_green_wedding_services_in_toronto/
    /toronto/the_best_hair_salons_in_toronto/
    /toronto/the_best_halloween_costume_stores_in_toronto/
    /toronto/the_best_hostels_in_toronto/
    /toronto/the_best_hot_yoga_in_toronto/
    /toronto/the_best_hotels_in_toronto/
    /toronto/the_best_indoor_sports_fields_in_toronto/
    /toronto/the_best_jazz_bars_in_toronto/
    /toronto/the_best_jewellery_stores_in_toronto/
    /toronto/the_best_kitchen_supply_stores_in_toronto/
    /toronto/the_best_laundromats_in_toronto/
    /toronto/the_best_live_music_venues_in_toronto/
    /toronto/the_best_martial_arts_in_toronto/
    /toronto/the_best_menswear_stores_in_toronto/
    /toronto/the_best_movers_in_toronto/
    /toronto/the_best_musical_instrument_stores_in_toronto/
    /toronto/the_best_outdoor_sports_fields_in_toronto/
    /toronto/the_best_personal_trainers_in_toronto/
    /toronto/the_best_pet_grooming_and_daycare_in_toronto/
    /toronto/the_best_pet_stores_in_toronto/
    /toronto/the_best_photography_galleries_in_toronto/
    /toronto/the_best_pilates_in_toronto/
    /toronto/the_best_place_to_watch_a_film_in_toronto/
    /toronto/the_best_places_to_find_stuff_made_by_local_designers/
    /toronto/the_best_public_swimming_pools_in_toronto/
    /toronto/the_best_salvage_and_reclaimed_furniture_in_toronto/
    /toronto/the_best_self_storage_in_toronto/
    /toronto/the_best_sex_shops_in_toronto/
    /toronto/the_best_shoe_stores_in_toronto/
    /toronto/the_best_skateboard_shops_in_toronto/
    /toronto/the_best_sneaker_shops_in_toronto/
    /toronto/the_best_spas_in_toronto/
    /toronto/the_best_sunglasses_in_toronto/
    /toronto/the_best_tattoo_parlours_in_toronto/
    /toronto/the_best_tennis_clubs_in_toronto/
    /toronto/the_best_theatre_production_companies_in_toronto/
    /toronto/the_best_used_bookstores_in_toronto/
    /toronto/the_best_used_cd_stores_in_toronto/
    /toronto/the_best_video_stores_in_toronto/
    /toronto/the_best_vintage_clothing_stores_in_toronto/
    /toronto/the_best_vintage_furniture_stores_in_toronto/
    /toronto/the_best_vinyl_record_stores_in_toronto/
    /toronto/the_best_waxing_salons_in_toronto/
    /toronto/the_best_yoga_studios_in_toronto/
  ].freeze

  # Extracts the category from a list page heading ("The Best X in Toronto").
  HEADING_PATTERN = /The Best (.+) in Toronto/

  # blogTO list category => Noun name. Categories missing here are skipped.
  NOUNS_BY_CATEGORY = {
    "Antique Stores" => "antiques",
    "Art Supply Stores" => "art supplies",
    "New Art Galleries" => "art",
    "Baby Stores" => "baby supplies",
    "Bespoke Tailors" => "bespoke suits",
    "Bike Repair Shops" => "bikes",
    "Bike Stores" => "bikes",
    "Bookstores" => "books",
    "Bridal Stores" => "bridal gowns",
    "Bridesmaid Dresses" => "bridesmaid dresses",
    "Card, Paper and Stationery Shops" => "stationery",
    "Comic Shops" => "comics",
    "Contemporary Art Galleries for Emerging Artists" => "art",
    "Contemporary Art Galleries" => "art",
    "Custom T-Shirts" => "t-shirts",
    "Eyeglasses" => "glasses",
    "Fabric Stores" => "fabric",
    "Fashion Designers" => "designer fashion",
    "Florists" => "flowers",
    "Furniture Stores" => "furniture",
    "New Fashion Stores" => "designer fashion",
    "Halloween Costume Stores" => "halloween costumes",
    "Jewellery Stores" => "jewellery",
    "Kitchen Supply Stores" => "kitchen supplies",
    "Menswear Stores" => "menswear",
    "Musical Instrument Stores" => "musical instruments",
    "Pet Stores" => "pet supplies",
    "Salvage and Reclaimed Furniture" => "vintage furniture",
    "Sex Shops" => "sex toys",
    "Shoe Stores" => "shoes",
    "Skateboard Shops" => "skateboards",
    "Sneaker Shops" => "sneakers",
    "Sunglasses" => "sunglasses",
    "Used Bookstores" => "books",
    "Used CD Stores" => "CDs",
    "Video Stores" => "DVDs",
    "Vintage Clothing Stores" => "vintage clothing",
    "Vintage Furniture Stores" => "vintage furniture",
    "Vinyl Record Stores" => "vinyl records"
  }.freeze

  # Name as listed on blogTO => name expected in the Foursquare result.
  # Names that need no correction are simply absent from this table.
  VENUE_NAME_CORRECTIONS = {
    "Kantelberg + Co." => "Kantelberg & Co.",
    "Gwartzman's Art Supplies" => "Gwartzmans Art Supplies",
    "Curry's Artists' Materials" => "Curry's Artist's Materials",
    "Georgia Sherman Projects" => "Georgia Scherman projects",
    "107Shaw Gallery" => "107 Shaw Gallery",
    "Macklem's" => "Macklems",
    "Moms to be and More" => "Moms to be... and More",
    "Hello Sunshine" => "hello sunshine creative baby shop",
    "Aquarius Menswear" => "Aquarius Men's Wear",
    "Walter Beauchamp Tailors" => "Walter Beauchamp",
    "Harry Rosen (Bloor)" => "Harry Rosen Menswear",
    "Dave Fix My Bike" => "Dave...Fix My Bike",
    "Bakka Phoenix" => "Bakka-Phoenix Books",
    "Becker's Bridal" => "Beckers Bridal",
    "Jealous Bridesmaids Bridal Studio" => "Jealous Bridesmaids",
    "Your White Dress Bridal Outlet" => "Your White Dress",
    "Lea Ann Belter" => "Lea-Ann Belter Bridal",
    "Pink Tartan/Seventy Seven" => "Pink Tartan",
    "Atomic Age Comix" => "Atomic Age",
    "Don't Tell Mama Gallery" => "Don't Tell Mama",
    "Stylegarage/Gus" => "Gus* Studio@STYLEGARAGE",
    "Linda Penwarden Jewellery" => "Linda Penwarden",
    "Jonathan + Olivia" => "Jonathan & Olivia",
    "The Academy of Music" => "Academy of Music",
    "Helmutt's Pet Supply" => "Helmutt's",
    "David's" => "Davids",
    "Vortex Records" => "Vortex Used Records",
    "Around Again Records" => "Around Again",
    "Penny Arcade Vintage" => "Penny Arcade",
    "Phil'z 20th Century" => "Phil'z"
  }.freeze

  FOURSQUARE_SEARCH_ENDPOINT = "https://api.foursquare.com/v2/venues/search"
  FOURSQUARE_API_VERSION = "20121006"

  # FIXME: these credentials are committed to source control. Prefer setting
  # FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET in the environment, then
  # rotate and delete these fallbacks.
  DEFAULT_FOURSQUARE_CLIENT_ID = "O3TYYRX0Q1XKSL10XKY2L4YXOJEBETNVE2ZQO4WJ1YTQFIBF"
  DEFAULT_FOURSQUARE_CLIENT_SECRET = "AOB1CBYH2OBVUPSAC2VEGD3YP0SHZD1BM0QG0V5EY02RIXP4"

  module_function

  # Returns the Noun name for a list page heading, or nil when the heading
  # does not look like "The Best X in Toronto" or X has no mapping.
  def noun_for(heading)
    found = HEADING_PATTERN.match(heading.to_s)
    found && NOUNS_BY_CATEGORY[found[1]]
  end

  # Returns the venue name to expect from Foursquare for a blogTO listing name.
  def expected_venue_name(listed_name)
    VENUE_NAME_CORRECTIONS.fetch(listed_name, listed_name)
  end

  # True when the Foursquare venue name contains the expected name, ignoring
  # case. The comparison is literal: String#match would compile the name into
  # a regexp and misread characters such as "+", "*" and ".".
  def venue_matches?(venue_name, expected_name)
    venue_name.to_s.downcase.include?(expected_name.to_s.downcase)
  end

  # Builds the Foursquare venue-search URL for a listing name. Every
  # parameter is form-encoded so "+" and "&" in names survive the round trip.
  def foursquare_search_url(listed_name)
    query = URI.encode_www_form(
      near: "Toronto",
      query: listed_name,
      limit: 1,
      client_id: ENV.fetch("FOURSQUARE_CLIENT_ID", DEFAULT_FOURSQUARE_CLIENT_ID),
      client_secret: ENV.fetch("FOURSQUARE_CLIENT_SECRET", DEFAULT_FOURSQUARE_CLIENT_SECRET),
      v: FOURSQUARE_API_VERSION
    )
    "#{FOURSQUARE_SEARCH_ENDPOINT}?#{query}"
  end

  # Builds the attribute hash stored on a Location from a Foursquare venue.
  # Coordinates are ordered [lng, lat], as in the original task.
  # NOTE(review): the original had an unclosed "[" before city, country,
  # state and street_address; scalar values are assumed here — confirm
  # against the Location model.
  def location_attributes(venue)
    place = venue["location"] || {}
    {
      foursquare_venue_object: venue,
      coordinates: [place["lng"], place["lat"]],
      city: place["city"],
      country: place["country"],
      state: place["state"],
      street_address: place["address"]
    }
  end
end

# Scrapes blogTO "best of" lists. For each list with a known category it
# ensures a Noun exists, then looks every listed venue up on Foursquare and
# creates/updates a Location when the top search result matches the listing.
# Unmatched and missing venues are reported on stdout.
task :scrape_blog_to => :environment do
  BlogToScraper::LIST_PATHS.each do |list_path|
    page = Nokogiri::HTML.parse(RestClient.get("#{BlogToScraper::BASE_URL}#{list_path}"))

    heading = page.css('h1').first
    noun_name = heading && BlogToScraper.noun_for(heading.content)
    # Skip lists with no heading or no category mapping.
    next unless noun_name

    Noun.find_or_create_by(name: noun_name)

    page.css(".torontolists-item h2 .name").each do |name_node|
      listed_name = name_node.content
      # The search uses the name as listed on blogTO; the corrected name is
      # only used to validate the result, as in the original task.
      search = JSON.parse(RestClient.get(BlogToScraper.foursquare_search_url(listed_name)))
      venue = ((search["response"] || {})["venues"] || []).first
      expected_name = BlogToScraper.expected_venue_name(listed_name)

      if venue.nil?
        puts "#{expected_name} failed"
      elsif BlogToScraper.venue_matches?(venue["name"], expected_name)
        record = Location.find_or_create_by(name: venue["name"])
        record.update_attributes(BlogToScraper.location_attributes(venue))
      else
        puts "#{expected_name} did not match #{venue["name"]}"
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment