Skip to content

Instantly share code, notes, and snippets.

@ashaw
Created January 1, 2011 20:57
Show Gist options
  • Save ashaw/762004 to your computer and use it in GitHub Desktop.
Save ashaw/762004 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'rest_client'
require 'nokogiri'
require 'pp'
# Makes a scraped value safe to place in one comma-separated cell by turning
# every comma into a forward slash. The substitution is lossy: the original
# commas cannot be recovered from the result.
#
# @param string [String] raw text
# @return [String] a new string with each "," replaced by "/"
def escape_csv(string)
  string.tr(',', '/')
end
# Scrapes one page of a CrunchBase advanced-search result list and collects,
# for every company linked from it, the company name, its CrunchBase URL and
# the people listed on the company's own page.
class Crunch
  # Array of company records filled in by #parse. Each record is a Hash with
  # the keys :name, :url and :employees.
  attr_reader :company_data

  # Downloads and parses the search-result page for the given page number.
  #
  # @param page [Integer, String] value interpolated into the `page` query parameter
  def initialize(page)
    @crunch = Nokogiri::HTML(RestClient.get("http://www.crunchbase.com/search/advanced/companies/605026?page=#{page}"))
    @company_data = []
  end

  # Visits every company link on the result page, fetches that company's page
  # to collect its people, and appends one record per company to
  # #company_data. Prints each company name once its record has been stored.
  def parse
    @crunch.css(".search_result_name a").each do |company_link|
      crunch_link = "http://crunchbase.com" + company_link.attr("href")
      company = {
        :name => escape_csv(company_link.content),
        :url => crunch_link,
        :employees => company_people(crunch_link)
      }
      @company_data << company
      puts company_link.content
    end
  end

  # Fetches a company page and pairs each listed person with a title.
  #
  # Names and titles are read from two separate node lists and matched by
  # position (assumes the page lists them in the same order -- TODO confirm
  # against the live markup).
  #
  # @param company_link [String] absolute URL of the company page
  # @return [Array<Hash>] one single-pair hash ({ name => title }) per person,
  #   in page order; the title is nil when the page has fewer titles than names
  def company_people(company_link)
    company_page = Nokogiri::HTML(RestClient.get(company_link))
    employees = company_page.css(".col1_people_name a").map { |person| escape_csv(person.content) }
    titles = company_page.css(".col1_people_title").map { |title| escape_csv(title.content) }

    # zip yields exactly one pair per employee. The previous inclusive
    # 0..employees.length range ran one step too far and always appended a
    # bogus nil-named entry. The explicit `=>` literal is required because
    # the `{key, value}` hash form is not valid syntax on Ruby 1.9+.
    employees_with_titles = employees.zip(titles).map { |name, title| { name => title } }
    p employees_with_titles
    employees_with_titles
  end
end
# Renders company records as comma-separated text.
#
# The output starts with the FIELDS header line, followed by one line per
# company: name, url, then every employee as "name" or "name (title)". Each
# employee entry is followed by a comma, so employees occupy one column each
# and a row with employees ends in a trailing comma.
class CSVBuilder
  FIELDS = "name,url,employees"

  # The CSV text built by the constructor.
  attr_reader :doc

  # Builds the whole document immediately. Every employee name (and every
  # non-blank title) is also echoed to stdout with `p` while it is rendered.
  #
  # @param company_array [Array<Hash>] records with :name, :url and
  #   :employees keys; :employees is a list of { name => title } hashes
  def initialize(company_array)
    @doc = "#{FIELDS}\n"
    company_array.each { |company| @doc << company_row(company) }
    @doc
  end

  private

  # One complete CSV line (newline included) for a single company record.
  def company_row(company)
    "#{company[:name]},#{company[:url]},#{employees_field(company[:employees])}\n"
  end

  # Concatenates the rendered entries of every name/title pair, in order.
  def employees_field(staff)
    staff.map do |person|
      person.map { |name, title| employee_entry(name, title) }.join
    end.join
  end

  # Renders one person as "name," or "name (title)," and traces the pieces.
  def employee_entry(name, title)
    # Interpolation yields a fresh string, so appending below never mutates
    # the caller's name object.
    entry = "#{name}"
    p entry
    if title && !title.strip.empty?
      suffix = " (#{title})"
      p suffix
      entry << suffix
    end
    entry << ","
  end
end
# Scrape search-result pages 1 and 2 in order, gathering every company record
# into a single flat list.
master_company_data = []
1.upto(2) do |page|
  crawler = Crunch.new(page)
  crawler.parse
  master_company_data.concat(crawler.company_data)
end

# Render the records as CSV text and write them to crunchbase.csv in the
# current working directory, replacing any existing file.
csv = CSVBuilder.new(master_company_data)
File.open('crunchbase.csv', 'w+') { |data| data.write(csv.doc) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment