Created
February 22, 2015 18:03
-
-
Save st/0c8512f916b944a3a23d to your computer and use it in GitHub Desktop.
Planète bleue scraping
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri'
require 'open-uri'

# Play log shared by every method in this script: each key is an upper-cased
# artist name and each value is the list of emission identifiers that were
# recorded for that artist.
@plays = {}
# Scrapes one emission page and records which artists appear on it.
#
# Downloads +url+, collects the stripped text of every <a> element whose href
# contains "artist", and appends +i+ to the list kept in @plays under the
# upper-cased artist name, so names that differ only by case share one entry.
#
# @param url [String] address of the emission page to download
# @param i [Object] emission identifier appended to each matching artist's list
# @return [Array<String>] the artist names found on the page, as scraped
def extract_plays(url, i)
  # URI.open is the explicit open-uri entry point: Kernel#open stopped
  # accepting URLs in Ruby 3.0 and would run a subprocess for "|cmd" input.
  # The block form closes the downloaded body once it has been parsed.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }
  artist_names = doc.xpath("//a[contains(@href,'artist')]").map { |node| node.text.strip }
  artist_names.each do |artist_name|
    (@plays[artist_name.upcase] ||= []) << i
  end
end
# Returns the artists with the most recorded plays, most played first.
#
# @param n [Integer] maximum number of entries to return
# @return [Array<Array>] up to +n+ [name, emissions] pairs taken from @plays,
#   ordered by descending number of emissions; empty when +n+ is zero or
#   negative. The relative order of artists with equal counts is unspecified.
def most_played(n)
  # Slicing with [0..(n - 1)] would hand back the whole list for n == 0,
  # because the range 0..-1 spans every element.
  return [] unless n.positive?

  @plays.sort_by { |_name, emissions| -emissions.size }.first(n)
end
# Prints one line per artist in @plays, in ascending order of artist name,
# formatted as "<NAME> : <emissions>" where the emissions list is rendered
# with Array#to_s.
#
# @return [Array<String>] the sorted artist names that were printed
def show_plays
  @plays.keys.sort.each do |name|
    puts "#{name} : #{@plays[name]}"
  end
end
# Prints the most played artists, one "<NAME> played <count> times" line each,
# in the order returned by most_played.
#
# @param limit [Integer] how many artists to list; defaults to 10
# @return [Array<Array>] the [name, emissions] pairs that were printed
def show_popular(limit = 10)
  most_played(limit).each do |name, emissions|
    puts "#{name} played #{emissions.count} times"
  end
end
# Scrapes every emission page in the inclusive range given on the command line.
#
# Reads the first and last emission numbers from ARGV[0] and ARGV[1], then
# calls extract_plays once per number with the matching page URL.
#
# The arguments are parsed as base-10 integers so that malformed input fails
# immediately instead of being iterated as a String range; as a result the
# identifiers handed to extract_plays are Integers rather than Strings.
#
# @return [Range] the range of emission numbers that was walked
# @raise [ArgumentError] if either argument is not a decimal integer
def extract_all
  # Base 10 is forced so a zero-padded value such as "08" is read as 8
  # rather than rejected as an invalid octal literal.
  first_emission = Integer(ARGV[0], 10)
  last_emission = Integer(ARGV[1], 10)

  (first_emission..last_emission).each do |i|
    extract_plays("http://www.laplanetebleue.com/emission-#{i}", i)
  end
end
# Prints the command-line help text to standard output.
#
# @return [nil]
def usage
  puts "pb: Usage",
       "\tpb.rb <start> <end>",
       "\tWill list how many times artists were played per emission",
       "\tbetween emission # <start> and emission # <end>"
end
# Script entry point: with exactly two command-line arguments, scrape the
# requested emissions and print both reports; otherwise print the help text.
if ARGV.count == 2
  extract_all
  show_plays
  show_popular
else
  usage
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment