Skip to content

Instantly share code, notes, and snippets.

@dulltz
Created July 3, 2013 07:21
Show Gist options
  • Save dulltz/5916059 to your computer and use it in GitHub Desktop.
Save dulltz/5916059 to your computer and use it in GitHub Desktop.
しょぼいカレンダーからスタッフとキャストをパースして保存する
# coding: utf-8
require 'nokogiri'
require 'open-uri'
module My
  # Scrapes one title page of Syoboi Calendar (cal.syoboi.jp/tid/<number>) and
  # records the page title plus every staff and cast name linked from it.
  #
  # Call #get_html first to download the page, then #pick_staffandcast to fill
  # a hash from the downloaded document.
  class SyoboiParser
    # Matches any opening or closing HTML tag.
    TAG_PATTERN = /<\/?[^>]*>/

    attr_accessor :anime_number

    # @param anime_number [Integer] number appended to the /tid/ URL
    def initialize(anime_number)
      @anime_number = anime_number
    end

    # Downloads the page for the current anime_number and parses it.
    #
    # @return the parsed Nokogiri document (also stored in @doc)
    def get_html
      url = "http://cal.syoboi.jp/tid/#{@anime_number}"
      # URI.open instead of Kernel#open: since Ruby 3.0 Kernel#open no longer
      # fetches URLs. The block form closes the downloaded stream as soon as
      # Nokogiri has parsed it.
      @doc = URI.open(url) { |page| Nokogiri::HTML(page) }
    end

    # Records person_name in hash with the value 1.0.
    #
    # @param person_name [String] key to store
    # @param hash [Hash] destination hash, modified in place
    # @return [Float] 1.0
    def add_hash(person_name, hash)
      hash[person_name] = 1.0
    end

    # Fills hash from the document fetched by #get_html:
    # "title" => text of the page heading, and every non-empty staff/cast link
    # text => 1.0. Prints "add_hash ok." once per staff name.
    #
    # #get_html must have been called beforehand; @doc is nil otherwise.
    #
    # @param hash [Hash] destination hash, modified in place
    def pick_staffandcast(hash)
      heading_html = @doc.search("//div[@id='main']//h1").to_html
      # The title is the text between the opening <h1> and the next tag.
      # Splitting on the tags themselves (instead of turning tags into commas
      # and splitting on commas) keeps titles that contain a comma intact.
      hash["title"] = heading_html.split(TAG_PATTERN)[1]

      each_linked_name("//table[@class='section staff']//a") do |name|
        puts "add_hash ok." if add_hash(name, hash)
      end
      each_linked_name("//table[@class='section cast']//a") do |name|
        add_hash(name, hash)
      end
    end

    private

    # Yields the tag-stripped text of every link matched by xpath, skipping
    # links whose text is empty (e.g. links that only wrap an image).
    def each_linked_name(xpath)
      @doc.search(xpath).each do |link|
        name = link.to_html.gsub(TAG_PATTERN, "")
        next if name.empty?

        yield name
      end
    end
  end
end
# Number of loop iterations (= number of programs registered on Syoboi Calendar).
# (Far too hard-coded, but there was no time to do better...)
howmany = 2842

# anime_hash[n] holds the hash collected for page number n. Index 0 and any
# page that could not be fetched stay nil.
anime_hash = []

1.upto(howmany) do |anime_number|
  syoboi = My::SyoboiParser.new(anime_number)
  begin
    break unless syoboi.get_html
  rescue OpenURI::HTTPError => e
    # One unavailable page must not throw away everything scraped so far:
    # report it and move on, leaving this slot nil.
    warn "skipping #{anime_number}: #{e.message}"
    next
  end

  puts anime_number
  # Default of 0 so that looking up a name this title does not have yields 0.
  anime_hash[anime_number] = Hash.new(0)
  syoboi.pick_staffandcast(anime_hash[anime_number])
  p anime_hash[anime_number]
end
# Export a tab-separated matrix to data.txt: one header line with every
# distinct key seen in any title, then one line per title with that title's
# value for each key.

# Only real entries; slots of anime_hash that were never filled are nil.
anime_rows = anime_hash.grep(Hash)

all_list = anime_rows.flat_map(&:keys)
all_list_uniq = all_list.uniq

# Every cell is followed by a tab; every data row is preceded by a newline
# (so the file does not end with a newline).
header = all_list_uniq.map { |word| "#{word}\t" }.join
data_rows = anime_rows.map do |anime_hash_each|
  "\n" + all_list_uniq.map { |word| "#{anime_hash_each[word]}\t" }.join
end
matrix_text = header + data_rows.join

# Write everything with a single open instead of reopening the file once per
# cell. Append mode is kept, so an existing data.txt is extended, not replaced.
File.open("data.txt", "a") { |f| f.print matrix_text } unless matrix_text.empty?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment