|
require 'erb'
require 'fileutils'

require 'mechanize'
require 'webrick'
|
|
|
# Script-wide configuration, read by Util, Client and the top-level script.

# Saved questionnaire page that Util.parse reads the lecture table from.
$filename = 'questionnaire.htm'

# Output directory: downloaded lecture pages and main.html are written here,
# and it is served as the WEBrick document root.
$dir = 'html'

# ERB template that Util.make_main_html renders into "#{$dir}/main.html".
$template = 'main.html.template'
|
|
|
# Helpers that turn the saved questionnaire page into lecture records and
# render the index page (main.html) that links to the downloaded lectures.
class Util
  # Substitutions applied, in order, to a lecture name to build the search
  # query (every match is replaced by the empty string).
  NAME_PATTERNS = {
    /^世界を知る:/ => "",
    /(【\d+】|:.+$)/ => "",
  }.freeze

  # Substitutions that turn the place column into the ASCII directory name.
  PLACE_PATTERNS = {
    "大岡山" => "oookayama",
    "すずかけ台" => "suzukakedai",
    /^遠隔.+$/ => "remote",
  }.freeze

  # Substitutions that replace day-of-week characters in the time column
  # with English abbreviations.
  TIME_PATTERNS = {
    "月" => "Mon",
    "火" => "Tue",
    "水" => "Wed",
    "木" => "Thu",
    "金" => "Fri",
    "土" => "Sat",
    "日" => "Sun",
  }.freeze

  # Reads +filename+ and extracts one record per data row of the second
  # `.tblEnquete` table (the first row of that table is skipped).
  #
  # @param filename [String] path of the saved questionnaire HTML
  # @return [Array<Hash>] hashes with the keys :type, :place, :season, :name,
  #   :time, :code (raw, stripped cell text) plus :query, :en_place and
  #   :en_time (the name/place/time after the pattern substitutions above)
  # @raise [ArgumentError] if the page has no second `.tblEnquete` table
  def self.parse(filename)
    html = Nokogiri::HTML(File.read(filename))
    table = html.css('.tblEnquete')[1]
    raise ArgumentError, "#{filename}: second .tblEnquete table not found" unless table

    table.css('tr').drop(1).map do |tr|
      # Query the cells once per row instead of once per column.
      cells = tr.css('td')
      type, place, season = [0, 1, 2].map { |i| cells[i].text.strip }
      name, time, code = [4, 5, 6].map { |i| cells[i].text.strip }

      {
        type: type,
        place: place,
        season: season,
        name: name,
        time: time,
        code: code,
        query: translate(name, NAME_PATTERNS),
        en_place: translate(place, PLACE_PATTERNS),
        en_time: translate(time, TIME_PATTERNS),
      }
    end
  end

  # Groups +data+ by place, type, season and time, renders the grouping as a
  # nested list and writes the result of the ERB template named by $template
  # to "#{$dir}/main.html".
  #
  # The template is evaluated against this method's binding, so the local
  # names below (notably +content+) are kept stable on purpose.
  #
  # @param data [Array<Hash>] records as returned by {parse}
  # @return [Integer] number of bytes written (the result of File.write)
  def self.make_main_html(data)
    shaped = {}
    keys = %i[place type season time]
    data.each do |d|
      *branch_keys, leaf_key = d.values_at(*keys)
      # Walk/create the nested hashes; the innermost level holds an array.
      node = branch_keys.reduce(shaped) { |tree, key| tree[key] ||= {} }
      (node[leaf_key] ||= []) << d
    end

    content = +"<ul>\n"
    content << render_tree(shaped) << "</ul>\n"

    html = File.read($template)
    erb = ERB.new(html)
    File.write("#{$dir}/main.html", erb.result(binding))
  end

  # Applies every pattern => replacement pair of +patterns+ to +text+ in order.
  def self.translate(text, patterns)
    patterns.reduce(text) { |result, (pattern, replacement)| result.gsub(pattern, replacement) }
  end

  # Renders one level of the grouped tree: hashes become nested list items,
  # the innermost arrays become lecture links.
  def self.render_tree(node)
    case node
    when Hash
      node.map { |label, child| print_begining(label) + render_tree(child) + print_ending }.join
    else
      node.map { |d| lecture_link(d) }.join
    end
  end

  # Link to the saved page of lecture +d+. The path mirrors the directory
  # layout Client#save_lecture_info writes to. The href is quoted and both
  # href and label are HTML-escaped because they come from scraped text.
  def self.lecture_link(d)
    href = "./#{d[:en_place]}/#{d[:type]}/#{d[:season]}/#{d[:en_time]}/#{d[:code]}.html"
    %(<a href="#{ERB::Util.html_escape(href)}">#{ERB::Util.html_escape(d[:name])}</a><br>\n)
  end

  # Opens a list item labelled +str+ together with its nested list.
  def self.print_begining(str)
    "<li>#{ERB::Util.html_escape(str)}\n<ul>\n"
  end

  # Closes the nested list and list item opened by print_begining.
  def self.print_ending
    "</ul>\n</li>\n"
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers
  # are hidden explicitly.
  private_class_method :translate, :render_tree, :lecture_link,
                       :print_begining, :print_ending
end
|
|
|
# Looks lectures up on the OCW search page and stores the first hit on disk.
class Client
  BASE = 'http://www.ocw.titech.ac.jp'
  ENDPOINT = 'http://www.ocw.titech.ac.jp/index.php?module=General&action=Search&search_category=1&lang=JA'

  def initialize
    @agent = Mechanize.new
    @agent.user_agent_alias = 'Mac Safari'
  end

  # Posts data[:query] to the search endpoint, follows the link found in the
  # first result row and saves that page as
  # "#{$dir}/<en_place>/<type>/<season>/<en_time>/<code>.html".
  #
  # @param data [Hash] lecture record with the keys :query, :en_place, :type,
  #   :season, :en_time and :code
  # @return [false, Object] false when the search yields no result row (or
  #   the row carries no link); otherwise whatever Mechanize's +save_as+
  #   returns
  def save_lecture_info(data)
    res = @agent.post(ENDPOINT, { query: data[:query] })
    first_row = Nokogiri::HTML(res.body).css('.searchResult > tbody > tr').first
    return false unless first_row

    # A result row without an anchor is treated like "no result" instead of
    # crashing on nil.
    link = first_row.css('a').first
    href = link && link['href']
    return false unless href

    page = @agent.get(BASE + href)

    dir = "#{$dir}/#{data[:en_place]}/#{data[:type]}/#{data[:season]}/#{data[:en_time]}/"
    # mkdir_p is a no-op for existing directories, so no existence check is
    # needed (Dir.exists? no longer exists on Ruby 3.2+).
    FileUtils.mkdir_p(dir)
    page.save_as(dir + data[:code] + ".html")
  end
end
|
|
|
# First run only: when the output directory does not exist yet, create it,
# download every lecture listed in the questionnaire and build main.html.
# An existing directory is served as-is without contacting the network.
unless Dir.exist?($dir)
  Dir.mkdir($dir)
  clnt = Client.new
  data = Util.parse($filename)
  data.each { |d| clnt.save_lecture_info(d) }
  Util.make_main_html(data)
end

# Serve the generated pages on the loopback interface only.
server = WEBrick::HTTPServer.new(
  DocumentRoot: "./#{$dir}",
  BindAddress: '127.0.0.1',
  Port: 8000
)
# Stop the server cleanly on Ctrl-C instead of dying with a backtrace.
trap('INT') { server.shutdown }
server.start