Skip to content

Instantly share code, notes, and snippets.

@eggplants
Last active September 10, 2019 06:40
Show Gist options
  • Save eggplants/4a2da367b28bf4b6f8a1048cda74a5af to your computer and use it in GitHub Desktop.
Save eggplants/4a2da367b28bf4b6f8a1048cda74a5af to your computer and use it in GitHub Desktop.
require "open-uri"
# Probes cgi.u.tsukuba.ac.jp for the pages cje/e<i>-<j>.html (i in 1..9,
# j in 1..10) of every account s1811400..s1811550, over both http and https.
# Every URL that answers with status 200 is recorded, and the hits are written
# to silly.csv as "<uid> <url>" (space-separated, one hit per line).
out = []
file_ext = "html"
begin
  (400..550).each do |id|
    uid = "s1811#{id}"
    print uid
    (1..9).each do |i|
      (1..10).each do |j|
        ["", "s"].each do |prot|
          url = "http#{prot}://cgi.u.tsukuba.ac.jp/~#{uid}/cje/e#{i}-#{j}.#{file_ext}"
          begin
            # URI.open instead of Kernel#open: open-uri stopped patching
            # Kernel#open for URLs in Ruby 3.0. The block form also closes the
            # response body as soon as the status has been read.
            found = URI.open(url) { |res| res.status[0] == "200" }
            out << [uid, url] if found
            puts "=>200!\r"
          rescue OpenURI::HTTPError
            # Any HTTP error status is reported as a miss.
            print "=>404.\r"
          end
        end
      end
    end
  end
ensure
  # Written from `ensure` so hits collected so far survive an interrupt or a
  # network-level failure, which is not rescued above and still aborts the scan.
  File.open("silly.csv", "w") do |f|
    out.each { |hit| f.puts hit.join(" ") }
  end
end
require "open-uri"
require "benchmark"
# Times one full scan with Benchmark. The scan probes cgi.u.tsukuba.ac.jp for
# <folder>/<word>.html (folder in cje/opac, word in search/index/opac/klis) of
# every account s1811400..s1811550, over both http and https. Hits (status 200)
# are written to silly.csv as "<uid> <url>", one per line.
Benchmark.bm do |x|
  x.report do
    out = []
    file_ext = "html"
    begin
      (400..550).each do |id|
        uid = "s1811#{id}"
        ["", "s"].each do |prot|
          %w[cje opac].each do |folder|
            %w[search index opac klis].each do |word|
              url = "http#{prot}://cgi.u.tsukuba.ac.jp/~#{uid}/#{folder}/#{word}.#{file_ext}"
              begin
                # URI.open instead of Kernel#open: open-uri stopped patching
                # Kernel#open for URLs in Ruby 3.0. The block form also closes
                # the response body as soon as the status has been read.
                found = URI.open(url) { |res| res.status[0] == "200" }
                out << [uid, url] if found
                print "=>200!\r"
                # Pause only after a successful fetch; misses are not throttled.
                sleep(1)
              rescue OpenURI::HTTPError
                # Any HTTP error status is reported as a miss.
                print "=>404.\r"
              end
            end
          end
        end
      end
    ensure
      # Written from `ensure` so hits collected so far survive an interrupt or
      # a network-level failure, which is not rescued above and still aborts.
      File.open("silly.csv", "w") do |f|
        out.each { |hit| f.puts hit.join(" ") }
      end
    end
  end
end
require "open-uri"
# Lists the entries of /www whose names start with "s" followed by digits,
# saves them to the file student_id, then probes www.u.tsukuba.ac.jp for
# index.htm and index.html of each id over http and https. Every URL that
# answers with status 200 is written to LIST.csv, one per line.

# `grep -E` replaces the obsolescent `egrep` alias; the pattern is unchanged.
system("ls /www | grep -E '^s[0-9]+' > student_id")
out = []
count = 0
# File.read closes the file; a bare open(...).read leaves the handle open.
ID_LIST = File.read("student_id").split
total = ID_LIST.size * 4 # 2 protocols x 2 extensions per id
begin
  ID_LIST.each do |id|
    %w[http https].each do |protocol|
      %w[htm html].each do |sort|
        print "#{count}/#{total}"
        url = "#{protocol}://www.u.tsukuba.ac.jp/~#{id}/index.#{sort}"
        begin
          # URI.open instead of Kernel#open: open-uri stopped patching
          # Kernel#open for URLs in Ruby 3.0. The block form also closes the
          # response body as soon as the status has been read.
          found = URI.open(url) { |res| res.status[0] == "200" }
          out.push(url) if found
          print "=>200!\r"
        rescue OpenURI::HTTPError
          # Any HTTP error status is reported as a miss.
          print "=>404.\r"
        ensure
          # One probe finished, whatever the outcome.
          count += 1
        end
      end
    end
  end
ensure
  # Written from `ensure` so hits collected so far survive an interrupt or a
  # network-level failure, which is not rescued above and still aborts the scan.
  File.open("LIST.csv", "w") do |f|
    out.each { |row| f.puts row }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment