Last active
October 11, 2019 07:44
-
-
Save eggplants/984c80817a25f9c1eb9318a13995eff1 to your computer and use it in GitHub Desktop.
食べログの点数の{度数分布,平均値,中央値}を表示するおもちゃ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [SETTING] # | |
#pref='osaka/' | |
#region='' | |
#scores=[] | |
#freq=0.1 | |
#pagelimit=60 | |
#scoreshow=false | |
$ ruby tabelog_score.rb | |
CURRENT TIME IS 2019-10-11 00:55:32 +0900. | |
PREF:osaka/, REGION_CODE: | |
60 PAGE PROCESSING... | |
ANARYZE STORE:1104 | |
[0.30,0.40):0 | |
[0.40,0.50):0 | |
[0.50,0.60):0 | |
[0.60,0.70):0 | |
[0.70,0.80):0 | |
[0.80,0.90):0 | |
[0.90,1.00):0 | |
[1.00,1.10):0 | |
[1.10,1.20):0 | |
[1.20,1.30):0 | |
[1.30,1.40):0 | |
[1.40,1.50):0 | |
[1.50,1.60):0 | |
[1.60,1.70):0 | |
[1.70,1.80):0 | |
[1.80,1.90):0 | |
[1.90,2.00):0 | |
[2.00,2.10):0 | |
[2.10,2.20):0 | |
[2.20,2.30):0 | |
[2.30,2.40):0 | |
[2.40,2.50):0 | |
[2.50,2.60):0 | |
[2.60,2.70):0 | |
[2.70,2.80):0 | |
[2.80,2.90):0 | |
[2.90,3.00):29 | |
[3.00,3.10):509 | |
[3.10,3.20):164 | |
[3.20,3.30):149 | |
[3.30,3.40):123 | |
[3.40,3.50):90 | |
[3.50,3.60):67 | |
[3.60,3.70):10 | |
[3.70,3.80):3 | |
[3.80,3.90):0 | |
[3.90,4.00):0 | |
[4.00,4.10):0 | |
[4.10,4.20):0 | |
[4.20,4.30):0 | |
[4.30,4.40):0 | |
[4.40,4.50):0 | |
[4.50,4.60):0 | |
[4.60,4.70):0 | |
[4.70,4.80):0 | |
[4.80,4.90):0 | |
[4.90,5.00):0 | |
AVERAGE... | |
3.19 | |
MEDIAN... | |
3.12 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SETTING
# NOTE(review): pref and region are empty strings here, so the
# "PREF:..., REGION_CODE:..." line printed below shows blank values.
pref='' | |
region='' | |
scores=[] | |
freq=0.5 | |
pagelimit=60 | |
scoreshow=false | |
# TIME&SEARCH_CITY INFO
# Prints the current wall-clock time, then the two settings above.
puts "CURRENT TIME IS #{Time.now}." | |
puts "PREF:#{pref}, REGION_CODE:#{region}" | |
# SCRAPING
# NOTE(review): the block opened on the next line is never closed in the
# visible text, and preflist is not assigned anywhere in this fragment.
# The block parameter reuses the name of the outer local pref.
preflist.each{|pref|puts pref=pref+'/' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "open-uri" | |
require "time" | |
# PREF-DATA
# 47 romanized Japanese prefecture names, from hokkaido to okinawa.
# The entries carry no trailing '/', unlike the value assigned to `pref`.
preflist = %w[
  hokkaido aomori iwate
  miyagi akita yamagata
  fukushima ibaraki tochigi
  gunma saitama chiba
  tokyo kanagawa niigata
  toyama ishikawa fukui
  yamanashi nagano gifu
  shizuoka aichi mie
  shiga kyoto osaka
  hyogo nara wakayama
  tottori shimane okayama
  hiroshima yamaguchi tokushima
  kagawa ehime kochi
  fukuoka saga nagasaki
  kumamoto oita miyazaki
  kagoshima okinawa
]
# SETTING
# pref and region are interpolated, in that order, into the listing URL;
# each value carries its own trailing slash.
pref = 'ibaraki/'
region = 'C8220/'
# Collects the scores scraped from each page.
scores = []
# Width of each bin in the frequency distribution.
freq = 0.5
# The scraping loop stops once this page number has been processed.
pagelimit = 60
# When true, every individual score is printed before the summary.
scoreshow = false

# TIME&SEARCH_CITY INFO
puts "CURRENT TIME IS #{Time.now}."
puts "PREF:#{pref}, REGION_CODE:#{region}"
# SCRAPING
# Walks listing pages 1..60 for the configured pref/region and appends the
# rating strings found on each page to `scores` (one sub-array per page).
# Stops early when the page contains the "cannot display this page" notice
# or when page number `pagelimit` has been processed.
(1..60).each do |n|
  url = "https://tabelog.com/#{pref}#{region}rstLst/#{n}"
  print "\r#{n} PAGE PROCESSING..."
  # URI.open rather than bare open: open-uri no longer routes Kernel#open to
  # URLs on Ruby 3.0+. The block form closes the underlying IO after reading.
  pagedata = URI.open(url, &:read)
  # Capture the text that follows each `rating-val">` up to the next '<'.
  scores << pagedata.scan(/rating-val">([^<]+)</).flatten
  break if pagedata.include?('このページを表示することができません')
  break if n == pagelimit
end
# Terminate the "\r"-rewritten progress line.
puts
# Flatten the per-page arrays, convert the captured strings to floats and sort
# ascending, all in place. The bang calls are issued separately because
# flatten! returns nil when it changes nothing, which would break a chain.
scores.flatten!
scores.map!(&:to_f)
scores.sort!
puts scores if scoreshow
n = scores.size
puts "ANARYZE STORE:#{n}"
puts "FREQUENCY DISTRIBUTION..."
# Half-open bins [lower, upper). Both edges are derived from the bin index and
# rounded, so the upper edge of one bin is exactly the lower edge of the next.
# Computing edges as rank and rank + freq in floating point lets neighbouring
# bins disagree, which can count a score on an edge twice or in the wrong bin.
# NOTE(review): a score equal to the top edge of the last bin is not counted in
# any bin; confirm whether that value can occur.
0.0.step(5.0 - freq, freq).each_with_index do |_rank, i|
  lower = (i * freq).round(6)
  upper = ((i + 1) * freq).round(6)
  print "[#{format('%.2f', lower)},#{format('%.2f', upper)}):"
  puts scores.count { |rate| lower <= rate && rate < upper }
end
puts "AVERAGE..."
# With no scores this is 0.0 / 0, which formats as "NaN".
puts format("%.2f", scores.sum.to_f / n)
puts "MEDIAN..."
# Median of the sorted list; NaN when there are no scores, matching the
# average, instead of raising on a nil element.
median =
  if n.zero?
    Float::NAN
  elsif n.even?
    (scores[n / 2 - 1] + scores[n / 2]) / 2.0
  else
    scores[n / 2]
  end
puts format("%.2f", median)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# [SETTING] # | |
#pref='tokyo/' | |
#region='' | |
#scores=[] | |
#freq=0.1 | |
#pagelimit=60 | |
#scoreshow=false | |
$ ruby tabelog_score.rb | |
CURRENT TIME IS 2019-10-11 00:52:03 +0900. | |
PREF:tokyo/, REGION_CODE: | |
60 PAGE PROCESSING... | |
ANARYZE STORE:1141 | |
[0.30,0.40):0 | |
[0.40,0.50):0 | |
[0.50,0.60):0 | |
[0.60,0.70):0 | |
[0.70,0.80):0 | |
[0.80,0.90):0 | |
[0.90,1.00):0 | |
[1.00,1.10):0 | |
[1.10,1.20):0 | |
[1.20,1.30):0 | |
[1.30,1.40):0 | |
[1.40,1.50):0 | |
[1.50,1.60):0 | |
[1.60,1.70):0 | |
[1.70,1.80):0 | |
[1.80,1.90):0 | |
[1.90,2.00):0 | |
[2.00,2.10):0 | |
[2.10,2.20):0 | |
[2.20,2.30):0 | |
[2.30,2.40):0 | |
[2.40,2.50):0 | |
[2.50,2.60):0 | |
[2.60,2.70):0 | |
[2.70,2.80):0 | |
[2.80,2.90):0 | |
[2.90,3.00):16 | |
[3.00,3.10):435 | |
[3.10,3.20):156 | |
[3.20,3.30):176 | |
[3.30,3.40):119 | |
[3.40,3.50):139 | |
[3.50,3.60):89 | |
[3.60,3.70):30 | |
[3.70,3.80):5 | |
[3.80,3.90):1 | |
[3.90,4.00):1 | |
[4.00,4.10):0 | |
[4.10,4.20):0 | |
[4.20,4.30):0 | |
[4.30,4.40):0 | |
[4.40,4.50):0 | |
[4.50,4.60):0 | |
[4.60,4.70):0 | |
[4.70,4.80):0 | |
[4.80,4.90):0 | |
[4.90,5.00):0 | |
AVERAGE... | |
3.23 | |
MEDIAN... | |
3.18 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment