Last active
September 28, 2017 09:02
-
-
Save billy3321/fafcfd8914df21202cff768fc7a37d7d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# brew install chromedriver geckodriver imagemagick@6
# gem install selenium-webdriver mechanize rmagick nokogiri
require 'json'
require 'openssl'
require 'resolv'
require 'time'
require 'uri'

require 'mechanize'
require 'nokogiri'
require 'rmagick'
require 'selenium-webdriver'
# Sites available for scanning. Each entry is a host, optionally followed by a
# port, path and query, and never carries a scheme: scan_page probes http://
# and https:// itself. Entries are unique; the list previously contained
# "www.vghtc.gov.tw/GipOpenWeb/wSite/mp?mp=1" twice.
$urls = [
  "www.ey.gov.tw",
  "www.ndc.gov.tw",
  "domain.edu.tw/index.html",
  "www.arte.gov.tw",
  "www.cksmh.gov.tw",
  "www.edu.tw",
  "www.naer.edu.tw",
  "www.ncl.edu.tw",
  "www.ner.gov.tw",
  "www.nlpi.edu.tw",
  "www.nmmst.gov.tw",
  "www.nmns.edu.tw",
  "www.nstc.org.tw",
  "www.nstm.gov.tw",
  "www.ntl.edu.tw/mp.asp?mp=1",
  "www.ntsec.gov.tw",
  "www.sa.gov.tw/wSite/mp?mp=11",
  "www.yda.gov.tw",
  "ecare.moe.gov.tw",
  "www.nmmba.gov.tw",
  "www.mac.gov.tw",
  "www.ftc.gov.tw",
  "www.pcc.gov.tw",
  "www.dgbas.gov.tw/mp.asp?mp=1",
  "www.cga.gov.tw",
  "www.npm.gov.tw",
  "www.ncc.gov.tw",
  "www.ocac.gov.tw",
  "www.epa.gov.tw",
  "thcdc.hakka.gov.tw/wSite/mp?mp=1",
  "www.hakka.gov.tw",
  "www.apc.gov.tw",
  "www.tacp.gov.tw",
  "www.dgpa.gov.tw",
  "www.hrd.gov.tw/content/index01/index01.aspx",
  "www.rad.gov.tw",
  "www.cbc.gov.tw",
  "www.cepp.gov.tw",
  "www.cmc.gov.tw",
  "www.boca.gov.tw",
  "www.mofa.gov.tw",
  "www.mofa.gov.tw/idia",
  "www.aec.gov.tw",
  "www.iner.gov.tw",
  "www.trmc.aec.gov.tw",
  "www.ctsp.gov.tw",
  "www.most.gov.tw",
  "www.sipa.gov.tw",
  "www.stsp.gov.tw",
  "www.banking.gov.tw",
  "www.feb.gov.tw",
  "www.fsc.gov.tw",
  "www.ib.gov.tw",
  "www.sfb.gov.tw",
  "gpwd.mnd.gov.tw",
  "mna.gpwb.gov.tw",
  "www.gwsm.gov.tw",
  "www.mnd.gov.tw",
  "www.ydn.com.tw",
  "www.cdc.gov.tw/",
  "www.fda.gov.tw",
  "www.hpa.gov.tw/BHPNet/Web/index",
  "www.hso.mohw.gov.tw",
  "www.hwwtc.mohw.gov.tw/index.php",
  "www.mohw.gov.tw",
  "www.nricm.edu.tw",
  "www.sfaa.gov.tw",
  "douliou.wda.gov.tw",
  "thmr.wda.gov.tw",
  "tkyhkm.wda.gov.tw",
  "www.blf.gov.tw",
  "www.bli.gov.tw",
  "www.ilosh.gov.tw",
  "www.labor.gov.tw",
  "www.mol.gov.tw",
  "www.osha.gov.tw",
  "www.wda.gov.tw/index.jsp",
  "yct168.wda.gov.tw",
  "event.culture.tw/CHCSEC",
  "www.bamid.gov.tw/bin/home.php",
  "www.boch.gov.tw",
  "www.chcsec.gov.tw",
  "www.moc.gov.tw",
  "www.ncfta.gov.tw/ncfta_ce/main/index.aspx",
  "www.nhclac.gov.tw/home",
  "www.nhrm.gov.tw/home",
  "www.nmh.gov.tw/zh/index.htm",
  "www.nmp.gov.tw",
  "www.nmth.gov.tw",
  "www.nmtl.gov.tw",
  "www.ntcri.gov.tw",
  "www.ntm.gov.tw",
  "www.ntso.gov.tw/home",
  "www.tncsec.gov.tw",
  "www.ttcsec.gov.tw",
  "www.wac.gov.tw",
  "www.yatsen.gov.tw/tw",
  "www.ntmofa.gov.tw",
  "www.cec.gov.tw",
  "www.chec.gov.tw",
  "www.cycec.gov.tw",
  "www.cyec.gov.tw",
  "www.hccec.gov.tw",
  "www.hcec.gov.tw",
  "www.hlec.gov.tw",
  "www.ilec.gov.tw",
  "www.khec.gov.tw",
  "www.klec.gov.tw",
  "www.kmec.gov.tw",
  "www.lcec.gov.tw",
  "www.MECT.gov.tw",
  "www.mlec.gov.tw",
  "www.ntec.gov.tw",
  "www.phec.gov.tw",
  "www.ptec.gov.tw",
  "www.tcec.gov.tw",
  "www.tnec.gov.tw",
  "www.tpcec.gov.tw",
  "www.ttec.gov.tw",
  "www.tyec.gov.tw",
  "www.ylec.gov.tw",
  "e-school.ysnp.gov.tw",
  "group.moi.gov.tw",
  "iff.immigration.gov.tw/mp.asp?mp=iff_en",
  "news.immigration.gov.tw",
  "reflect.ysnp.gov.tw/reflect/new/chi/chief3.aspx",
  "snnp.cpami.gov.tw",
  "www.abri.gov.tw",
  "www.cpami.gov.tw",
  "www.cpu.edu.tw",
  "www.kmnp.gov.tw",
  "www.ktnp.gov.tw/cht/index.aspx",
  "www.lceb.gov.tw",
  "www.marine.gov.tw",
  "www.moi.gov.tw",
  "www.nca.gov.tw",
  "www.nlsc.gov.tw",
  "www.npa.gov.tw",
  "www.ris.gov.tw",
  "www.spnp.gov.tw",
  "www.taroko.gov.tw",
  "www.tjnp.gov.tw",
  "www.ymsnp.gov.tw",
  "www.ysnp.gov.tw",
  "www.ysnp.gov.tw/YsnpApply",
  "nv2.npa.gov.tw/NM103-604Client",
  "www.immigration.gov.tw/mp.asp?mp=1",
  "kaohsiung.customs.gov.tw/mp.asp?mp=8",
  "keelung.customs.gov.tw/mp.asp?mp=6",
  "taichung.customs.gov.tw",
  "taipei.customs.gov.tw/mp.asp?mp=5",
  "web.customs.gov.tw",
  "www.bot.com.tw",
  "www.dot.gov.tw",
  "www.eximbank.com.tw",
  "www.fia.gov.tw",
  "www.fnp.gov.tw",
  "www.fnpc.gov.tw",
  "www.fnpn.gov.tw",
  "www.fnps.gov.tw",
  "www.landbank.com.tw",
  "www.mof.gov.tw",
  "www.mofti.gov.tw",
  "www.nta.gov.tw",
  "www.ntbca.gov.tw",
  "www.ntbk.gov.tw",
  "www.ntbsa.gov.tw",
  "www.ppmof.gov.tw",
  "www.ttl.com.tw",
  "www.twfhc.com.tw",
  "www.twfhclife.com.tw",
  "webchat.landbank.com.tw:8443/mmccmedia/webchat.html",
  "www.ntbna.gov.tw/etwmain",
  "www.ntbt.gov.tw",
  "246.swcb.gov.tw",
  "agrstat.coa.gov.tw/sdweb/public/book/Book.aspx",
  "chiayi.forest.gov.tw",
  "doie.coa.gov.tw/index.asp",
  "dongshih.forest.gov.tw",
  "ewin.pabp.gov.tw",
  "hsinchu.forest.gov.tw",
  "hualien.forest.gov.tw",
  "luodong.forest.gov.tw",
  "m.tndais.gov.tw",
  "nantou.forest.gov.tw",
  "phis.baphiq.gov.tw",
  "pingtung.forest.gov.tw",
  "taitung.forest.gov.tw",
  "www.afa.gov.tw",
  "www.afasi.gov.tw",
  "www.caes.gov.tw",
  "www.coa.gov.tw",
  "www.forest.gov.tw",
  "www.hdares.gov.tw",
  "www.kdais.gov.tw",
  "www.mdais.gov.tw",
  "www.nvri.gov.tw",
  "www.swcb.gov.tw",
  "www.tactri.gov.tw",
  "www.tari.gov.tw",
  "www.tdais.gov.tw",
  "www.tesri.gov.tw",
  "www.tfrin.gov.tw",
  "www.tlri.gov.tw",
  "www.tndais.gov.tw",
  "www.tres.gov.tw/show_index.php",
  "www.tss.gov.tw",
  "www.ttdares.gov.tw/show_index.php",
  "www.tydares.gov.tw",
  "www.baphiq.gov.tw",
  "www.pabp.gov.tw",
  "cms03p.vghks.gov.tw/Chinese/MainSite",
  "taitungfarm.com",
  "wd.vghtpe.gov.tw/anes",
  "wd.vghtpe.gov.tw/Cancer_Cen",
  "wd.vghtpe.gov.tw/ccm",
  "wd.vghtpe.gov.tw/cmd",
  "wd.vghtpe.gov.tw/ctm",
  "wd.vghtpe.gov.tw/dent",
  "wd.vghtpe.gov.tw/derm",
  "wd.vghtpe.gov.tw/doma",
  "wd.vghtpe.gov.tw/ent",
  "wd.vghtpe.gov.tw/fm",
  "wd.vghtpe.gov.tw/GERM",
  "wd.vghtpe.gov.tw/im",
  "wd.vghtpe.gov.tw/imsc",
  "wd.vghtpe.gov.tw/mre",
  "wd.vghtpe.gov.tw/nmed",
  "wd.vghtpe.gov.tw/NS",
  "wd.vghtpe.gov.tw/nurs",
  "wd.vghtpe.gov.tw/nutr",
  "wd.vghtpe.gov.tw/obgy",
  "wd.vghtpe.gov.tw/oph",
  "wd.vghtpe.gov.tw/orth",
  "wd.vghtpe.gov.tw/osh",
  "wd.vghtpe.gov.tw/path",
  "wd.vghtpe.gov.tw/ped",
  "wd.vghtpe.gov.tw/pharm",
  "wd.vghtpe.gov.tw/pmr",
  "wd.vghtpe.gov.tw/RAD",
  "wd.vghtpe.gov.tw/rc",
  "wd.vghtpe.gov.tw/sg",
  "wd.vghtpe.gov.tw/vghneuro",
  "wd.vghtpe.gov.tw/vghpsy",
  "wd.vghtpe.gov.tw/vghtper_er",
  "www.cingjing.gov.tw",
  "www.fcea.gov.tw",
  "www.fushoushan.com.tw",
  "www.kaohsiungfarm.com.tw",
  "www.pulivh.gov.tw",
  "www.savh.gov.tw",
  "www.tyvh.gov.tw/",
  "www.vac.gov.tw",
  "www.vghtc.gov.tw/GipOpenWeb/wSite/mp?mp=1",
  "www.vghtpe.gov.tw",
  "www.vhct.gov.tw",
  "www.vhcy.gov.tw/vhcy",
  "www.vhcy.gov.tw/vhwc",
  "www.vhlc.gov.tw",
  "www.vhyk.gov.tw",
  "www.vhyl.gov.tw",
  "www.vtc.gov.tw/Content/myWebMain.aspx",
  "www2.wuling-farm.com.tw",
  "cus93.trade.gov.tw/FSCI",
  "lib.sirdp.org.tw/opac/index.aspx",
  "new.cpc.com.tw/division/epb",
  "new.cpc.com.tw/division/lpgb",
  "new.cpc.com.tw/division/mb",
  "new.cpc.com.tw/division/scb",
  "new.cpc.com.tw/Home",
  "www.bsmi.gov.tw",
  "www.ciic.org.tw",
  "www.dechnology.com.tw",
  "www.dois.moea.gov.tw/main.asp",
  "www.hlfd.gov.tw/Zh-tw/index.aspx",
  "www.jhfd.gov.tw/",
  "www.moea.gov.tw",
  "www.moea.gov.tw/Mns/cnc/home/Home.aspx",
  "www.moea.gov.tw/MNS/doit",
  "www.moea.gov.tw/MNS/doit_e",
  "www.moea.gov.tw/MNS/ptc/home/Home.aspx",
  "www.moeaboe.gov.tw",
  "www.moeacgs.gov.tw/main.jsp",
  "www.moeaic.gov.tw",
  "www.moeaidb.gov.tw",
  "www.moeaidb.gov.tw/iphw",
  "www.moeasmea.gov.tw",
  "www.sirdp.org.tw",
  "www.taipower.com.tw",
  "www.taisugar.com.tw",
  "www.tefd.gov.tw/",
  "www.tipo.gov.tw",
  "www.tnfd.gov.tw",
  "www.trade.gov.tw",
  "www.water.gov.tw",
  "www.wra.gov.tw",
  "www.wra01.gov.tw",
  "www.wra02.gov.tw",
  "www.wra03.gov.tw",
  "www.wra04.gov.tw",
  "www.wra05.gov.tw",
  "www.wra06.gov.tw",
  "www.wra07.gov.tw",
  "www.wra08.gov.tw",
  "www.wra09.gov.tw",
  "www.wra10.gov.tw",
  "www.wracb.gov.tw",
  "www.wranb.gov.tw",
  "www.wrap.gov.tw",
  "www.wrasb.gov.tw",
  "www.wratb.gov.tw",
  "www.epza.gov.tw",
  "itriap7.itri.org.tw/lims/web/LimsBaseQuery.aspx",
  "www.moeaidb.gov.tw/iphw/ytipc",
  "www.cosmetic.org.tw",
  "www.mine.gov.tw",
  "www.moeaitc.gov.tw",
  "220.128.208.14/MRT",
  "admin.taiwan.net.tw",
  "bac.thb.gov.tw",
  "cmvttc.thb.gov.tw",
  "cyi2.thb.gov.tw",
  "fmodg.gov.tw",
  "gip.taneeb.gov.tw",
  "hl.twport.com.tw",
  "hl.twport.com.tw/en",
  "hl.twport.com.tw/kids",
  "hmv.thb.gov.tw",
  "hti.thb.gov.tw",
  "hul.thb.gov.tw",
  "kh.twport.com.tw",
  "kh.twport.com.tw/en",
  "kh.twport.com.tw/kids",
  "kl.twport.com.tw",
  "kl.twport.com.tw/en",
  "kl.twport.com.tw/kids",
  "kl.twport.com.tw/su",
  "kl.twport.com.tw/su_en",
  "kl.twport.com.tw/su_kids",
  "kl.twport.com.tw/tp",
  "kl.twport.com.tw/tp_en",
  "kl.twport.com.tw/tw_kids",
  "klu.thb.gov.tw",
  "komv.thb.gov.tw",
  "kounan.thb.gov.tw",
  "luj.thb.gov.tw",
  "scweb.cwb.gov.tw",
  "south.cwb.gov.tw",
  "stc.thb.gov.tw",
  "swcoast-nsa.travel",
  "tc.twport.com.tw",
  "tc.twport.com.tw/en",
  "thbu1.thb.gov.tw",
  "thbu2.thb.gov.tw",
  "thbu3.thb.gov.tw",
  "thbu4.thb.gov.tw",
  "thbu5.thb.gov.tw",
  "tmv.thb.gov.tw",
  "tmvso.thb.gov.tw",
  "travelmasters.eastcoast-nsa.gov.tw",
  "www.ali-nsa.gov.tw",
  "www.ali-nsa.net/user/Main.aspx",
  "www.anws.gov.tw",
  "www.atc.gov.tw",
  "www.caa.gov.tw",
  "www.cwb.gov.tw",
  "www.cwb.gov.tw/eng/",
  "www.cwb.gov.tw/m/",
  "www.cya.gov.tw",
  "www.dbnsa.gov.tw/user/main.aspx?Lang=1",
  "www.eastcoast-nsa.gov.tw",
  "www.erv-nsa.gov.tw",
  "www.freeway.gov.tw",
  "www.hsr.gov.tw",
  "www.hulairport.gov.tw",
  "www.iot.gov.tw",
  "www.kia.gov.tw",
  "www.maolin-nsa.gov.tw",
  "www.maolin-nsa.gov.tw/gov",
  "www.matsu-nsa.gov.tw/User/Main.aspx",
  "www.mkport.gov.tw",
  "www.motc.gov.tw",
  "www.motcmpb.gov.tw",
  "www.necoast-nsa.gov.tw/gov/main.aspx",
  "www.necoast-nsa.gov.tw/user/Main.aspx",
  "www.northguan-nsa.gov.tw",
  "www.railway.gov.tw",
  "www.rrb.gov.tw",
  "www.siraya-nsa.gov.tw",
  "www.siraya-nsa.gov.tw/MainGOV/Main.aspx",
  "www.sunmoonlake.gov.tw/index.aspx#1",
  "admin.swcoast-nsa.gov.tw",
  "www.taiwan.net.tw",
  "www.taoyuanairport.com.tw",
  "www.taoyuan-airport.com",
  "www.tca.gov.tw/cht/index.php",
  "www.thb.gov.tw",
  "www.tipcmarine.com.tw/chinese",
  "www.trimt-nsa.gov.tw/web",
  "www.tsa.gov.tw",
  "www.tsa.gov.tw/tsaLZN/zh/home.aspx",
  "www.tsa.gov.tw/tsaMFK/zh/home.aspx",
  "www.twport.com.tw",
  "www.twport.com.tw/en",
  "www.twport.com.tw/jp",
  "www.twport.com.tw/kids",
  "yil.thb.gov.tw",
  "yul.thb.gov.tw",
  "museum.post.gov.tw",
  "suhua.thb.gov.tw",
  "www.kma.gov.tw",
  "www.penghu-nsa.gov.tw/index.aspx",
  "www.post.gov.tw",
  "www.tna.gov.tw",
  "www.tta.gov.tw",
  "www.aac.moj.gov.tw",
  "www.bdo.moj.gov.tw/mp201.html",
  "www.chc.moj.gov.tw",
  "www.chd.moj.gov.tw",
  "www.chp.moj.gov.tw",
  "www.chr.moj.gov.tw",
  "www.chy.moj.gov.tw",
  "www.ctg.moj.gov.tw",
  "www.cyc.moj.gov.tw",
  "www.cyd.moj.gov.tw",
  "www.cyp.moj.gov.tw",
  "www.cyy.moj.gov.tw",
  "www.dcv.moj.gov.tw",
  "www.gip.moj.gov.tw",
  "www.hlc.moj.gov.tw",
  "www.hld.moj.gov.tw",
  "www.hlh.moj.gov.tw",
  "www.hlp.moj.gov.tw",
  "www.hly.moj.gov.tw",
  "www.ilc.moj.gov.tw",
  "www.ilp.moj.gov.tw",
  "www.ily.moj.gov.tw",
  "www.jcp.moj.gov.tw",
  "www.klc.moj.gov.tw",
  "www.kld.moj.gov.tw",
  "www.klp.moj.gov.tw",
  "www.kmc.moj.gov.tw",
  "www.kmh.moj.gov.tw",
  "www.kmp.moj.gov.tw",
  "www.ksb.moj.gov.tw",
  "www.ksc.moj.gov.tw",
  "www.ksd.moj.gov.tw",
  "www.ksh.moj.gov.tw",
  "www.ksp.moj.gov.tw",
  "www.ksw.moj.gov.tw",
  "www.ksy.moj.gov.tw",
  "www.ljc.moj.gov.tw",
  "www.mjac.moj.gov.tw",
  "www.mjib.gov.tw",
  "www.mlc.moj.gov.tw",
  "www.mld.moj.gov.tw",
  "www.moj.gov.tw",
  "www.mtp.moj.gov.tw",
  "www.myg.moj.gov.tw",
  "www.ntc.moj.gov.tw",
  "www.ntd.moj.gov.tw",
  "www.pcc.moj.gov.tw",
  "www.pcy.moj.gov.tw",
  "www.phc.moj.gov.tw",
  "www.php.moj.gov.tw",
  "www.ptc.moj.gov.tw",
  "www.ptd.moj.gov.tw",
  "www.ptp.moj.gov.tw",
  "www.pty.moj.gov.tw",
  "www.scc.moj.gov.tw",
  "www.scd.moj.gov.tw",
  "www.scp.moj.gov.tw",
  "www.scy.moj.gov.tw",
  "www.sdb.moj.gov.tw",
  "www.slc.moj.gov.tw",
  "www.sld.moj.gov.tw",
  "www.sly.moj.gov.tw",
  "www.tcc.moj.gov.tw",
  "www.tcd.moj.gov.tw",
  "www.tch.moj.gov.tw",
  "www.tcj.moj.gov.tw",
  "www.tcp.moj.gov.tw",
  "www.tcw.moj.gov.tw",
  "www.tcy.moj.gov.tw",
  "www.thip.moj.gov.tw",
  "www.tn2p.moj.gov.tw/mp203.html",
  "www.tnc.moj.gov.tw",
  "www.tnd.moj.gov.tw",
  "www.tnh.moj.gov.tw",
  "www.tnj.moj.gov.tw",
  "www.tnp.moj.gov.tw",
  "www.tny.moj.gov.tw",
  "www.tpa.moj.gov.tw",
  "www.tpc.moj.gov.tw",
  "www.tpd.moj.gov.tw",
  "www.tph.moj.gov.tw",
  "www.tpi.moj.gov.tw",
  "www.tpj.moj.gov.tw",
  "www.tpk.moj.gov.tw",
  "www.tpp.moj.gov.tw",
  "www.tps.moj.gov.tw",
  "www.tpy.moj.gov.tw",
  "www.ttb.moj.gov.tw/mp061.html",
  "www.ttc.moj.gov.tw",
  "www.ttp.moj.gov.tw",
  "www.tuv.moj.gov.tw",
  "www.tyc.moj.gov.tw",
  "www.typ.moj.gov.tw",
  "www.tyr.moj.gov.tw",
  "www.tyw.moj.gov.tw",
  "www.tyy.moj.gov.tw",
  "www.ulc.moj.gov.tw",
  "www.ulp.moj.gov.tw",
  "www.ulp2.moj.gov.tw",
  "www.ywv.moj.gov.tw"
]
# Pauses the script to throttle requests.
#
# Monday to Friday between 08:00 and 18:59 local time it sleeps until 19:01 of
# the same day; at any other time it sleeps a random whole number of seconds
# between 5 and 20. The chosen duration is printed before sleeping.
#
# @return [Integer] whatever Kernel#sleep returns
def sleep_random_second
  now = Time.now
  if (1..5).cover?(now.wday) && (8..18).cover?(now.hour)
    puts "it is at working hour"
    # Time - Time yields the remaining seconds as a Float.
    seconds = Time.new(now.year, now.month, now.day, 19, 1, 0) - now
  else
    seconds = Random.rand(5..20)
  end
  puts "sleep #{seconds} seconds..."
  sleep(seconds)
end
# Writes content to filename, creating the file or truncating an existing one.
#
# @param filename [String] path of the file to write
# @param content [String] data to store
# @return [Integer] number of bytes written
def write_file(filename, content)
  File.write(filename, content)
end
# Checks whether url can be fetched.
#
# A fresh Mechanize agent is used for every probe, with keep-alive disabled
# and 30 second open/read timeouts. Any error raised by the request is
# printed and treated as "not reachable".
#
# @param url [String] absolute URL to fetch
# @return [Boolean] true when the response status code is below 400
def test_url(url)
  agent = Mechanize.new
  # SECURITY NOTE(review): certificate verification is switched off, so a
  # site with an invalid or spoofed certificate still counts as reachable.
  # Presumably deliberate for sites with broken certificates - confirm.
  agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
  agent.keep_alive = false
  agent.open_timeout = 30
  agent.read_timeout = 30
  begin
    agent.get(url).code.to_i < 400
  rescue StandardError => e
    puts "An error occurred: #{e}"
    puts "error url: #{url}"
    false
  end
end
# Finds which scheme a scheme-less address answers on.
#
# http is probed first and https only when http is not reachable, so a site
# served on both is reported with its http URL. (The method name keeps its
# historical spelling because callers use it.)
#
# @param url [String] host with optional port/path/query, without a scheme
# @return [String, false] the first reachable URL, or false when neither works
def test_protocal(url)
  %w[http https].each do |scheme|
    candidate = "#{scheme}://#{url}"
    return candidate if test_url(candidate)
  end
  false
end
# Returns the origin (scheme, host and any non-default port) of url.
#
# The port used to be dropped, so a page served from e.g. host:8443 had its
# links rebuilt against the default port. A port equal to the scheme's
# default is still omitted.
#
# @param url [String] absolute URL
# @return [String] e.g. "https://example.com" or "https://example.com:8443"
# @raise [URI::InvalidURIError] when url cannot be parsed
def get_domain(url)
  uri = URI(url)
  explicit_port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ""
  "#{uri.scheme}://#{uri.host}#{explicit_port}"
end
# Returns the origin of url with its host replaced by a resolved IP address.
#
# A non-default port is preserved (it used to be dropped), and an IPv6
# address is wrapped in brackets so the result is still a valid URL prefix.
#
# @param url [String] absolute URL
# @return [String] e.g. "http://203.0.113.7" or "https://203.0.113.7:8443"
# @raise [Resolv::ResolvError] when the host cannot be resolved
# @raise [URI::InvalidURIError] when url cannot be parsed
def get_ip(url)
  uri = URI(url)
  # hostname (unlike host) has no brackets around an IPv6 literal.
  ip = Resolv.getaddress(uri.hostname)
  ip = "[#{ip}]" if ip.include?(':')
  explicit_port = (uri.port && uri.port != uri.default_port) ? ":#{uri.port}" : ""
  "#{uri.scheme}://#{ip}#{explicit_port}"
end
# Returns the host component of url.
#
# @param url [String] URL to parse
# @return [String, nil] the host, or nil when url has none
# @raise [URI::InvalidURIError] when url cannot be parsed
def get_host(url)
  URI(url).host
end
# Returns the scheme component of url.
#
# @param url [String] URL to parse
# @return [String, nil] e.g. "http" or "https", or nil when url has no scheme
# @raise [URI::InvalidURIError] when url cannot be parsed
def get_protocol(url)
  URI(url).scheme
end
# Lists the pages of the same site that are linked from url.
#
# The page is rendered in Chrome through Selenium and every <a href> is
# classified:
# * hrefs that start with the page's origin (get_domain) or with the same
#   origin expressed by IP address (get_ip) are kept as they are;
# * protocol-relative hrefs ("//..."), hrefs with any other scheme (http:,
#   https:, javascript:, mailto:, tel:, ...), fragments ("#...") and hrefs
#   shorter than two characters are ignored;
# * everything else is treated as a relative reference and resolved against
#   url, so "b.html" found on ".../dir/a.html" becomes ".../dir/b.html".
#
# Differences from the earlier implementation: the browser is always closed,
# a failed DNS lookup only disables the IP-prefix rule instead of raising,
# prefixes are compared literally rather than as unescaped regular
# expressions, and non-http schemes are no longer mistaken for relative paths.
#
# @param url [String] absolute URL of the page to inspect
# @return [Array<String>] url followed by the unique same-site links in
#   document order; empty when the page could not be loaded
def get_url_list(url)
  hrefs = rendered_hrefs(url)
  return [] unless hrefs

  domain = get_domain(url)
  ip = begin
    get_ip(url)
  rescue StandardError => e
    puts "An error occurred: #{e}"
    puts "error url: #{url}"
    nil
  end

  links = hrefs.map { |href| same_site_url(href.strip, url, domain, ip) }
  ([url] + links.compact).uniq
end

# Loads url in Chrome and returns the raw href of every anchor, or nil when
# the page could not be loaded. The driver is quit on every path.
def rendered_hrefs(url)
  driver = nil
  begin
    driver = Selenium::WebDriver.for :chrome
    driver.get(url)
    Nokogiri::HTML(driver.page_source).xpath('//a/@href').map(&:to_s)
  rescue StandardError => e
    puts "An error occurred: #{e}"
    puts "error url: #{url}"
    nil
  ensure
    begin
      driver.quit if driver
    rescue StandardError => e
      # A failing shutdown must not discard the HTML that was already read.
      puts "An error occurred: #{e}"
    end
  end
end

# Maps one href to the absolute same-site URL it denotes, or nil when the
# link leaves the site or is not a navigable page.
def same_site_url(href, page_url, domain, ip)
  return nil if href.empty?
  return href if href.start_with?(domain) || (ip && href.start_with?(ip))
  return nil if href.start_with?('//') || href =~ /\A[a-z][a-z0-9+.\-]*:/i
  return nil if href.start_with?('#') || href.length < 2

  begin
    URI.join(page_url, href).to_s
  rescue URI::Error
    # Hrefs URI cannot parse (spaces, raw non-ASCII) keep the old
    # concatenation so they are still attempted later.
    href.start_with?('/') ? "#{domain}#{href}" : "#{domain}/#{href}"
  end
end
# Saves a full-page screenshot of url taken with the given browser.
#
# The window is resized to the document's scroll size plus 100 pixels in each
# direction before capturing. The PNG is written to the current directory as
# "<browser> <url>.png" with ':' replaced by a space and '/' by '!'.
#
# The driver is now closed in an ensure clause. Previously the rescue branch
# called driver.quit unconditionally, which raised NoMethodError when the
# driver itself had failed to start.
#
# @param browser [Symbol] Selenium browser name, e.g. :chrome or :safari
# @param url [String] absolute URL of the page
# @return [String, false] the screenshot file name, or false on any error
def take_whole_page_screenshot(browser, url)
  driver = nil
  begin
    driver = Selenium::WebDriver.for browser
    driver.get(url)
    sleep 1 # presumably gives the page time to finish rendering
    width = driver.execute_script("return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);")
    height = driver.execute_script("return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);")
    driver.manage.window.resize_to(width + 100, height + 100)
    sleep 1 # presumably lets the layout settle after the resize
    img_name = "#{driver.browser} #{url.gsub(':', ' ').gsub('/', '!')}.png"
    driver.save_screenshot(img_name)
    img_name
  rescue StandardError => e
    puts "An error occurred: #{e}"
    puts "error url: #{url}"
    false
  ensure
    begin
      driver.quit if driver
    rescue StandardError => e
      # A failing shutdown must not replace the result computed above.
      puts "An error occurred: #{e}"
    end
  end
end
# Compares two image files and stores a visual diff.
#
# The first frame of each file is compared with the mean-squared-error
# metric. The difference image is written to the current directory as
# "diff <url>.png" with ':' replaced by a space and '/' by '!'. All RMagick
# images opened here are released before returning, so memory does not
# accumulate over a long scan.
#
# NOTE(review): the two screenshots come from different browsers and may
# differ in size; confirm how compare_channel behaves in that case.
#
# @param img1 [String] path of the first image
# @param img2 [String] path of the second image
# @param url [String] page URL, used only to build the diff file name
# @return [Float] the distortion reported by compare_channel
def compare_image(img1, img2, url)
  img_name = "diff #{url.gsub(':', ' ').gsub('/', '!')}.png"
  sleep 1 # presumably waits for the screenshots to be flushed to disk
  images = []
  begin
    images << Magick::Image.read(img1).first
    images << Magick::Image.read(img2).first
    diff_img, diff_metric = images[0].compare_channel(images[1], Magick::MeanSquaredErrorMetric)
    images << diff_img
    diff_img.write(img_name)
    diff_metric
  ensure
    images.compact.each(&:destroy!)
  end
end
# Scans one site: screenshots every same-site page linked from its start page
# in Chrome and Safari, and records how much the two renderings differ.
#
# Results go into a directory named after url ('/' replaced by '!' and '.' by
# '-'), created in the current directory. result.csv inside it is rewritten
# after every page so partial results survive an interruption.
#
# Changes from the earlier implementation:
# * the CSV header now matches the row order (chrome first, then safari);
# * an empty link list aborts the scan, as the existing guard intended;
# * the working directory is restored even when an exception escapes;
# * a failing image comparison is logged and recorded as an error row;
# * fields containing commas, quotes or line breaks are CSV-quoted.
#
# @param url [String] host with optional port/path/query, without a scheme
# @return [Boolean] false when the site is unreachable or yields no pages,
#   true once the scan has finished (the success value used to be 0)
def scan_page(url)
  dirname = url.gsub('/', '!').gsub('.', '-')
  base_url = test_protocal(url)
  return false unless base_url

  url_list = get_url_list(base_url)
  return false if url_list.nil? || url_list.empty?

  puts url_list.to_json
  Dir.mkdir(dirname) unless File.directory?(dirname)
  # Block form: the previous directory is restored on every exit path.
  Dir.chdir(dirname) do
    result_csv = "url,chrome_img,safari_img,diff,error\n"
    url_list.each do |page_url|
      next unless test_url(page_url)

      result_csv += scan_page_csv_row(page_url)
      write_file('result.csv', result_csv)
    end
    write_file('result.csv', result_csv)
  end
  true
end

# Screenshots page_url in both browsers and returns its finished CSV line.
def scan_page_csv_row(page_url)
  chrome_img = take_whole_page_screenshot(:chrome, page_url)
  safari_img = take_whole_page_screenshot(:safari, page_url)
  diff = nil
  if chrome_img && safari_img
    puts "#{page_url} process done"
    begin
      diff = compare_image(chrome_img, safari_img, page_url)
    rescue StandardError => e
      puts "An error occurred: #{e}"
      puts "error url: #{page_url}"
    end
  end
  outcome = diff.nil? ? ['none', 1] : [diff, 0]
  fields = [page_url, chrome_img, safari_img] + outcome
  fields.map { |field| scan_page_csv_field(field) }.join(',') + "\n"
end

# Renders one value as a CSV field, quoting it only when it must be quoted.
def scan_page_csv_field(value)
  text = value.to_s
  return text unless text =~ /[",\r\n]/

  "\"#{text.gsub('"', '""')}\""
end
# Entry point: scan a single site.
scan_page('jk.pdis.tw')
# To scan every site listed in $urls instead, use:
# $urls.each do |url|
#   scan_page(url)
# end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment