Created
January 5, 2012 17:03
-
-
Save dingsdax/1566136 to your computer and use it in GitHub Desktop.
university ranking WebL wrapper script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// WebL website (archived): http://web.archive.org/web/20070507043202/http://www.hpl.hp.com/downloads/crl/webl/index.html
// Collect the unique country names of the top 500 world universities,
// count the universities per country, and pretty-print the statistics as a horizontal bar chart.
// Data: 2009 edition of the ranking, http://www.arwu.org/ARWU2009.jsp
import Str; | |
// fetch the first page of the 2009 ranking; countryList() reads the links to the other ranking pages from it
var startpage = GetURL("http://www.arwu.org/ARWU2009.jsp"); | |
// Select the university links on a ranking page.
//   page: a page object as returned by GetURL
//   returns: the set of <a> elements that contain an <img> (the country flag)
//            and are themselves located inside a <td> table cell
var getUniversities = fun(page)
    var anchors = Elem(page, "a");
    var flags = Elem(page, "img");
    var cells = Elem(page, "td");
    // left unparenthesised on purpose so the operators group exactly as before
    anchors contain flags inside cells
end;
// Find a country in the country list; also serves as a membership test.
//   clist: list of objects that each carry a "name" field
//   s:     the country name to look for
//   returns: index of the first entry whose name equals s, or -1 if none does
var posList = fun(clist, s)
    var total = Size(clist);
    var idx = 0;
    var found = -1;
    // stop as soon as a match has been recorded or the list is exhausted
    while (idx < total) and (found == -1) do
        if clist[idx].name == s then
            found = idx;
        else
            idx = idx + 1;
        end;
    end;
    found
end;
// Count the ranked universities per country across all ranking pages.
//   startpage: the first ranking page (a page object as returned by GetURL)
//   returns: a list of objects [. name = <country name>, count = <number of universities> .],
//            one per distinct country, in order of first appearance
var countryList = fun(startpage)
    var result = [];
    // links to the individual ranking pages that appear after the table:
    // anchors whose text matches a range such as "101-200", plus the anchor containing "Top"
    var pagelinks = (Elem(startpage, "a") contain Pat(startpage, "[0-9]{3}-[0-9]{3}") after Elem(startpage, "table")) + (Elem(startpage, "a") contain Pat(startpage, "Top") after Elem(startpage, "table"));
    every pagelink in pagelinks do
        var countries = getUniversities(GetURL(pagelink.href));
        every country in countries do
            // the country name is the part of the link that follows the first "="
            var countryname = Str_Split(country.href, "=")[1];
            var pos = posList(result, countryname);
            // first sighting: append with count 1; otherwise bump the existing count
            if pos == -1 then
                result = result + [ [. name = countryname, count = 1 .] ];
            else
                result[pos].count = result[pos].count + 1;
            end;
        end;
    end;
    // The accumulated list must be the last expression of the function body.
    // It previously sat inside the page loop, so the function's value was that of
    // the loop statement (and "result" was never evaluated when there were no page links).
    result
end;
// Pretty-print the country list, countries with the most universities first.
// Each line is a bar of one "❚" per university, followed by the country name and the count in parentheses.
var C = Sort(countryList(startpage), fun(a, b) Sign(b.count - a.count) end);
every entry in C do
    // draw the bar by counting down from the number of universities
    var remaining = entry.count;
    while remaining > 0 do
        Print("❚");
        remaining = remaining - 1;
    end;
    Print(" ", entry.name, " (", entry.count, ")\n");
end;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment