Created
December 29, 2009 08:45
-
-
Save apit/265219 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python -u
# @author apit - http://fizdoonk.multiply.com/
# output: http://fizdoonk.blogspot.com/2009/12/not-oldest-one.html
import re
import sys

from lxml.html import parse
from GChartWrapper import *
def display_pie(stat, title=None):
    """Print an HTML ``<img>`` tag for a 3D pie chart built from ``stat``.

    Args:
        stat: Mapping whose keys become the slice labels and whose values
            become the slice sizes.
        title: Optional chart title; no title is set when it is falsy.

    The tag is written to standard output; nothing is returned.
    """
    # Take labels and values from a single pass over the mapping so they are
    # guaranteed to stay paired, and hand the chart real lists (on Python 3
    # ``keys()``/``values()`` are views, not lists).
    labels = list(stat)
    values = [stat[label] for label in labels]

    G = Pie3D(values)
    G.label(*labels)
    if title:
        G.title(title, 'cccccc', 36)
    G.color('3072F3', 'red', '00aaaa')
    G.size((500, 300))
    # The chart object's string form is used as the image source
    # (presumably the chart URL -- confirm against GChartWrapper).
    print("<img src='%s' />" % G)
def _age_from_id(reg_id, reference_year=2009):
    """Return the age encoded in ``reg_id``, or ``None`` when none is found.

    Characters 9-11 of the identifier are searched for the first two-digit
    number between 70 and 89, which is read as the birth year 19YY.
    """
    match = re.search(r'[7-8][0-9]', reg_id[9:12])
    if match is None:
        return None
    return reference_year - (1900 + int(match.group()))


def parse_page(url, reference_year=2009):
    """Scrape the results table at ``url`` and tally rows by sex and by age.

    Args:
        url: Location of the page, handed to ``lxml.html.parse``.
        reference_year: Year against which ages are computed. Defaults to
            2009, the value that was previously hard-coded.

    Returns:
        A ``(sexes, ages)`` tuple. ``sexes`` maps the text of the fifth
        cell of each row to a count and always contains the keys ``'P'``
        and ``'W'``. ``ages`` maps an age to the number of rows whose
        identifier yielded that age.

    Raises:
        SystemExit: With status -1 when fetching the page raises
            ``IOError``; a message is printed first.
    """
    try:
        doc = parse(url).getroot()
    except IOError:
        print("Failed to fetch the page. Check your connection.")
        sys.exit(-1)

    ages = {}
    sexes = {'P': 0, 'W': 0}

    # The first matched row is skipped (presumably the table header).
    for row in doc.cssselect('#AutoNumber10 table tr')[1:]:
        cells = row.cssselect('td')
        if len(cells) < 5:
            # Not a data row (too few cells); ignore it rather than fail
            # with an IndexError.
            continue

        # Strip dots and commas used as separators inside the identifier.
        reg_id = re.sub(r'[\.\,]', '', cells[2].text_content().strip())
        sex = cells[4].text_content().strip()

        age = _age_from_id(reg_id, reference_year)
        if age is not None:
            ages[age] = ages.get(age, 0) + 1

        # Every data row counts toward the sex tally, whether or not an age
        # could be derived. Unexpected values get their own key instead of
        # raising KeyError.
        sexes[sex] = sexes.get(sex, 0) + 1

    return sexes, ages
# Fetch the published results page, then emit one pie chart per tally.
sexes, ages = parse_page(
    'http://www.cpns.bpk.go.id/home/detpesertalolos.php'
    '?j=S1%20Teknik%20Informatika/Komputer&kd=TK'
)
for stat, title in ((sexes, 'Sex'), (ages, 'Age')):
    display_pie(stat, title)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment