Created
February 17, 2009 06:43
-
-
Save johnboxall/65606 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pprint

from lxml import html as HTML
from mechanize import Browser
# Start page opened by PayrollCalculator; the query string selects the
# English-language version of the calculator.
ENDPOINT = "https://apps.cra-arc.gc.ca/ebci/rhpd/startLanguage.do?lang=English"
class PayrollCalculator(object):
    """Walk the online payroll calculator's forms and scrape the result table.

    A ``mechanize`` browser session is opened on ``ENDPOINT`` at
    construction time.  ``calculate()`` then fills in and submits each form
    in turn, parses the final page with ``lxml`` and returns the scraped
    figures.
    """

    # Labels paired, in order, with the values scraped from the result page.
    RESULT_FIELDS = (
        "Salary or wages for the pay period",
        "Total EI insurable earnings for the pay period",
        "Taxable income",
        "Cash income for the pay period",
        "Federal tax deductions",
        "Provincial tax deductions",
        "Requested additional tax deduction",
        "Total tax on income",
        "CPP deductions",
        "EI deductions",
        "Amounts deducted at source",
        "Total deductions on income",
    )

    def __init__(self):
        """Create the browser session and load the calculator start page."""
        self.b = Browser()
        # Do not let mechanize's robots.txt handling block the requests.
        self.b.set_handle_robots(False)
        self.b.open(ENDPOINT)

    def calculate(self):
        """Submit every form step and return the scraped results.

        The progress label and current URL are printed before each step so
        a failed run shows how far the session got.

        Returns:
            The list of ``(label, value)`` tuples produced by ``_needle()``.
        """
        # 1) Pay Period
        print("PAY")
        print(self.b.geturl())
        self.b.select_form(name="payrollData")
        self.b.form["year"] = ["4"]  # January 1, 2009
        self.b.form["employerName"] = "Handi"
        self.b.form["payPeriodEndingDate"] = "2009-02-01"  # YYYY-MM-DD
        self.b.form["province"] = ["9"]  # British Columbia
        self.b.form["payPeriod"] = ["2"]  # Biweekly
        self.b.submit(name="fwdSalary")

        # 2) Salary / Bonus Etc.
        print("SALARY")
        print(self.b.geturl())
        self.b.select_form(name="payrollData")
        self.b.form["employeeName"] = "John"
        self.b.submit(name="fwdGrossSalary")

        # 3) Gross Income
        print("GROSS")
        print(self.b.geturl())
        self.b.select_form(name="grossIncomeData")
        self.b.form["incomeTypeAmount"] = "4000"
        self.b.submit()

        # 4) Salary / Bonus Etc. again
        print("SALARY")
        print(self.b.geturl())
        self.b.select_form(name="payrollData")
        self.b.submit(nr=3)  # 3 is the magic number for the Calculate button

        # 5) Results page. Scraping time.
        self.doc = HTML.fromstring(self.b.response().read(), self.b.geturl())
        return self._needle()

    def _needle(self):
        """Extract, pretty-print and return the figures from ``self.doc``.

        The values are the stripped leading text of every element matched
        by the XPath ``//table[3]//td[2]``.  A cell with no leading text
        yields an empty string instead of raising.  Labels and values are
        paired positionally with ``zip``, so any surplus on either side is
        dropped.

        Returns:
            A list of ``(label, value)`` tuples.
        """
        # The data we want is trapped somewhere around here.
        cells = self.doc.xpath("//table[3]//td[2]")
        # ``td.text`` is None when the cell starts with a child element.
        values = [(td.text or "").strip() for td in cells]
        # list() keeps the output a printable list on Python 3, where zip
        # returns a lazy iterator.
        results = list(zip(self.RESULT_FIELDS, values))
        pprint.pprint(results)
        return results
# Run the scrape only when executed as a script, so importing this module
# does not open a network session as a side effect.
if __name__ == "__main__":
    pc = PayrollCalculator()
    pc.calculate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment