Last active
September 4, 2022 18:56
-
-
Save code-simple/ee034194a6c2c28d897c82b8bdb3f103 to your computer and use it in GitHub Desktop.
PESCO.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Python script scrapes a PESCO bill and prints the important information from the utility bill.
# Required modules: BeautifulSoup (bs4) and selenium, with Python 3.10 or later.
# To install the modules, run the following commands in a terminal,
# e.g. to install BeautifulSoup run 'pip install beautifulsoup4'
from bs4 import BeautifulSoup | |
from selenium import webdriver | |
from selenium.webdriver.firefox.options import Options | |
import re | |
# Change the PESCO bill link here.
url = 'https://bill.pitc.com.pk/pescobill/general?refno=03262140036865'

# Layout of the printed summary; one %s per field, in the order used by
# format_bill().
MESSAGE_TEMPLATE = """
%s
Ref # : %s
Units Consumed : %s
Payable Amount : %s
After Due Date : %s
Surcharge : %s
Arrears : %s
Due Date : %s
Current Bill : %s
Total FPA : %s
"""


def first_number(text):
    """Return the first run of decimal digits found in *text*, as a string.

    Raises:
        ValueError: if *text* contains no digits at all.
    """
    match = re.search(r'\d+', text)
    if match is None:
        raise ValueError("no number found in %r" % (text,))
    return match.group(0)


def extract_address(raw):
    """Return the part of *raw* from the "ADDRESS" label up to the "Say" marker.

    The "ADDRESS" label itself is kept in the result. If "ADDRESS" is absent
    the slice starts at the beginning of *raw*; if "Say" does not occur after
    that point the slice runs to the end of *raw*. (A plain ``str.find`` slice
    would use -1 for a missing marker and silently cut the text.)
    """
    start = raw.find("ADDRESS")
    if start == -1:
        start = 0
    # Only look for the end marker after the label, so an earlier "Say"
    # (e.g. inside a name) cannot produce an empty slice.
    end = raw.find("Say", start)
    if end == -1:
        end = len(raw)
    return raw[start:end].strip()


def cell_text(cells, index, label):
    """Return the stripped ``.text`` of ``cells[index]``.

    Args:
        cells: sequence of parsed elements exposing a ``.text`` attribute.
        index: position of the wanted element.
        label: human-readable field name, used only in the error message.

    Raises:
        IndexError: if *cells* has no element at *index* (for example when
            the bill page did not load or is laid out differently).
    """
    try:
        cell = cells[index]
    except IndexError:
        raise IndexError(
            "bill page has no element %d for %r (found %d elements)"
            % (index, label, len(cells))
        ) from None
    return cell.text.strip()


def fetch_page_source(bill_url):
    """Load *bill_url* in headless Firefox and return the page source."""
    options = Options()
    # `options.headless = True` is deprecated in Selenium 4; pass the Firefox
    # command-line flag instead.
    options.add_argument("-headless")
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(bill_url)
        return driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails.
        driver.quit()


def parse_bill(html):
    """Extract the bill fields from the page *html* and return them as a dict.

    To work with a saved local copy of the page instead of the live site,
    pass that file's contents as *html*.

    The fields are picked by position among the ``td.border-b`` cells and the
    ``tr.fontsize`` rows.
    NOTE(review): the positions presumably match the current PESCO bill page
    layout; verify them if the output looks wrong.
    """
    soup = BeautifulSoup(html, "html.parser")
    cells = soup.find_all('td', attrs={'class': 'border-b'})
    rows = soup.find_all('tr', attrs={'class': 'fontsize'})
    return {
        # Fields read from the td.border-b cells.
        "due_date": cell_text(cells, 1, "due date"),
        "arrears": cell_text(cells, 12, "arrears"),
        "current_bill": cell_text(cells, 13, "current bill"),
        "total_fpa": first_number(cell_text(cells, 17, "total FPA")),
        "payable_within_due_date": cell_text(cells, 18, "payable within due date"),
        "lp_surcharge": cell_text(cells, 19, "surcharge"),
        "payable_after_due_date": cell_text(cells, 20, "payable after due date"),
        # Fields read from the tr.fontsize rows.
        "reference_no": cell_text(rows, 3, "reference number"),
        "address": extract_address(cell_text(rows, 8, "address")),
        "units_consumed": first_number(cell_text(rows, 10, "units consumed")),
    }


def format_bill(bill):
    """Render the dict returned by parse_bill() as the printable summary."""
    return MESSAGE_TEMPLATE % (
        bill["address"],
        bill["reference_no"],
        bill["units_consumed"],
        bill["payable_within_due_date"],
        bill["payable_after_due_date"],
        bill["lp_surcharge"],
        bill["arrears"],
        bill["due_date"],
        bill["current_bill"],
        bill["total_fpa"],
    )


def main():
    """Fetch the bill at `url`, parse it and print the summary."""
    print(format_bill(parse_bill(fetch_page_source(url))))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Python script to scrape a PESCO (electricity utility) bill and grab all the important information.
The good thing about this script is that normal scraping runs with JavaScript disabled,
which creates a problem because the data doesn't show; in this method we use the selenium module
to work with a real browser. The browser window won't show because we are using a headless
browser, which reduces processing overhead and gives fast results, unlike custom scraping tools such as ParseHub or Octoparse.