Skip to content

Instantly share code, notes, and snippets.

@Cartmanishere
Last active October 16, 2021 05:16
Show Gist options
  • Save Cartmanishere/c5c83f1174f0259679894dbaa6b756d8 to your computer and use it in GitHub Desktop.
Save Cartmanishere/c5c83f1174f0259679894dbaa6b756d8 to your computer and use it in GitHub Desktop.
Scrape data usage information from Railwire account using Python
#!/Users/pranav/.pyenv/shims/python
"""
Requirements:
- Python3
PIP Install:
- bs4
- requests
- lxml
What?
A script to scrape the remaining and used data from your railwire account.
Why?
I like to keep track of how much data is remaining and so I check the account quite frequently.
To check it, I have to go to the site, enter my login credentials, complete the captcha, and only then see the data.
So instead of doing all that, I created this small utility.
How?
1. Install above mentioned requirements.
2. Add your username and password in the code (check the comments)
3. Run this script using python.
"""
import requests
from bs4 import BeautifulSoup
import json
class RailwireInfo:
    """
    Scrape data-usage figures from a Railwire subscriber account.

    The authentication scheme works as follows.
    1. Before logging in, you need the following things -
        - Username
        - Password
        - Railwire cookie (this is set when the login page loads)
        - Captcha code
    2. The captcha code is read from the login page itself (see
       getCaptcha). The older route of downloading the captcha image and
       running OCR on it is kept only as getCaptchaDepracated.
    """

    def __init__(self, username=None, password=None, timeout=30):
        """
        Set up the HTTP session, endpoint URLs and credentials.

        Args:
            username: Account username. Defaults to the value that was
                previously hard-coded here, so RailwireInfo() keeps working.
            password: Account password. Same fallback as ``username``.
            timeout: Seconds passed as ``timeout=`` to every HTTP request so
                a stalled server cannot hang the script forever.
        """
        self.sess = requests.Session()
        self.baseURL = "https://mh.railwire.co.in/"
        self.loginURL = self.baseURL + "rlogin"
        self.captchaURL = self.baseURL + "rlogin/getCaptchaImg"
        # NOTE(review): credentials embedded in source; prefer passing them in.
        self.username = username if username is not None else "mh.shubham.gajjewar"
        self.password = password if password is not None else "subham123"
        userAgent = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/80.0.3987.163 Safari/537.36"
        )
        self.headers = {"User-Agent": userAgent}
        self.data_url = self.baseURL + "subcntl/datausage/"
        self.timeout = timeout

    @staticmethod
    def _to_gb(value):
        """Parse ``value`` as a float and divide by 1024.

        Raises:
            ValueError: if ``value`` is not a valid float literal.
        """
        # presumably the site reports megabytes, making the result gigabytes
        # -- TODO confirm against the usage page
        return float(value) / 1024

    def getCaptchaDepracated(self):
        """
        Download the captcha image and OCR it into text.

        Deprecated: getCaptcha reads the code straight from the login page,
        so this function is no longer required. It needs the PIL and
        pytesseract packages, which are imported lazily so the rest of the
        class works without them.

        Returns:
            The string produced by ``pytesseract.image_to_string``.
        """
        from io import BytesIO

        from PIL import Image
        import pytesseract

        # Load the login page first; per the class docstring this is what
        # sets the session cookie. The response body itself is not needed.
        self.sess.get(self.loginURL, headers=self.headers, timeout=self.timeout)
        # Fetch the captcha image with the same headers as every other call.
        response = self.sess.get(
            self.captchaURL, headers=self.headers, timeout=self.timeout
        )
        image = Image.open(BytesIO(response.content))
        return pytesseract.image_to_string(image)

    def getCaptcha(self):
        """
        Get the captcha text embedded in the login page.

        Returns:
            The stripped text of the ``<span id="captcha_code">`` element.

        Raises:
            SystemExit: with status 1 when the element is not on the page.
        """
        page = self.sess.get(
            self.loginURL, headers=self.headers, timeout=self.timeout
        )
        soup = BeautifulSoup(page.content, "lxml")
        captcha = soup.find('span', {'id': 'captcha_code'})
        if captcha is None:
            print(" [*] Captcha not found on page. Cannot continue.")
            raise SystemExit(1)
        return captcha.text.strip()

    def login(self):
        """
        Log in to the Railwire page.

        Returns:
            The parsed (BeautifulSoup) body of the login response.

        Raises:
            SystemExit: with status 1 when the response status is not 200.
        """
        # getCaptcha loads the login page, so the session cookie read below
        # is available by the time the form payload is built.
        code = self.getCaptcha()
        payload = {
            "railwire_test_name": self.sess.cookies.get('railwire_cookie_name'),
            "username": self.username,
            "password": self.password,
            "code": code,
        }
        response = self.sess.post(
            self.loginURL,
            data=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        # NOTE(review): only the HTTP status is checked; a 200 response does
        # not by itself prove the credentials were accepted.
        if response.status_code != 200:
            print("[*] Login to Railwire failed.\n[*] Exiting..")
            # A failure must not report success (status 0) to the shell.
            raise SystemExit(1)
        return BeautifulSoup(response.content, 'lxml')

    def fetch_info(self):
        """
        Log in, load the data-usage page and print the usage summary.

        Prints three lines (Download, Upload, Total), each formatted with
        two decimals and a ``GB`` suffix.

        Raises:
            SystemExit: with status 1 when the usage table footer is missing
                or its cells are not numeric.
        """
        self.login()
        page = self.sess.get(
            self.data_url, headers=self.headers, timeout=self.timeout
        )
        soup = BeautifulSoup(page.content, 'lxml')
        # Drop the first and last footer cells; the three in between are
        # read as upload, download and total, in that order.
        cells = soup.select('tfoot > tr > td')[1:-1]
        if len(cells) != 3:
            print("[*] Data usage table not found. Login may have failed.")
            raise SystemExit(1)
        try:
            upload, download, total = (self._to_gb(cell.text) for cell in cells)
        except ValueError:
            print("[*] Data usage table contains non-numeric values.")
            raise SystemExit(1)
        print('Download: {:.2f}GB'.format(download))
        print('Upload: {:.2f}GB'.format(upload))
        print('Total: {:.2f}GB'.format(total))
if __name__ == "__main__":
    # Script entry point: build the scraper and print the usage summary.
    RailwireInfo().fetch_info()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment