-
-
Save MS-Jahan/87d3c8ca3c422a74b7ba2a69b58d9a60 to your computer and use it in GitHub Desktop.
Get NU BBA Result & Save to Google Sheet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys | |
# os.system("pip3 install requests bs4 pygsheets") | |
import traceback | |
import requests | |
import time | |
from bs4 import BeautifulSoup | |
import pygsheets | |
from pygsheets.datarange import DataRange | |
from pygsheets import Cell | |
from pprint import pprint | |
from string import ascii_uppercase | |
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36', "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"} | |
reg_num = 20501004712 # First registration number -1 (for loop) | |
ALL_DATA = [] | |
rtext = None | |
college_name_p = "6469" | |
college_name = college_name_p | |
while reg_num < 20501004800: | |
if college_name_p not in college_name: | |
print("College Name Changed 1") | |
print(url) | |
break | |
retry = 1 | |
reg_num += 1 | |
print(reg_num) | |
url = f"http://103.113.200.7/bba/individual_result_show.php?roll_number={reg_num}&exam_year=2021&semester=5601" | |
while retry != 0: | |
try: | |
r = requests.get(url, headers=headers) | |
print(url) | |
# print(r.text.strip()) | |
rtext = r.text | |
soup = BeautifulSoup(r.text, "html.parser") | |
try: | |
first_table = soup.find_all("table")[2] | |
except IndexError: | |
print("Index Error", url) | |
break | |
data = { | |
"Name": None, | |
"REG": None, | |
"GPA": None, | |
# "STRUCTURED PROGRAMMING LANGUAGE": None, | |
# "STRUCTURED PROGRAMMING LANGUAGE LAB": None, | |
# "ELECTRICAL AND ELECTRONIC CIRCUIT": None, | |
# "ELECTRICAL AND ELECTRONIC CIRCUIT LAB": None, | |
# "CALCULAS": None, | |
# "PHYSICS": None, | |
# "ENGLISH": None, | |
} | |
name = first_table.find("tr").find_all("td")[1].text.strip() | |
# reg = first_table.find_all("tr")[4].find_all("td")[1].text.strip() | |
try: | |
gpa = first_table.find_all("tr")[5].find_all("td")[1].text.strip() | |
except: | |
gpa = "INCOMPLETE" | |
college_name = first_table.find_all("tr")[1].find_all("td")[1].text.strip() | |
data ["Name"] = name | |
data ["REG"] = reg_num | |
data ["GPA"] = gpa | |
print(name, reg_num, gpa, college_name) | |
if "6469" not in college_name: | |
print("College Name Changed 2") | |
print(url) | |
break | |
second_table = soup.find_all("table")[3] | |
number_of_trs = len(second_table.find_all("tr")) | |
for i in range(2, number_of_trs): | |
# print(second_table.find_all("tr")[i].find_all("td")[1].text.strip()) | |
# print(second_table.find_all("tr")[i].find_all("td")[3].text.strip()) | |
data[second_table.find_all("tr")[i].find_all("td")[1].text.strip()] = second_table.find_all("tr")[i].find_all("td")[2].text.strip() | |
# data["STRUCTURED PROGRAMMING LANGUAGE LAB"] = second_table.find_all("tr")[3].find_all("td")[3].text.strip() | |
# data["ELECTRICAL AND ELECTRONIC CIRCUIT"] = second_table.find_all("tr")[4].find_all("td")[3].text.strip() | |
# data["ELECTRICAL AND ELECTRONIC CIRCUIT LAB"] = second_table.find_all("tr")[5].find_all("td")[3].text.strip() | |
# data["CALCULAS"] = second_table.find_all("tr")[6].find_all("td")[3].text.strip() | |
# data["PHYSICS"] = second_table.find_all("tr")[7].find_all("td")[3].text.strip() | |
# data["ENGLISH"] = second_table.find_all("tr")[8].find_all("td")[3].text.strip() | |
print(data) | |
# os._exit(0) | |
ALL_DATA.append(data) | |
time.sleep(1) | |
retry = 3 | |
break | |
# break | |
except: | |
retry -= 1 | |
traceback.print_exc() | |
print("Error in {}".format(reg_num)) | |
# with open("error.txt", "a") as f: | |
# f.write(rtext) | |
# exit() | |
time.sleep(0.5) | |
# Authorize and open spreadsheet and then worksheet | |
gc = pygsheets.authorize("client_secret.json") | |
# sh = gc.create('NEW_COIN_DATA - ' + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) | |
sh = gc.open_by_key("12Z7E5yyaXBTLRA-CywkY_4fs5bs4qqdqb0yXGc5pjmY") # Google Sheet ID | |
wks = sh.sheet1 | |
# Get whole sheet, get last row and create an array of names | |
end_row = 1 | |
cells = wks.get_all_values(include_tailing_empty_rows=False, include_tailing_empty=False, returnas='matrix') | |
end_row = len(cells) | |
prev_names = [] | |
if end_row > 1: | |
cells[0].pop(0) | |
for i in range(0, end_row): | |
prev_names.append(cells[i][0]) | |
print(prev_names) | |
# Write header row if it doesn't exist | |
if end_row == 1: | |
header_cells_array = list(ALL_DATA[0].keys()) | |
header_cells_array = [x.upper() for x in header_cells_array] | |
print(header_cells_array) | |
wks.update_values('A1', [header_cells_array]) | |
model_cell = Cell('A1') | |
model_cell.set_text_format('bold', True) | |
DataRange('A1',str(ascii_uppercase[len(header_cells_array)])+'1', worksheet=wks).apply_format(model_cell) | |
row_number = 2 | |
else: | |
row_number = end_row + 1 | |
# update the sheet with array | |
for row in ALL_DATA: | |
if row['Name'] not in prev_names: | |
row_array = list(row.values()) | |
for i, item in enumerate(row_array): | |
if type(item) is list: | |
row_array[i] = str(', '.join(item)) | |
else: | |
print(row['Name'], 'Name already exists in sheet') | |
continue | |
wks.update_values(f'A{row_number}', [row_array]) | |
print("ROW", row_number) | |
row_number += 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment