-
-
Save MS-Jahan/b16378ff41da6d401aae4748dd168cc0 to your computer and use it in GitHub Desktop.
Get NU CSE Result & Save to Google Sheet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys | |
# os.system("pip3 install requests bs4 pygsheets") | |
import traceback | |
import requests | |
import time | |
from bs4 import BeautifulSoup | |
import pygsheets | |
from pygsheets.datarange import DataRange | |
from pygsheets import Cell | |
from pprint import pprint | |
from string import ascii_uppercase | |
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36', "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"} | |
reg_num = 20502004774 # First registration number -1 (for loop) | |
ALL_DATA = [] | |
rtext = None | |
college_name_p = "6469" | |
college_name = college_name_p | |
while reg_num < 20502004870: | |
if college_name_p not in college_name: | |
break | |
retry = 1 | |
reg_num += 1 | |
print(reg_num) | |
url = f"http://103.113.200.7/cse/cse_result.php?roll_number=®_no={reg_num}&exm_code=5611&exam_year=2021" | |
while retry != 0: | |
try: | |
r = requests.get(url, headers=headers) | |
# print(r.text.strip()) | |
rtext = r.text | |
soup = BeautifulSoup(r.text, "html.parser") | |
first_table = soup.find_all("table")[2] | |
data = { | |
"Name": None, | |
"REG": None, | |
"GPA": None, | |
# "STRUCTURED PROGRAMMING LANGUAGE": None, | |
# "STRUCTURED PROGRAMMING LANGUAGE LAB": None, | |
# "ELECTRICAL AND ELECTRONIC CIRCUIT": None, | |
# "ELECTRICAL AND ELECTRONIC CIRCUIT LAB": None, | |
# "CALCULAS": None, | |
# "PHYSICS": None, | |
# "ENGLISH": None, | |
} | |
name = first_table.find("tr").find_all("td")[1].text.strip() | |
# reg = first_table.find_all("tr")[4].find_all("td")[1].text.strip() | |
gpa = first_table.find_all("tr")[6].find_all("td")[1].text.strip() | |
college_name = first_table.find_all("tr")[3].find_all("td")[1].text.strip() | |
data ["Name"] = name | |
data ["REG"] = reg_num | |
data ["GPA"] = gpa | |
print(name, reg_num, gpa, college_name) | |
if "6469" not in college_name: | |
break | |
second_table = soup.find_all("table")[3] | |
number_of_trs = len(second_table.find_all("tr")) | |
for i in range(2, number_of_trs): | |
data[second_table.find_all("tr")[i].find_all("td")[1].text.strip()] = second_table.find_all("tr")[i].find_all("td")[3].text.strip() | |
# data["STRUCTURED PROGRAMMING LANGUAGE LAB"] = second_table.find_all("tr")[3].find_all("td")[3].text.strip() | |
# data["ELECTRICAL AND ELECTRONIC CIRCUIT"] = second_table.find_all("tr")[4].find_all("td")[3].text.strip() | |
# data["ELECTRICAL AND ELECTRONIC CIRCUIT LAB"] = second_table.find_all("tr")[5].find_all("td")[3].text.strip() | |
# data["CALCULAS"] = second_table.find_all("tr")[6].find_all("td")[3].text.strip() | |
# data["PHYSICS"] = second_table.find_all("tr")[7].find_all("td")[3].text.strip() | |
# data["ENGLISH"] = second_table.find_all("tr")[8].find_all("td")[3].text.strip() | |
print(data) | |
# os._exit(0) | |
ALL_DATA.append(data) | |
time.sleep(1) | |
retry = 3 | |
break | |
# break | |
except: | |
retry -= 1 | |
# traceback.print_exc() | |
print("Error in {}".format(reg_num)) | |
# with open("error.txt", "a") as f: | |
# f.write(rtext) | |
# exit() | |
time.sleep(0.5) | |
# Authorize and open spreadsheet and then worksheet | |
gc = pygsheets.authorize("client_secret.json") | |
# sh = gc.create('NEW_COIN_DATA - ' + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) | |
sh = gc.open_by_key("1HktqKkGeYsUHEUMQlvNcuVQxr5RjHRrbA98IRUkCPEk") # Google Sheet ID | |
wks = sh.sheet1 | |
# Get whole sheet, get last row and create an array of names | |
end_row = 1 | |
cells = wks.get_all_values(include_tailing_empty_rows=False, include_tailing_empty=False, returnas='matrix') | |
end_row = len(cells) | |
prev_names = [] | |
if end_row > 1: | |
cells[0].pop(0) | |
for i in range(0, end_row): | |
prev_names.append(cells[i][0]) | |
print(prev_names) | |
# Write header row if it doesn't exist | |
if end_row == 1: | |
header_cells_array = list(ALL_DATA[0].keys()) | |
header_cells_array = [x.upper() for x in header_cells_array] | |
print(header_cells_array) | |
wks.update_values('A1', [header_cells_array]) | |
model_cell = Cell('A1') | |
model_cell.set_text_format('bold', True) | |
DataRange('A1',str(ascii_uppercase[len(header_cells_array)])+'1', worksheet=wks).apply_format(model_cell) | |
row_number = 2 | |
else: | |
row_number = end_row + 1 | |
# update the sheet with array | |
for row in ALL_DATA: | |
if row['Name'] not in prev_names: | |
row_array = list(row.values()) | |
for i, item in enumerate(row_array): | |
if type(item) is list: | |
row_array[i] = str(', '.join(item)) | |
else: | |
print(row['Name'], 'Name already exists in sheet') | |
continue | |
wks.update_values(f'A{row_number}', [row_array]) | |
print("ROW", row_number) | |
row_number += 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment