Skip to content

Instantly share code, notes, and snippets.

@jahansabit
Last active January 29, 2023 13:24
Show Gist options
  • Save jahansabit/e3b024cfdb6c399b1b75ef1135adc7a5 to your computer and use it in GitHub Desktop.
Save jahansabit/e3b024cfdb6c399b1b75ef1135adc7a5 to your computer and use it in GitHub Desktop.
Get NU CSE Result & Save to Google Sheet
import os, sys
# os.system("pip3 install requests bs4 pygsheets")
import traceback
import requests
import time
from bs4 import BeautifulSoup
import pygsheets
from pygsheets.datarange import DataRange
from pygsheets import Cell
from pprint import pprint
from string import ascii_uppercase
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36', "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"}
reg_num = 20502004774 # First registration number -1 (for loop)
ALL_DATA = []
rtext = None
college_name_p = "6469"
college_name = college_name_p
while reg_num < 20502004870:
if college_name_p not in college_name:
break
retry = 1
reg_num += 1
print(reg_num)
url = f"http://103.113.200.7/cse/cse_result.php?roll_number=&reg_no={reg_num}&exm_code=5611&exam_year=2021"
while retry != 0:
try:
r = requests.get(url, headers=headers)
# print(r.text.strip())
rtext = r.text
soup = BeautifulSoup(r.text, "html.parser")
first_table = soup.find_all("table")[2]
data = {
"Name": None,
"REG": None,
"GPA": None,
# "STRUCTURED PROGRAMMING LANGUAGE": None,
# "STRUCTURED PROGRAMMING LANGUAGE LAB": None,
# "ELECTRICAL AND ELECTRONIC CIRCUIT": None,
# "ELECTRICAL AND ELECTRONIC CIRCUIT LAB": None,
# "CALCULAS": None,
# "PHYSICS": None,
# "ENGLISH": None,
}
name = first_table.find("tr").find_all("td")[1].text.strip()
# reg = first_table.find_all("tr")[4].find_all("td")[1].text.strip()
gpa = first_table.find_all("tr")[6].find_all("td")[1].text.strip()
college_name = first_table.find_all("tr")[3].find_all("td")[1].text.strip()
data ["Name"] = name
data ["REG"] = reg_num
data ["GPA"] = gpa
print(name, reg_num, gpa, college_name)
if "6469" not in college_name:
break
second_table = soup.find_all("table")[3]
number_of_trs = len(second_table.find_all("tr"))
for i in range(2, number_of_trs):
data[second_table.find_all("tr")[i].find_all("td")[1].text.strip()] = second_table.find_all("tr")[i].find_all("td")[3].text.strip()
# data["STRUCTURED PROGRAMMING LANGUAGE LAB"] = second_table.find_all("tr")[3].find_all("td")[3].text.strip()
# data["ELECTRICAL AND ELECTRONIC CIRCUIT"] = second_table.find_all("tr")[4].find_all("td")[3].text.strip()
# data["ELECTRICAL AND ELECTRONIC CIRCUIT LAB"] = second_table.find_all("tr")[5].find_all("td")[3].text.strip()
# data["CALCULAS"] = second_table.find_all("tr")[6].find_all("td")[3].text.strip()
# data["PHYSICS"] = second_table.find_all("tr")[7].find_all("td")[3].text.strip()
# data["ENGLISH"] = second_table.find_all("tr")[8].find_all("td")[3].text.strip()
print(data)
# os._exit(0)
ALL_DATA.append(data)
time.sleep(1)
retry = 3
break
# break
except:
retry -= 1
# traceback.print_exc()
print("Error in {}".format(reg_num))
# with open("error.txt", "a") as f:
# f.write(rtext)
# exit()
time.sleep(0.5)
# Authorize and open spreadsheet and then worksheet
gc = pygsheets.authorize("client_secret.json")
# sh = gc.create('NEW_COIN_DATA - ' + str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
sh = gc.open_by_key("1HktqKkGeYsUHEUMQlvNcuVQxr5RjHRrbA98IRUkCPEk") # Google Sheet ID
wks = sh.sheet1
# Get whole sheet, get last row and create an array of names
end_row = 1
cells = wks.get_all_values(include_tailing_empty_rows=False, include_tailing_empty=False, returnas='matrix')
end_row = len(cells)
prev_names = []
if end_row > 1:
cells[0].pop(0)
for i in range(0, end_row):
prev_names.append(cells[i][0])
print(prev_names)
# Write header row if it doesn't exist
if end_row == 1:
header_cells_array = list(ALL_DATA[0].keys())
header_cells_array = [x.upper() for x in header_cells_array]
print(header_cells_array)
wks.update_values('A1', [header_cells_array])
model_cell = Cell('A1')
model_cell.set_text_format('bold', True)
DataRange('A1',str(ascii_uppercase[len(header_cells_array)])+'1', worksheet=wks).apply_format(model_cell)
row_number = 2
else:
row_number = end_row + 1
# update the sheet with array
for row in ALL_DATA:
if row['Name'] not in prev_names:
row_array = list(row.values())
for i, item in enumerate(row_array):
if type(item) is list:
row_array[i] = str(', '.join(item))
else:
print(row['Name'], 'Name already exists in sheet')
continue
wks.update_values(f'A{row_number}', [row_array])
print("ROW", row_number)
row_number += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment