Python web scraper + email report creator. Uses BeautifulSoup and pyautogui to scrape Green River's online Class Finder for BAS software development and programming prerequisite enrollments, then sends an email report to stakeholders.
# Python Standard Library
import smtplib
import ssl
import webbrowser
from datetime import datetime as dt
from email.mime.text import MIMEText
from time import sleep
from urllib.request import urlopen
# Third-party
import pyautogui
from bs4 import BeautifulSoup
from send2trash import send2trash

def send_gmail(sender, password, receiver, subject, body):
    """Prepare and send a nicely formatted Gmail."""
    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = sender
    msg["To"] = receiver
    context = ssl.create_default_context()
    with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as s:
        s.login(sender, password)
        s.sendmail(sender, receiver, msg.as_string())
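
# NOTE: Gmail typically rejects a plain account password for SMTP logins;
# an app password (or the legacy "less secure apps" setting) is usually
# required for send_gmail to authenticate.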
# Navigate to IT web page
webbrowser.open("https://www.greenriver.edu/grc/classfinder/")
sleep(5)
# GUI automation to select IT from drop-down menu
pyautogui.click(1159, 390) # coordinates specific to local machine
sleep(1)
pyautogui.typewrite("IT")
pyautogui.press("enter")
sleep(1)
pyautogui.click(1656, 458) # coordinates specific to local machine
sleep(3)
# GUI automation to save IT enrollment page as local html file
pyautogui.click(button="right") # right click to save
sleep(5)
pyautogui.click(1641, 604) # save as, coordinates specific to local machine
sleep(5)
pyautogui.press("enter")
sleep(5)
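# NOTE: the hard-coded coordinates above are fragile; they break if the
# browser window moves or the screen resolution changes. Locating targets
# by screenshot, e.g. pyautogui.locateCenterOnScreen("dropdown.png") with
# an illustrative reference image, is a slower but more portable alternative.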
# Create a BeautifulSoup object from local html file
html_url = r"file:///C:/Users/employee/Desktop/Class%20Finder%20_%20Green%20River%20College.html"
with urlopen(html_url) as html:
    soup = BeautifulSoup(html.read(), "html.parser")
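# (The page is parsed from a saved copy rather than fetched with urlopen
# directly, presumably because Class Finder only renders results after
# the drop-down interaction above.)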
# Build enrollments list from BeautifulSoup object
aas_sd_courses = ["102", "201", "206", "207", "219", "220"]
bas_sd_courses = ["301", "305", "328", "333", "334",
                  "355", "372", "378", "426", "485", "486"]
enrollments = []
# A quirk with Class Finder: the final item in this find_all
# always produces a noncritical error, hence the [0:-1]
for table in soup.find_all("table")[0:-1]:
    course = table.attrs["data-courseid"]  # e.g. "IT 102"
    if (course[-3:] in aas_sd_courses) or \
       (course[-3:] in bas_sd_courses):
        for td in table.find_all("td", {"data-th": "Status"}):
            status = td.get_text()
            enrollments.append(course + ": " + status)
# Send local html file to trash
send2trash(r"C:\Users\employee\Desktop\Class Finder _ Green River College.html")
# Calculate totals for enrollment report
aas_sd_count = 0
bas_sd_count = 0
waitlist = 0
for enrollment in enrollments:
    # Example splits:
    # ['IT', '102:', 'OPEN', '22', 'Enrolled', '/', '2', 'seats', 'available']
    # ['IT', '207:', 'WAITLIST', '24', 'Enrolled', '/', '10', 'students', 'on', 'waitlist']
    items = enrollment.split()
    if items[2] == "WAITLIST":
        waitlist += int(items[6])
    if items[2] != "Cancelled":
        reg = int(items[3])
        if items[1].rstrip(":") in aas_sd_courses:
            aas_sd_count += reg
        elif items[1].rstrip(":") in bas_sd_courses:
            bas_sd_count += reg
# Prepare enrollment report
report_time = dt.now().strftime("%I:%M %p on %x")
body = "B894 enrollments as of " + report_time + ".\n\n\
AAS-SD: " + str(aas_sd_count) + "\n\
BAS-SD: " + str(bas_sd_count) + "\n\
Total: " + str(aas_sd_count + bas_sd_count) + "\n\
Waitlisted: " + str(waitlist) + "\n\n"
for enrollment in enrollments:
body += (enrollment + "\n")
body += "\nThis Python web scraper was built with love by AndyO."
# Email enrollment report
send_gmail("#sender gmail address",
"#gmail password",
"#recipient address",
"Spring Software Enrollments",
body)
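
# To run this on a schedule, a Windows Task Scheduler (or cron) entry
# pointing at the script would work, with the caveat that pyautogui
# needs an unlocked, visible desktop session to click and type.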