Last active
October 26, 2020 20:14
-
-
Save AO8/6bcab0c28542fbcc9dc85ef7dcc46cf2 to your computer and use it in GitHub Desktop.
Python webscraper + email report creator. Uses BeautifulSoup and pyautogui to scrape Green River's online class finder for BAS software development and programming prereq enrollments, then sends email report to stakeholders.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Python Standard Library | |
| import smtplib | |
| import ssl | |
| import webbrowser | |
| from datetime import datetime as dt | |
| from email.mime.text import MIMEText | |
| from time import sleep | |
| from urllib.request import urlopen | |
| # Third-party | |
| import pyautogui | |
| from bs4 import BeautifulSoup | |
| from send2trash import send2trash | |
def send_gmail(sender, password, receiver, subject, body):
    """Prepare and send a plain-text email through Gmail's SMTP server.

    Args:
        sender: Gmail address used both to log in and as the "From" header.
        password: Password passed to the SMTP login for ``sender``.
        receiver: Either a single address string or an iterable of address
            strings (list, tuple, ...). A single string behaves exactly as
            before; an iterable delivers the same message to every address.
        subject: Text placed in the "Subject" header.
        body: Plain-text message body.

    Errors raised by ``smtplib`` while connecting, logging in or sending are
    not caught here and propagate to the caller.
    """
    # sendmail() treats a bare string as ONE address, so normalise to a list
    # up front; this is what makes multiple recipients work.
    if isinstance(receiver, str):
        recipients = [receiver]
    else:
        recipients = list(receiver)

    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"] = sender
    # For a single recipient this is just the address itself, as before
    msg["To"] = ", ".join(recipients)

    # Implicit TLS on port 465, certificates verified via the default context
    context = ssl.create_default_context()
    with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as s:
        s.login(sender, password)
        s.sendmail(sender, recipients, msg.as_string())
# Open the class finder in the default browser and give the page time to load
webbrowser.open("https://www.greenriver.edu/grc/classfinder/")
sleep(5)

# Select IT from the drop-down menu through GUI automation.
# NOTE: every (x, y) pair below is a screen position specific to the local
# machine; the sleeps give the browser time to react between steps.
pyautogui.click(x=1159, y=390)
sleep(1)
pyautogui.typewrite("IT")
pyautogui.press("enter")
sleep(1)
pyautogui.click(x=1656, y=458)
sleep(3)

# Save the IT enrollment page as a local html file
pyautogui.click(button="right")  # right click (no coordinates given) to save
sleep(5)
pyautogui.click(x=1641, y=604)  # "save as" entry
sleep(5)
pyautogui.press("enter")  # presumably confirms the save dialog - TODO confirm
sleep(5)
# Read the locally saved class finder page fully into memory
html_url = r"file:///C:/Users/employee/Desktop/Class%20Finder%20_%20Green%20River%20College.html"
with urlopen(html_url) as html:
    page_source = html.read()

# Send local html file to trash as soon as its contents are in memory, so an
# error while parsing below can no longer leave a stale copy on the Desktop.
# NOTE(review): a leftover file would presumably interfere with the next
# automated "save as" run - confirm.
send2trash(r"C:\Users\employee\Desktop\Class Finder _ Green River College.html")

# Create a BeautifulSoup object from the page source
soup = BeautifulSoup(page_source, "html.parser")

# Course numbers (last three characters of the course id) that are reported
aas_sd_courses = ["102", "201", "206", "207", "219", "220"]
bas_sd_courses = ["301", "305", "328", "333", "334",
                  "355", "372", "378", "426", "485", "486"]

# Build enrollments list: one "<course>: <status text>" entry per Status cell
enrollments = []

# A quirk with class finder; final item in this find_all
# always produces a noncritical error, hence the [0:-1]
for table in soup.find_all("table")[0:-1]:
    course = table.attrs["data-courseid"]  # e.g. "IT 102"
    course_number = course[-3:]
    if course_number in aas_sd_courses or course_number in bas_sd_courses:
        enrollments.extend(
            course + ": " + td.get_text()
            for td in table.find_all("td", {"data-th": "Status"})
        )
# Tally the figures shown in the enrollment report
aas_sd_count = 0
bas_sd_count = 0
waitlist = 0

for entry in enrollments:
    # Whitespace-split entries look like:
    # ['IT', '102:', 'OPEN', '22', 'Enrolled', '/', '2', 'seats', 'available']
    # ['IT', '207:', 'WAITLIST', '24', 'Enrolled', '/', '10', 'students', 'on', 'waitlist']
    tokens = entry.split()
    state = tokens[2]

    # On waitlisted sections the 7th token is the number of waiting students
    if state == "WAITLIST":
        waitlist += int(tokens[6])

    # Cancelled sections contribute nothing to the enrollment counts
    if state == "Cancelled":
        continue

    enrolled = int(tokens[3])
    course_number = tokens[1].rstrip(":")
    if course_number in aas_sd_courses:
        aas_sd_count += enrolled
    elif course_number in bas_sd_courses:
        bas_sd_count += enrolled
# Prepare enrollment report: summary totals, a blank line, one line per
# scraped enrollment entry, a blank line, then the sign-off
report_time = dt.now().strftime("%I:%M %p on %x")
report_lines = [
    "B894 enrollments as of {}.".format(report_time),
    "",
    "AAS-SD: {}".format(aas_sd_count),
    "BAS-SD: {}".format(bas_sd_count),
    "Total: {}".format(aas_sd_count + bas_sd_count),
    "Waitlisted: {}".format(waitlist),
    "",
]
report_lines.extend(enrollments)
report_lines.append("")
report_lines.append("This Python web scraper was built with love by AndyO.")
body = "\n".join(report_lines)
# Email enrollment report; the "#..." strings are placeholders in the source
# for the real account details
send_gmail(
    sender="#sender gmail address",
    password="#gmail password",
    receiver="#recipient address",
    subject="Spring Software Enrollments",
    body=body,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment