danthedaniel · August 20, 2018 21:17
diff --git a/tms.py b/tms.py
 """Drexel Web Term Master Schedule scraper."""

 import bs4 as bs
 import sqlite3
 import requests
 from datetime import datetime


 def db_setup(conn):
    """Ensure the ``classes`` table and its indices exist.

    Parameters
    ----------
    conn : sqlite connection
        Database connection. The schema statements are committed on it
        before returning.
    """
    table_ddl = """CREATE TABLE IF NOT EXISTS classes (
        subject_code TEXT,
        course_num TEXT,
        instr_type TEXT,
        instr_method TEXT,
        section TEXT,
        crn INTEGER,
        full BOOLEAN,
        title TEXT,
        days TEXT,
        start_time INTEGER,
        end_time INTEGER,
        instructor TEXT
    )"""
    # Every statement uses IF NOT EXISTS, so running this twice is harmless.
    schema_statements = (
        table_ddl,
        "CREATE INDEX IF NOT EXISTS crn ON classes (crn)",
        "CREATE INDEX IF NOT EXISTS start_time ON classes (start_time)",
        "CREATE INDEX IF NOT EXISTS end_time ON classes (end_time)",
    )

    cursor = conn.cursor()
    for statement in schema_statements:
        cursor.execute(statement)
    conn.commit()


 def get_college(root, href, conn):
    """Handle a college's majors.

    Requests ``root + href`` and calls ``get_field`` for every link found
    inside the element with class ``collegePanel``.

    Parameters
    ----------
    root : str
        The web address root.
    href : str
        Absolute path of the page to request.
    conn : sqlite connection
        Database connection.
    """
    # Without a timeout a stalled server would hang the scrape forever.
    content = requests.get(root + href, timeout=30).content
    bullets = bs.BeautifulSoup(content, "lxml").find(class_="collegePanel")

    # find() returns None when the page has no such element; there is
    # nothing to follow in that case.
    if bullets is None:
        return

    # href=True skips anchors without a target, which would otherwise make
    # the URL concatenation fail on None.
    for link in bullets.find_all("a", href=True):
        get_field(root, link["href"], conn)


 def get_field(root, href, conn):
    """Handle a major's classes.

    Requests ``root + href`` and passes every tag that follows the element
    with class ``tableHeader`` to ``get_section``.

    Parameters
    ----------
    root : str
        The web address root.
    href : str
        Absolute path of the page to request.
    conn : sqlite connection
        Database connection.
    """
    # Without a timeout a stalled server would hang the scrape forever.
    content = requests.get(root + href, timeout=30).content
    header = bs.BeautifulSoup(content, "lxml").find(class_="tableHeader")

    # find() returns None when the page has no section table.
    if header is None:
        return

    for sibling in header.next_siblings:
        # Filter out stray strings
        if isinstance(sibling, bs.element.Tag):
            get_section(sibling, conn)


 def get_section(section, conn):
    """Insert a class section into the database.

    Tags that do not contain exactly 11 ``td`` cells are ignored. The row
    is committed immediately and a short progress line is printed.

    Parameters
    ----------
    section : bs4.element.Tag
        A table row from the class section lists.
    conn : sqlite connection
        Database connection.

    Raises
    ------
    ValueError
        If the text of the CRN cell (index 5) is not an integer.
    """
    cells = section.find_all("td")

    # Only use this tag if it has all of the required fields
    if len(cells) != 11:
        return

    texts = [cell.get_text() for cell in cells]

    # The section counts as full when the CRN cell holds a <p> whose title
    # is "FULL". A missing <p> or title is treated as "not full" instead of
    # raising.
    status = cells[5].find("p")
    is_full = status is not None and status.get("title") == "FULL"
    time_start, time_end = get_time_range(texts[9])

    c = conn.cursor()
    c.execute(
        "INSERT INTO classes VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
        (
            texts[0],
            texts[1],
            texts[2],
            texts[3],
            texts[4],
            int(texts[5]),
            is_full,
            texts[6],
            # Cell 7 is not stored.
            texts[8],
            time_start,
            time_end,
            texts[10],
        )
    )
    conn.commit()

    print("{}{} - {}".format(texts[0], texts[1], texts[4]))


 def get_time_range(times):
    """From a string, read in a time range.

    Parameters
    ----------
    times : str
        String in the format: "08:00 AM - 09:00 AM". Surrounding
        whitespace is ignored.

    Returns
    -------
    Tuple of 2 integers (as produced by ``time_as_minutes``) or
    (None, None) if couldn't parse.
    """
    time_format = "%I:%M %p"
    time_range = times.strip().split(" - ")

    # A lone time such as "08:00 AM" has no end part; indexing it used to
    # raise IndexError instead of giving the documented (None, None).
    if len(time_range) < 2:
        return (None, None)

    try:
        time_start = datetime.strptime(time_range[0].strip(), time_format)
        time_end = datetime.strptime(time_range[1].strip(), time_format)
    except ValueError:
        return (None, None)

    return (
        time_as_minutes(time_start.time()),
        time_as_minutes(time_end.time()),
    )


 def time_as_minutes(time):
    """Express a time of day as whole minutes since midnight.

    Parameters
    ----------
    time : datetime.time
        The time of day to convert. Only its ``hour`` and ``minute``
        attributes are read.

    Returns
    -------
    Integer number of minutes past midnight.
    """
    minutes_per_hour = 60
    whole_hours = time.hour
    return whole_hours * minutes_per_hour + time.minute


 def main():
    """Read the Drexel Web TMS into a SQLite database.

    Requests the term page, then calls ``get_college`` for every link in
    the element with id ``sideLeft``. Rows are written to ``spring.sqlite``
    in the current working directory.

    Raises
    ------
    RuntimeError
        If the term page does not contain the ``sideLeft`` element.
    """
    root = "https://duapp2.drexel.edu"
    # This is the root address for a term in the Web TMS
    start_url = "/webtms_du/app?component=quarterTermDetails&page=Home&service=direct&sp=ZH4sIAAAAAAAAAFvzloG1uIhBPjWlVC%2BlKLUiNUcvs6hErzw1qSS3WC8lsSRRLyS1KJcBAhiZGJh9GNgTk0tCMnNTSxhEfLISyxL1iwtz9EECxSWJuQXWPgwcJUAtzvkpQBVCEBU5iXnp%2BsElRZl56TB5l9Ti5EKGOgamioKCEgY2IwNDc2NToJHBBSBVCoGliUVAZQqG5rqGFgD84zXupgAAAA%3D%3D"
    # Without a timeout a stalled server would hang the scrape forever.
    start_content = requests.get(root + start_url, timeout=30).content
    sidebar = bs.BeautifulSoup(start_content, "lxml").find(id="sideLeft")
    # find() returns None when the element is missing; fail with a clear
    # message rather than an AttributeError further down.
    if sidebar is None:
        raise RuntimeError("Could not find the 'sideLeft' sidebar on the term page")

    conn = sqlite3.connect('spring.sqlite')
    try:
        db_setup(conn)

        # href=True skips anchors without a target.
        for link in sidebar.find_all("a", href=True):
            get_college(root, link["href"], conn)
    finally:
        # Release the database file even if a request or insert fails.
        conn.close()


 # Run the scraper only when this file is executed as a script, not when it
 # is imported as a module.
 if __name__ == "__main__":
    main()
	"""Drexel Web Term Master Schedule scraper."""

	import bs4 as bs
	import sqlite3
	import requests
	from datetime import datetime


	def db_setup(conn):
	    """Create the table and indices.

	    Parameters
	    ----------
	    conn : sqlite connection
	        Database connection. The schema statements are committed on it
	        before returning.
	    """
	    c = conn.cursor()
	    # Every statement uses IF NOT EXISTS, so running this twice is harmless.
	    c.execute("""CREATE TABLE IF NOT EXISTS classes (
	        subject_code TEXT,
	        course_num TEXT,
	        instr_type TEXT,
	        instr_method TEXT,
	        section TEXT,
	        crn INTEGER,
	        full BOOLEAN,
	        title TEXT,
	        days TEXT,
	        start_time INTEGER,
	        end_time INTEGER,
	        instructor TEXT
	    )""")
	    c.execute("CREATE INDEX IF NOT EXISTS crn ON classes (crn)")
	    c.execute("CREATE INDEX IF NOT EXISTS start_time ON classes (start_time)")
	    c.execute("CREATE INDEX IF NOT EXISTS end_time ON classes (end_time)")
	    conn.commit()


	def get_college(root, href, conn):
	    """Handle a college's majors.

	    Requests ``root + href`` and calls ``get_field`` for every link found
	    inside the element with class ``collegePanel``.

	    Parameters
	    ----------
	    root : str
	        The web address root.
	    href : str
	        Absolute path of the page to request.
	    conn : sqlite connection
	        Database connection.
	    """
	    # Without a timeout a stalled server would hang the scrape forever.
	    content = requests.get(root + href, timeout=30).content
	    bullets = bs.BeautifulSoup(content, "lxml").find(class_="collegePanel")

	    # find() returns None when the page has no such element; there is
	    # nothing to follow in that case.
	    if bullets is None:
	        return

	    # href=True skips anchors without a target, which would otherwise make
	    # the URL concatenation fail on None.
	    for link in bullets.find_all("a", href=True):
	        get_field(root, link["href"], conn)


	def get_field(root, href, conn):
	    """Handle a major's classes.

	    Requests ``root + href`` and passes every tag that follows the element
	    with class ``tableHeader`` to ``get_section``.

	    Parameters
	    ----------
	    root : str
	        The web address root.
	    href : str
	        Absolute path of the page to request.
	    conn : sqlite connection
	        Database connection.
	    """
	    # Without a timeout a stalled server would hang the scrape forever.
	    content = requests.get(root + href, timeout=30).content
	    header = bs.BeautifulSoup(content, "lxml").find(class_="tableHeader")

	    # find() returns None when the page has no section table.
	    if header is None:
	        return

	    for sibling in header.next_siblings:
	        # Filter out stray strings
	        if isinstance(sibling, bs.element.Tag):
	            get_section(sibling, conn)


	def get_section(section, conn):
	    """Insert a class section into the database.

	    Tags that do not contain exactly 11 ``td`` cells are ignored. The row
	    is committed immediately and a short progress line is printed.

	    Parameters
	    ----------
	    section : bs4.element.Tag
	        A table row from the class section lists.
	    conn : sqlite connection
	        Database connection.

	    Raises
	    ------
	    ValueError
	        If the text of the CRN cell (index 5) is not an integer.
	    """
	    cells = section.find_all("td")

	    # Only use this tag if it has all of the required fields
	    if len(cells) != 11:
	        return

	    texts = [cell.get_text() for cell in cells]

	    # The section counts as full when the CRN cell holds a <p> whose title
	    # is "FULL". A missing <p> or title is treated as "not full" instead of
	    # raising.
	    status = cells[5].find("p")
	    is_full = status is not None and status.get("title") == "FULL"
	    time_start, time_end = get_time_range(texts[9])

	    c = conn.cursor()
	    c.execute(
	        "INSERT INTO classes VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
	        (
	            texts[0],
	            texts[1],
	            texts[2],
	            texts[3],
	            texts[4],
	            int(texts[5]),
	            is_full,
	            texts[6],
	            # Cell 7 is not stored.
	            texts[8],
	            time_start,
	            time_end,
	            texts[10],
	        )
	    )
	    conn.commit()

	    print("{}{} - {}".format(texts[0], texts[1], texts[4]))


	def get_time_range(times):
	    """From a string, read in a time range.

	    Parameters
	    ----------
	    times : str
	        String in the format: "08:00 AM - 09:00 AM". Surrounding
	        whitespace is ignored.

	    Returns
	    -------
	    Tuple of 2 integers (as produced by ``time_as_minutes``) or
	    (None, None) if couldn't parse.
	    """
	    time_format = "%I:%M %p"
	    time_range = times.strip().split(" - ")

	    # A lone time such as "08:00 AM" has no end part; indexing it would
	    # raise IndexError instead of giving the documented (None, None).
	    if len(time_range) < 2:
	        return (None, None)

	    try:
	        time_start = datetime.strptime(time_range[0].strip(), time_format)
	        time_end = datetime.strptime(time_range[1].strip(), time_format)
	    except ValueError:
	        return (None, None)

	    return (
	        time_as_minutes(time_start.time()),
	        time_as_minutes(time_end.time()),
	    )


	def time_as_minutes(time):
	    """Convert a datetime.time to an integer number of minutes.

	    Parameters
	    ----------
	    time : datetime.time
	        The time object to convert from. Only its ``hour`` and ``minute``
	        attributes are read.

	    Returns
	    -------
	    Integer number of minutes past midnight.
	    """
	    return time.hour * 60 + time.minute


	def main():
	    """Read the Drexel Web TMS into a SQLite database.

	    Requests the term page, then calls ``get_college`` for every link in
	    the element with id ``sideLeft``. Rows are written to ``spring.sqlite``
	    in the current working directory.

	    Raises
	    ------
	    RuntimeError
	        If the term page does not contain the ``sideLeft`` element.
	    """
	    root = "https://duapp2.drexel.edu"
	    # This is the root address for a term in the Web TMS
	    start_url = "/webtms_du/app?component=quarterTermDetails&page=Home&service=direct&sp=ZH4sIAAAAAAAAAFvzloG1uIhBPjWlVC%2BlKLUiNUcvs6hErzw1qSS3WC8lsSRRLyS1KJcBAhiZGJh9GNgTk0tCMnNTSxhEfLISyxL1iwtz9EECxSWJuQXWPgwcJUAtzvkpQBVCEBU5iXnp%2BsElRZl56TB5l9Ti5EKGOgamioKCEgY2IwNDc2NToJHBBSBVCoGliUVAZQqG5rqGFgD84zXupgAAAA%3D%3D"
	    # Without a timeout a stalled server would hang the scrape forever.
	    start_content = requests.get(root + start_url, timeout=30).content
	    sidebar = bs.BeautifulSoup(start_content, "lxml").find(id="sideLeft")
	    # find() returns None when the element is missing; fail with a clear
	    # message rather than an AttributeError further down.
	    if sidebar is None:
	        raise RuntimeError("Could not find the 'sideLeft' sidebar on the term page")

	    conn = sqlite3.connect('spring.sqlite')
	    try:
	        db_setup(conn)

	        # href=True skips anchors without a target.
	        for link in sidebar.find_all("a", href=True):
	            get_college(root, link["href"], conn)
	    finally:
	        # Release the database file even if a request or insert fails.
	        conn.close()


	# Run the scraper only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()